mirror of https://github.com/apache/lucene.git
LUCENE-5752: renames
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5752@1603012 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
63c2842d87
commit
be3cca1e44
|
@ -447,7 +447,7 @@ public class Dictionary {
|
|||
throw new UnsupportedOperationException("Too many patterns, please report this to dev@lucene.apache.org");
|
||||
}
|
||||
seenPatterns.put(regex, patternIndex);
|
||||
CharacterRunAutomaton pattern = new CharacterRunAutomaton(new RegExp(regex, RegExp.NONE).toLightAutomaton());
|
||||
CharacterRunAutomaton pattern = new CharacterRunAutomaton(new RegExp(regex, RegExp.NONE).toAutomaton());
|
||||
patterns.add(pattern);
|
||||
}
|
||||
|
||||
|
|
|
@ -31,9 +31,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/**
|
||||
* Compares MockTokenizer (which is simple with no optimizations) with equivalent
|
||||
|
@ -48,7 +48,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
|
|||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
LightAutomaton single = new LightAutomaton();
|
||||
Automaton single = new Automaton();
|
||||
int initial = single.createState();
|
||||
int accept = single.createState();
|
||||
single.setAccept(accept, true);
|
||||
|
@ -59,7 +59,7 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
|
|||
single.addTransition(initial, accept, i);
|
||||
}
|
||||
}
|
||||
LightAutomaton repeat = BasicOperations.repeatLight(single);
|
||||
Automaton repeat = Operations.repeat(single);
|
||||
jvmLetter = new CharacterRunAutomaton(repeat);
|
||||
}
|
||||
|
||||
|
|
|
@ -46,7 +46,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RunAutomaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
|
||||
|
@ -946,8 +946,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
states[0] = new State();
|
||||
states[0].changeOrd = terms.length;
|
||||
states[0].state = runAutomaton.getInitialState();
|
||||
states[0].transitionCount = compiledAutomaton.lightAutomaton.getNumTransitions(states[0].state);
|
||||
compiledAutomaton.lightAutomaton.initTransition(states[0].state, states[0].transition);
|
||||
states[0].transitionCount = compiledAutomaton.automaton.getNumTransitions(states[0].state);
|
||||
compiledAutomaton.automaton.initTransition(states[0].state, states[0].transition);
|
||||
states[0].transitionUpto = -1;
|
||||
states[0].transitionMax = -1;
|
||||
|
||||
|
@ -969,7 +969,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
while (label > states[i].transitionMax) {
|
||||
states[i].transitionUpto++;
|
||||
assert states[i].transitionUpto < states[i].transitionCount;
|
||||
compiledAutomaton.lightAutomaton.getNextTransition(states[i].transition);
|
||||
compiledAutomaton.automaton.getNextTransition(states[i].transition);
|
||||
states[i].transitionMin = states[i].transition.min;
|
||||
states[i].transitionMax = states[i].transition.max;
|
||||
assert states[i].transitionMin >= 0;
|
||||
|
@ -1028,8 +1028,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
stateUpto++;
|
||||
states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
|
||||
states[stateUpto].state = nextState;
|
||||
states[stateUpto].transitionCount = compiledAutomaton.lightAutomaton.getNumTransitions(nextState);
|
||||
compiledAutomaton.lightAutomaton.initTransition(states[stateUpto].state, states[stateUpto].transition);
|
||||
states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
|
||||
compiledAutomaton.automaton.initTransition(states[stateUpto].state, states[stateUpto].transition);
|
||||
states[stateUpto].transitionUpto = -1;
|
||||
states[stateUpto].transitionMax = -1;
|
||||
//System.out.println(" push " + states[stateUpto].transitions.length + " trans");
|
||||
|
@ -1202,7 +1202,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
continue nextTerm;
|
||||
}
|
||||
compiledAutomaton.lightAutomaton.getNextTransition(state.transition);
|
||||
compiledAutomaton.automaton.getNextTransition(state.transition);
|
||||
assert state.transitionUpto < state.transitionCount: " state.transitionUpto=" + state.transitionUpto + " vs " + state.transitionCount;
|
||||
state.transitionMin = state.transition.min;
|
||||
state.transitionMax = state.transition.max;
|
||||
|
@ -1303,8 +1303,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
|
|||
stateUpto++;
|
||||
states[stateUpto].state = nextState;
|
||||
states[stateUpto].changeOrd = skips[skipOffset + skipUpto++];
|
||||
states[stateUpto].transitionCount = compiledAutomaton.lightAutomaton.getNumTransitions(nextState);
|
||||
compiledAutomaton.lightAutomaton.initTransition(nextState, states[stateUpto].transition);
|
||||
states[stateUpto].transitionCount = compiledAutomaton.automaton.getNumTransitions(nextState);
|
||||
compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
|
||||
states[stateUpto].transitionUpto = -1;
|
||||
states[stateUpto].transitionMax = -1;
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RollingBuffer;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
// TODO: maybe also toFST? then we can translate atts into FST outputs/weights
|
||||
|
||||
|
@ -96,8 +96,8 @@ public class TokenStreamToAutomaton {
|
|||
* TokenStream}, and creates the corresponding
|
||||
* automaton where arcs are bytes (or Unicode code points
|
||||
* if unicodeArcs = true) from each term. */
|
||||
public LightAutomaton toAutomaton(TokenStream in) throws IOException {
|
||||
final LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
public Automaton toAutomaton(TokenStream in) throws IOException {
|
||||
final Automaton.Builder builder = new Automaton.Builder();
|
||||
builder.createState();
|
||||
|
||||
final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class);
|
||||
|
@ -228,7 +228,7 @@ public class TokenStreamToAutomaton {
|
|||
}
|
||||
*/
|
||||
|
||||
private static void addHoles(LightAutomaton.Builder builder, RollingBuffer<Position> positions, int pos) {
|
||||
private static void addHoles(Automaton.Builder builder, RollingBuffer<Position> positions, int pos) {
|
||||
Position posData = positions.get(pos);
|
||||
Position prevPosData = positions.get(pos-1);
|
||||
|
||||
|
|
|
@ -359,7 +359,7 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
continue nextTerm;
|
||||
}
|
||||
currentFrame.transitionIndex++;
|
||||
compiledAutomaton.lightAutomaton.getNextTransition(currentFrame.transition);
|
||||
compiledAutomaton.automaton.getNextTransition(currentFrame.transition);
|
||||
currentFrame.curTransitionMax = currentFrame.transition.max;
|
||||
//if (DEBUG) System.out.println(" next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.index.FieldInfo.IndexOptions;
|
|||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
|
||||
|
@ -122,10 +122,10 @@ final class IntersectTermsEnumFrame {
|
|||
public void setState(int state) {
|
||||
this.state = state;
|
||||
transitionIndex = 0;
|
||||
transitionCount = ite.compiledAutomaton.lightAutomaton.getNumTransitions(state);
|
||||
transitionCount = ite.compiledAutomaton.automaton.getNumTransitions(state);
|
||||
if (transitionCount != 0) {
|
||||
ite.compiledAutomaton.lightAutomaton.initTransition(state, transition);
|
||||
ite.compiledAutomaton.lightAutomaton.getNextTransition(transition);
|
||||
ite.compiledAutomaton.automaton.initTransition(state, transition);
|
||||
ite.compiledAutomaton.automaton.getNextTransition(transition);
|
||||
curTransitionMax = transition.max;
|
||||
} else {
|
||||
curTransitionMax = -1;
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.util.IntsRef;
|
|||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
|
||||
/**
|
||||
|
@ -52,7 +52,7 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
// true if the automaton accepts a finite language
|
||||
private final boolean finite;
|
||||
// array of sorted transitions for each state, indexed by state number
|
||||
private final LightAutomaton lightAutomaton;
|
||||
private final Automaton automaton;
|
||||
// for path tracking: each long records gen when we last
|
||||
// visited the state; we use gens to avoid having to clear
|
||||
private final long[] visited;
|
||||
|
@ -80,7 +80,7 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
this.runAutomaton = compiled.runAutomaton;
|
||||
assert this.runAutomaton != null;
|
||||
this.commonSuffixRef = compiled.commonSuffixRef;
|
||||
this.lightAutomaton = compiled.lightAutomaton;
|
||||
this.automaton = compiled.automaton;
|
||||
|
||||
// used for path tracking: one long per numbered state, recording the gen when it was last visited.
|
||||
visited = new long[runAutomaton.getSize()];
|
||||
|
@ -143,10 +143,10 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
state = runAutomaton.step(state, seekBytesRef.bytes[i] & 0xff);
|
||||
assert state >= 0: "state=" + state;
|
||||
}
|
||||
final int numTransitions = lightAutomaton.getNumTransitions(state);
|
||||
lightAutomaton.initTransition(state, transition);
|
||||
final int numTransitions = automaton.getNumTransitions(state);
|
||||
automaton.initTransition(state, transition);
|
||||
for (int i = 0; i < numTransitions; i++) {
|
||||
lightAutomaton.getNextTransition(transition);
|
||||
automaton.getNextTransition(transition);
|
||||
if (transition.min <= (seekBytesRef.bytes[position] & 0xff) &&
|
||||
(seekBytesRef.bytes[position] & 0xff) <= transition.max) {
|
||||
maxInterval = transition.max;
|
||||
|
@ -257,12 +257,12 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
seekBytesRef.length = position;
|
||||
visited[state] = curGen;
|
||||
|
||||
final int numTransitions = lightAutomaton.getNumTransitions(state);
|
||||
lightAutomaton.initTransition(state, transition);
|
||||
final int numTransitions = automaton.getNumTransitions(state);
|
||||
automaton.initTransition(state, transition);
|
||||
// find the minimal path (lexicographic order) that is >= c
|
||||
|
||||
for (int i = 0; i < numTransitions; i++) {
|
||||
lightAutomaton.getNextTransition(transition);
|
||||
automaton.getNextTransition(transition);
|
||||
if (transition.max >= c) {
|
||||
int nextChar = Math.max(c, transition.min);
|
||||
// append either the next sequential char, or the minimum transition
|
||||
|
@ -281,8 +281,8 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
* so the below is ok, if it is not an accept state,
|
||||
* then there MUST be at least one transition.
|
||||
*/
|
||||
lightAutomaton.initTransition(state, transition);
|
||||
lightAutomaton.getNextTransition(transition);
|
||||
automaton.initTransition(state, transition);
|
||||
automaton.getNextTransition(transition);
|
||||
state = transition.dest;
|
||||
|
||||
// append the minimum transition
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/**
|
||||
* A {@link Query} that will match terms against a finite-state machine.
|
||||
|
@ -47,7 +47,7 @@ import org.apache.lucene.util.automaton.LightAutomaton;
|
|||
*/
|
||||
public class AutomatonQuery extends MultiTermQuery {
|
||||
/** the automaton to match index terms against */
|
||||
protected final LightAutomaton lightAutomaton;
|
||||
protected final Automaton automaton;
|
||||
protected final CompiledAutomaton compiled;
|
||||
/** term containing the field, and possibly some pattern structure */
|
||||
protected final Term term;
|
||||
|
@ -60,10 +60,10 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
* @param automaton Automaton to run, terms that are accepted are considered a
|
||||
* match.
|
||||
*/
|
||||
public AutomatonQuery(final Term term, LightAutomaton automaton) {
|
||||
public AutomatonQuery(final Term term, Automaton automaton) {
|
||||
super(term.field());
|
||||
this.term = term;
|
||||
this.lightAutomaton = automaton;
|
||||
this.automaton = automaton;
|
||||
this.compiled = new CompiledAutomaton(automaton);
|
||||
}
|
||||
|
||||
|
@ -110,14 +110,14 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
buffer.append(getClass().getSimpleName());
|
||||
buffer.append(" {");
|
||||
buffer.append('\n');
|
||||
buffer.append(lightAutomaton.toString());
|
||||
buffer.append(automaton.toString());
|
||||
buffer.append("}");
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/** Returns the automaton used to create this query */
|
||||
public LightAutomaton getLightAutomaton() {
|
||||
return lightAutomaton;
|
||||
public Automaton getAutomaton() {
|
||||
return automaton;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/** Subclass of TermsEnum for enumerating all terms that are similar
|
||||
* to the specified filter term.
|
||||
|
@ -170,7 +170,7 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
|
||||
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
|
||||
for (int i = runAutomata.size(); i <= maxDistance; i++) {
|
||||
LightAutomaton a = builder.toAutomaton(i, prefix);
|
||||
Automaton a = builder.toAutomaton(i, prefix);
|
||||
//System.out.println("compute automaton n=" + i);
|
||||
runAutomata.add(new CompiledAutomaton(a, true, false));
|
||||
}
|
||||
|
|
|
@ -2,8 +2,8 @@ package org.apache.lucene.search;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomatonProvider;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.AutomatonProvider;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/*
|
||||
|
@ -49,9 +49,9 @@ public class RegexpQuery extends AutomatonQuery {
|
|||
/**
|
||||
* A provider that provides no named automata
|
||||
*/
|
||||
private static LightAutomatonProvider defaultProvider = new LightAutomatonProvider() {
|
||||
private static AutomatonProvider defaultProvider = new AutomatonProvider() {
|
||||
@Override
|
||||
public LightAutomaton getAutomaton(String name) {
|
||||
public Automaton getAutomaton(String name) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
@ -85,8 +85,8 @@ public class RegexpQuery extends AutomatonQuery {
|
|||
* @param flags optional RegExp features from {@link RegExp}
|
||||
* @param provider custom AutomatonProvider for named automata
|
||||
*/
|
||||
public RegexpQuery(Term term, int flags, LightAutomatonProvider provider) {
|
||||
super(term, new RegExp(term.text(), flags).toLightAutomaton(provider));
|
||||
public RegexpQuery(Term term, int flags, AutomatonProvider provider) {
|
||||
super(term, new RegExp(term.text(), flags).toAutomaton(provider));
|
||||
}
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
|
|
|
@ -22,9 +22,9 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
|
||||
* matches any character sequence (including the empty one), and <code>?</code>,
|
||||
|
@ -62,8 +62,8 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
* @lucene.internal
|
||||
*/
|
||||
@SuppressWarnings("fallthrough")
|
||||
public static LightAutomaton toAutomaton(Term wildcardquery) {
|
||||
List<LightAutomaton> automata = new ArrayList<>();
|
||||
public static Automaton toAutomaton(Term wildcardquery) {
|
||||
List<Automaton> automata = new ArrayList<>();
|
||||
|
||||
String wildcardText = wildcardquery.text();
|
||||
|
||||
|
@ -72,26 +72,26 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
int length = Character.charCount(c);
|
||||
switch(c) {
|
||||
case WILDCARD_STRING:
|
||||
automata.add(BasicAutomata.makeAnyStringLight());
|
||||
automata.add(Automata.makeAnyString());
|
||||
break;
|
||||
case WILDCARD_CHAR:
|
||||
automata.add(BasicAutomata.makeAnyCharLight());
|
||||
automata.add(Automata.makeAnyChar());
|
||||
break;
|
||||
case WILDCARD_ESCAPE:
|
||||
// add the next codepoint instead, if it exists
|
||||
if (i + length < wildcardText.length()) {
|
||||
final int nextChar = wildcardText.codePointAt(i + length);
|
||||
length += Character.charCount(nextChar);
|
||||
automata.add(BasicAutomata.makeCharLight(nextChar));
|
||||
automata.add(Automata.makeChar(nextChar));
|
||||
break;
|
||||
} // else fallthru, lenient parsing with a trailing \
|
||||
default:
|
||||
automata.add(BasicAutomata.makeCharLight(c));
|
||||
automata.add(Automata.makeChar(c));
|
||||
}
|
||||
i += length;
|
||||
}
|
||||
|
||||
return BasicOperations.concatenateLight(automata);
|
||||
return Operations.concatenate(automata);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -38,15 +38,15 @@ import org.apache.lucene.util.BytesRef;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final public class BasicAutomata {
|
||||
final public class Automata {
|
||||
|
||||
private BasicAutomata() {}
|
||||
private Automata() {}
|
||||
|
||||
/**
|
||||
* Returns a new (deterministic) automaton with the empty language.
|
||||
*/
|
||||
public static LightAutomaton makeEmptyLight() {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
public static Automaton makeEmpty() {
|
||||
Automaton a = new Automaton();
|
||||
a.finishState();
|
||||
return a;
|
||||
}
|
||||
|
@ -54,8 +54,8 @@ final public class BasicAutomata {
|
|||
/**
|
||||
* Returns a new (deterministic) automaton that accepts only the empty string.
|
||||
*/
|
||||
public static LightAutomaton makeEmptyStringLight() {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
public static Automaton makeEmptyString() {
|
||||
Automaton a = new Automaton();
|
||||
a.createState();
|
||||
a.setAccept(0, true);
|
||||
return a;
|
||||
|
@ -64,8 +64,8 @@ final public class BasicAutomata {
|
|||
/**
|
||||
* Returns a new (deterministic) automaton that accepts all strings.
|
||||
*/
|
||||
public static LightAutomaton makeAnyStringLight() {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
public static Automaton makeAnyString() {
|
||||
Automaton a = new Automaton();
|
||||
int s = a.createState();
|
||||
a.setAccept(s, true);
|
||||
a.addTransition(s, s, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
|
@ -76,11 +76,11 @@ final public class BasicAutomata {
|
|||
/**
|
||||
* Returns a new (deterministic) automaton that accepts any single codepoint.
|
||||
*/
|
||||
public static LightAutomaton makeAnyCharLight() {
|
||||
return makeCharRangeLight(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
public static Automaton makeAnyChar() {
|
||||
return makeCharRange(Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
}
|
||||
|
||||
public static int appendAnyChar(LightAutomaton a, int state) {
|
||||
public static int appendAnyChar(Automaton a, int state) {
|
||||
int newState = a.createState();
|
||||
a.addTransition(state, newState, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
return newState;
|
||||
|
@ -90,11 +90,11 @@ final public class BasicAutomata {
|
|||
* Returns a new (deterministic) automaton that accepts a single codepoint of
|
||||
* the given value.
|
||||
*/
|
||||
public static LightAutomaton makeCharLight(int c) {
|
||||
return makeCharRangeLight(c, c);
|
||||
public static Automaton makeChar(int c) {
|
||||
return makeCharRange(c, c);
|
||||
}
|
||||
|
||||
public static int appendChar(LightAutomaton a, int state, int c) {
|
||||
public static int appendChar(Automaton a, int state, int c) {
|
||||
int newState = a.createState();
|
||||
a.addTransition(state, newState, c, c);
|
||||
return newState;
|
||||
|
@ -104,11 +104,11 @@ final public class BasicAutomata {
|
|||
* Returns a new (deterministic) automaton that accepts a single codepoint whose
|
||||
* value is in the given interval (including both end points).
|
||||
*/
|
||||
public static LightAutomaton makeCharRangeLight(int min, int max) {
|
||||
public static Automaton makeCharRange(int min, int max) {
|
||||
if (min > max) {
|
||||
return makeEmptyLight();
|
||||
return makeEmpty();
|
||||
}
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int s1 = a.createState();
|
||||
int s2 = a.createState();
|
||||
a.setAccept(s2, true);
|
||||
|
@ -121,12 +121,12 @@ final public class BasicAutomata {
|
|||
* Constructs sub-automaton corresponding to decimal numbers of length
|
||||
* x.substring(n).length().
|
||||
*/
|
||||
private static int anyOfRightLengthLight(LightAutomaton.Builder builder, String x, int n) {
|
||||
private static int anyOfRightLength(Automaton.Builder builder, String x, int n) {
|
||||
int s = builder.createState();
|
||||
if (x.length() == n) {
|
||||
builder.setAccept(s, true);
|
||||
} else {
|
||||
builder.addTransition(s, anyOfRightLengthLight(builder, x, n + 1), '0', '9');
|
||||
builder.addTransition(s, anyOfRightLength(builder, x, n + 1), '0', '9');
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ final public class BasicAutomata {
|
|||
* Constructs sub-automaton corresponding to decimal numbers of value at least
|
||||
* x.substring(n) and length x.substring(n).length().
|
||||
*/
|
||||
private static int atLeastLight(LightAutomaton.Builder builder, String x, int n, Collection<Integer> initials,
|
||||
private static int atLeast(Automaton.Builder builder, String x, int n, Collection<Integer> initials,
|
||||
boolean zeros) {
|
||||
int s = builder.createState();
|
||||
if (x.length() == n) {
|
||||
|
@ -145,9 +145,9 @@ final public class BasicAutomata {
|
|||
initials.add(s);
|
||||
}
|
||||
char c = x.charAt(n);
|
||||
builder.addTransition(s, atLeastLight(builder, x, n + 1, initials, zeros && c == '0'), c);
|
||||
builder.addTransition(s, atLeast(builder, x, n + 1, initials, zeros && c == '0'), c);
|
||||
if (c < '9') {
|
||||
builder.addTransition(s, anyOfRightLengthLight(builder, x, n + 1), (char) (c + 1), '9');
|
||||
builder.addTransition(s, anyOfRightLength(builder, x, n + 1), (char) (c + 1), '9');
|
||||
}
|
||||
}
|
||||
return s;
|
||||
|
@ -157,15 +157,15 @@ final public class BasicAutomata {
|
|||
* Constructs sub-automaton corresponding to decimal numbers of value at most
|
||||
* x.substring(n) and length x.substring(n).length().
|
||||
*/
|
||||
private static int atMostLight(LightAutomaton.Builder builder, String x, int n) {
|
||||
private static int atMost(Automaton.Builder builder, String x, int n) {
|
||||
int s = builder.createState();
|
||||
if (x.length() == n) {
|
||||
builder.setAccept(s, true);
|
||||
} else {
|
||||
char c = x.charAt(n);
|
||||
builder.addTransition(s, atMostLight(builder, x, (char) n + 1), c);
|
||||
builder.addTransition(s, atMost(builder, x, (char) n + 1), c);
|
||||
if (c > '0') {
|
||||
builder.addTransition(s, anyOfRightLengthLight(builder, x, n + 1), '0', (char) (c - 1));
|
||||
builder.addTransition(s, anyOfRightLength(builder, x, n + 1), '0', (char) (c - 1));
|
||||
}
|
||||
}
|
||||
return s;
|
||||
|
@ -176,7 +176,7 @@ final public class BasicAutomata {
|
|||
* x.substring(n) and y.substring(n) and of length x.substring(n).length()
|
||||
* (which must be equal to y.substring(n).length()).
|
||||
*/
|
||||
private static int betweenLight(LightAutomaton.Builder builder,
|
||||
private static int between(Automaton.Builder builder,
|
||||
String x, String y, int n,
|
||||
Collection<Integer> initials, boolean zeros) {
|
||||
int s = builder.createState();
|
||||
|
@ -189,12 +189,12 @@ final public class BasicAutomata {
|
|||
char cx = x.charAt(n);
|
||||
char cy = y.charAt(n);
|
||||
if (cx == cy) {
|
||||
builder.addTransition(s, betweenLight(builder, x, y, n + 1, initials, zeros && cx == '0'), cx);
|
||||
builder.addTransition(s, between(builder, x, y, n + 1, initials, zeros && cx == '0'), cx);
|
||||
} else { // cx<cy
|
||||
builder.addTransition(s, atLeastLight(builder, x, n + 1, initials, zeros && cx == '0'), cx);
|
||||
builder.addTransition(s, atMostLight(builder, y, n + 1), cy);
|
||||
builder.addTransition(s, atLeast(builder, x, n + 1, initials, zeros && cx == '0'), cx);
|
||||
builder.addTransition(s, atMost(builder, y, n + 1), cy);
|
||||
if (cx + 1 < cy) {
|
||||
builder.addTransition(s, anyOfRightLengthLight(builder, x, n+1), (char) (cx + 1), (char) (cy - 1));
|
||||
builder.addTransition(s, anyOfRightLength(builder, x, n+1), (char) (cx + 1), (char) (cy - 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -216,7 +216,7 @@ final public class BasicAutomata {
|
|||
* interval cannot be expressed with the given fixed number of
|
||||
* digits
|
||||
*/
|
||||
public static LightAutomaton makeIntervalLight(int min, int max, int digits)
|
||||
public static Automaton makeInterval(int min, int max, int digits)
|
||||
throws IllegalArgumentException {
|
||||
String x = Integer.toString(min);
|
||||
String y = Integer.toString(max);
|
||||
|
@ -239,7 +239,7 @@ final public class BasicAutomata {
|
|||
by.append(y);
|
||||
y = by.toString();
|
||||
|
||||
LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
Automaton.Builder builder = new Automaton.Builder();
|
||||
|
||||
if (digits <= 0) {
|
||||
// Reserve the "real" initial state:
|
||||
|
@ -248,9 +248,9 @@ final public class BasicAutomata {
|
|||
|
||||
Collection<Integer> initials = new ArrayList<>();
|
||||
|
||||
betweenLight(builder, x, y, 0, initials, digits <= 0);
|
||||
between(builder, x, y, 0, initials, digits <= 0);
|
||||
|
||||
LightAutomaton a1 = builder.finish();
|
||||
Automaton a1 = builder.finish();
|
||||
|
||||
if (digits <= 0) {
|
||||
a1.addTransition(0, 0, '0');
|
||||
|
@ -267,8 +267,8 @@ final public class BasicAutomata {
|
|||
* Returns a new (deterministic) automaton that accepts the single given
|
||||
* string.
|
||||
*/
|
||||
public static LightAutomaton makeStringLight(String s) {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
public static Automaton makeString(String s) {
|
||||
Automaton a = new Automaton();
|
||||
int lastState = a.createState();
|
||||
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
|
||||
int state = a.createState();
|
||||
|
@ -281,7 +281,7 @@ final public class BasicAutomata {
|
|||
a.finishState();
|
||||
|
||||
assert a.isDeterministic();
|
||||
assert BasicOperations.hasDeadStates(a) == false;
|
||||
assert Operations.hasDeadStates(a) == false;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
@ -290,8 +290,8 @@ final public class BasicAutomata {
|
|||
* Returns a new (deterministic) automaton that accepts the single given
|
||||
* string from the specified unicode code points.
|
||||
*/
|
||||
public static LightAutomaton makeStringLight(int[] word, int offset, int length) {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
public static Automaton makeString(int[] word, int offset, int length) {
|
||||
Automaton a = new Automaton();
|
||||
a.createState();
|
||||
int s = 0;
|
||||
for (int i = offset; i < offset+length; i++) {
|
||||
|
@ -318,11 +318,11 @@ final public class BasicAutomata {
|
|||
* automaton is codepoint based (full unicode codepoints on
|
||||
* transitions).
|
||||
*/
|
||||
public static LightAutomaton makeStringUnionLight(Collection<BytesRef> utf8Strings) {
|
||||
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
|
||||
if (utf8Strings.isEmpty()) {
|
||||
return makeEmptyLight();
|
||||
return makeEmpty();
|
||||
} else {
|
||||
return DaciukMihovAutomatonBuilderLight.build(utf8Strings);
|
||||
return DaciukMihovAutomatonBuilder.build(utf8Strings);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.Sorter;
|
||||
|
||||
// nocommit make tests that do the same ops w/ old and new and assertSameLang
|
||||
|
||||
// TODO
|
||||
// - could use packed int arrays instead
|
||||
|
@ -40,9 +39,7 @@ import org.apache.lucene.util.Sorter;
|
|||
*
|
||||
* @lucene.experimental */
|
||||
|
||||
// nocommit rename to Automaton once everything is cutover
|
||||
|
||||
public class LightAutomaton {
|
||||
public class Automaton {
|
||||
private int nextState;
|
||||
|
||||
/** Where we next write to in int[] transitions; this
|
||||
|
@ -168,7 +165,7 @@ public class LightAutomaton {
|
|||
|
||||
/** Copies over all states/transitions from other. The state numbers
|
||||
* are sequentially assigned (appended). */
|
||||
public void copy(LightAutomaton other) {
|
||||
public void copy(Automaton other) {
|
||||
|
||||
int offset = getNumStates();
|
||||
/*
|
||||
|
@ -316,10 +313,10 @@ public class LightAutomaton {
|
|||
finishCurrentState();
|
||||
curState = -1;
|
||||
}
|
||||
// nocommit downsize the arrays?
|
||||
//assert getNumStates() > 0;
|
||||
}
|
||||
|
||||
// TODO: add finish() to shrink wrap the arrays?
|
||||
|
||||
public int getNumStates() {
|
||||
return nextState/2;
|
||||
}
|
||||
|
@ -511,8 +508,9 @@ public class LightAutomaton {
|
|||
}
|
||||
}
|
||||
|
||||
public LightAutomaton totalize() {
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
// nocommit move to Operations
|
||||
public Automaton totalize() {
|
||||
Automaton result = new Automaton();
|
||||
int numStates = getNumStates();
|
||||
for(int i=0;i<numStates;i++) {
|
||||
result.createState();
|
||||
|
@ -666,13 +664,13 @@ public class LightAutomaton {
|
|||
}
|
||||
|
||||
/** Records new states and transitions and then {@link
|
||||
* #finish} creates the {@link LightAutomaton}. Use this
|
||||
* #finish} creates the {@link Automaton}. Use this
|
||||
* when it's too restrictive to have to add all transitions
|
||||
* leaving each state at once. */
|
||||
public static class Builder {
|
||||
private int[] transitions = new int[4];
|
||||
private int nextTransition;
|
||||
private final LightAutomaton a = new LightAutomaton();
|
||||
private final Automaton a = new Automaton();
|
||||
|
||||
public void addTransition(int from, int to, int label) {
|
||||
addTransition(from, to, label, label);
|
||||
|
@ -753,7 +751,7 @@ public class LightAutomaton {
|
|||
}
|
||||
};
|
||||
|
||||
public LightAutomaton finish() {
|
||||
public Automaton finish() {
|
||||
//System.out.println("LA.Builder.finish: count=" + (nextTransition/4));
|
||||
// nocommit: we could make this more efficient,
|
||||
// e.g. somehow xfer the int[] to the automaton, or
|
||||
|
@ -790,7 +788,7 @@ public class LightAutomaton {
|
|||
}
|
||||
|
||||
/** Copies over all states/transitions from other. */
|
||||
public void copy(LightAutomaton other) {
|
||||
public void copy(Automaton other) {
|
||||
int offset = getNumStates();
|
||||
int otherNumStates = other.getNumStates();
|
||||
for(int s=0;s<otherNumStates;s++) {
|
|
@ -37,7 +37,7 @@ import java.io.IOException;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface LightAutomatonProvider {
|
||||
public interface AutomatonProvider {
|
||||
|
||||
/**
|
||||
* Returns automaton of the given name.
|
||||
|
@ -46,5 +46,5 @@ public interface LightAutomatonProvider {
|
|||
* @return automaton
|
||||
* @throws IOException if errors occur
|
||||
*/
|
||||
public LightAutomaton getAutomaton(String name) throws IOException;
|
||||
public Automaton getAutomaton(String name) throws IOException;
|
||||
}
|
|
@ -23,13 +23,13 @@ package org.apache.lucene.util.automaton;
|
|||
public class ByteRunAutomaton extends RunAutomaton {
|
||||
|
||||
/** Converts incoming automaton to byte-based (UTF32ToUTF8) first */
|
||||
public ByteRunAutomaton(LightAutomaton a) {
|
||||
public ByteRunAutomaton(Automaton a) {
|
||||
this(a, false);
|
||||
}
|
||||
|
||||
/** expert: if utf8 is true, the input is already byte-based */
|
||||
public ByteRunAutomaton(LightAutomaton a, boolean utf8) {
|
||||
super(utf8 ? a : new UTF32ToUTF8Light().convert(a), 256, true);
|
||||
public ByteRunAutomaton(Automaton a, boolean utf8) {
|
||||
super(utf8 ? a : new UTF32ToUTF8().convert(a), 256, true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -23,7 +23,7 @@ package org.apache.lucene.util.automaton;
|
|||
public class CharacterRunAutomaton extends RunAutomaton {
|
||||
|
||||
/** Sole constructor. */
|
||||
public CharacterRunAutomaton(LightAutomaton a) {
|
||||
public CharacterRunAutomaton(Automaton a) {
|
||||
super(a, Character.MAX_CODE_POINT, false);
|
||||
}
|
||||
|
||||
|
|
|
@ -71,13 +71,15 @@ public class CompiledAutomaton {
|
|||
* {@link #runAutomaton}.
|
||||
* Only valid for {@link AUTOMATON_TYPE#NORMAL}.
|
||||
*/
|
||||
public final LightAutomaton lightAutomaton;
|
||||
public final Automaton automaton;
|
||||
|
||||
/**
|
||||
* Shared common suffix accepted by the automaton. Only valid
|
||||
* for {@link AUTOMATON_TYPE#NORMAL}, and only when the
|
||||
* automaton accepts an infinite language.
|
||||
*/
|
||||
public final BytesRef commonSuffixRef;
|
||||
|
||||
/**
|
||||
* Indicates if the automaton accepts a finite set of strings.
|
||||
* Null if this was not computed.
|
||||
|
@ -85,11 +87,11 @@ public class CompiledAutomaton {
|
|||
*/
|
||||
public final Boolean finite;
|
||||
|
||||
public CompiledAutomaton(LightAutomaton automaton) {
|
||||
public CompiledAutomaton(Automaton automaton) {
|
||||
this(automaton, null, true);
|
||||
}
|
||||
|
||||
public CompiledAutomaton(LightAutomaton automaton, Boolean finite, boolean simplify) {
|
||||
public CompiledAutomaton(Automaton automaton, Boolean finite, boolean simplify) {
|
||||
|
||||
if (simplify) {
|
||||
|
||||
|
@ -97,33 +99,33 @@ public class CompiledAutomaton {
|
|||
// if so, don't create a runAutomaton. Note that on a
|
||||
// large automaton these tests could be costly:
|
||||
|
||||
if (BasicOperations.isEmpty(automaton)) {
|
||||
if (Operations.isEmpty(automaton)) {
|
||||
// matches nothing
|
||||
type = AUTOMATON_TYPE.NONE;
|
||||
term = null;
|
||||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
lightAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
return;
|
||||
// NOTE: only approximate, because automaton may not be minimal:
|
||||
} else if (BasicOperations.isTotal(automaton)) {
|
||||
} else if (Operations.isTotal(automaton)) {
|
||||
// matches all possible strings
|
||||
type = AUTOMATON_TYPE.ALL;
|
||||
term = null;
|
||||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
lightAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
return;
|
||||
} else {
|
||||
|
||||
automaton = BasicOperations.determinize(automaton);
|
||||
automaton = Operations.determinize(automaton);
|
||||
|
||||
final String commonPrefix = SpecialOperations.getCommonPrefix(automaton);
|
||||
final String commonPrefix = Operations.getCommonPrefix(automaton);
|
||||
final String singleton;
|
||||
|
||||
if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeStringLight(commonPrefix))) {
|
||||
if (commonPrefix.length() > 0 && Operations.sameLanguage(automaton, Automata.makeString(commonPrefix))) {
|
||||
singleton = commonPrefix;
|
||||
} else {
|
||||
singleton = null;
|
||||
|
@ -135,20 +137,20 @@ public class CompiledAutomaton {
|
|||
term = new BytesRef(singleton);
|
||||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
lightAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
return;
|
||||
} else if (commonPrefix.length() > 0) {
|
||||
LightAutomaton other = BasicOperations.concatenateLight(BasicAutomata.makeStringLight(commonPrefix), BasicAutomata.makeAnyStringLight());
|
||||
other = BasicOperations.determinize(other);
|
||||
assert BasicOperations.hasDeadStates(other) == false;
|
||||
if (BasicOperations.sameLanguage(automaton, other)) {
|
||||
Automaton other = Operations.concatenate(Automata.makeString(commonPrefix), Automata.makeAnyString());
|
||||
other = Operations.determinize(other);
|
||||
assert Operations.hasDeadStates(other) == false;
|
||||
if (Operations.sameLanguage(automaton, other)) {
|
||||
// matches a constant prefix
|
||||
type = AUTOMATON_TYPE.PREFIX;
|
||||
term = new BytesRef(commonPrefix);
|
||||
commonSuffixRef = null;
|
||||
runAutomaton = null;
|
||||
lightAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
return;
|
||||
}
|
||||
|
@ -160,20 +162,20 @@ public class CompiledAutomaton {
|
|||
term = null;
|
||||
|
||||
if (finite == null) {
|
||||
this.finite = SpecialOperations.isFinite(automaton);
|
||||
this.finite = Operations.isFinite(automaton);
|
||||
} else {
|
||||
this.finite = finite;
|
||||
}
|
||||
|
||||
LightAutomaton utf8 = new UTF32ToUTF8Light().convert(automaton);
|
||||
Automaton utf8 = new UTF32ToUTF8().convert(automaton);
|
||||
if (this.finite) {
|
||||
commonSuffixRef = null;
|
||||
} else {
|
||||
commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(utf8);
|
||||
commonSuffixRef = Operations.getCommonSuffixBytesRef(utf8);
|
||||
}
|
||||
runAutomaton = new ByteRunAutomaton(utf8, true);
|
||||
|
||||
lightAutomaton = runAutomaton.automaton;
|
||||
this.automaton = runAutomaton.automaton;
|
||||
}
|
||||
|
||||
private Transition transition = new Transition();
|
||||
|
@ -182,13 +184,13 @@ public class CompiledAutomaton {
|
|||
|
||||
private BytesRef addTail(int state, BytesRef term, int idx, int leadLabel) {
|
||||
//System.out.println("addTail state=" + state + " term=" + term.utf8ToString() + " idx=" + idx + " leadLabel=" + (char) leadLabel);
|
||||
//System.out.println(lightAutomaton.toDot());
|
||||
//System.out.println(automaton.toDot());
|
||||
// Find biggest transition that's < label
|
||||
// TODO: use binary search here
|
||||
int maxIndex = -1;
|
||||
int numTransitions = lightAutomaton.initTransition(state, transition);
|
||||
int numTransitions = automaton.initTransition(state, transition);
|
||||
for(int i=0;i<numTransitions;i++) {
|
||||
lightAutomaton.getNextTransition(transition);
|
||||
automaton.getNextTransition(transition);
|
||||
if (transition.min < leadLabel) {
|
||||
maxIndex = i;
|
||||
} else {
|
||||
|
@ -200,7 +202,7 @@ public class CompiledAutomaton {
|
|||
//System.out.println(" maxIndex=" + maxIndex);
|
||||
|
||||
assert maxIndex != -1;
|
||||
lightAutomaton.getTransition(state, maxIndex, transition);
|
||||
automaton.getTransition(state, maxIndex, transition);
|
||||
|
||||
// Append floorLabel
|
||||
final int floorLabel;
|
||||
|
@ -222,7 +224,7 @@ public class CompiledAutomaton {
|
|||
|
||||
// Push down to last accept state
|
||||
while (true) {
|
||||
numTransitions = lightAutomaton.getNumTransitions(state);
|
||||
numTransitions = automaton.getNumTransitions(state);
|
||||
if (numTransitions == 0) {
|
||||
//System.out.println("state=" + state + " 0 trans");
|
||||
assert runAutomaton.isAccept(state);
|
||||
|
@ -233,7 +235,7 @@ public class CompiledAutomaton {
|
|||
// We are pushing "top" -- so get last label of
|
||||
// last transition:
|
||||
//System.out.println("get state=" + state + " numTrans=" + numTransitions);
|
||||
lightAutomaton.getTransition(state, numTransitions-1, transition);
|
||||
automaton.getTransition(state, numTransitions-1, transition);
|
||||
if (idx >= term.bytes.length) {
|
||||
term.grow(1+idx);
|
||||
}
|
||||
|
@ -321,14 +323,14 @@ public class CompiledAutomaton {
|
|||
// Pop back to a state that has a transition
|
||||
// <= our label:
|
||||
while (true) {
|
||||
int numTransitions = lightAutomaton.getNumTransitions(state);
|
||||
int numTransitions = automaton.getNumTransitions(state);
|
||||
if (numTransitions == 0) {
|
||||
assert runAutomaton.isAccept(state);
|
||||
output.length = idx;
|
||||
//if (DEBUG) System.out.println(" return " + output.utf8ToString());
|
||||
return output;
|
||||
} else {
|
||||
lightAutomaton.getTransition(state, 0, transition);
|
||||
automaton.getTransition(state, 0, transition);
|
||||
|
||||
if (label-1 < transition.min) {
|
||||
|
||||
|
@ -374,15 +376,15 @@ public class CompiledAutomaton {
|
|||
StringBuilder b = new StringBuilder("digraph CompiledAutomaton {\n");
|
||||
b.append(" rankdir = LR;\n");
|
||||
int initial = 0;
|
||||
for (int i = 0; i < lightAutomaton.getNumStates(); i++) {
|
||||
for (int i = 0; i < automaton.getNumStates(); i++) {
|
||||
b.append(" ").append(i);
|
||||
if (lightAutomaton.isAccept(i)) b.append(" [shape=doublecircle,label=\"\"];\n");
|
||||
if (automaton.isAccept(i)) b.append(" [shape=doublecircle,label=\"\"];\n");
|
||||
else b.append(" [shape=circle,label=\"\"];\n");
|
||||
if (i == 0) {
|
||||
b.append(" initial [shape=plaintext,label=\"\"];\n");
|
||||
b.append(" initial -> ").append(i).append("\n");
|
||||
}
|
||||
int numTransitions = lightAutomaton.initTransition(i, transition);
|
||||
int numTransitions = automaton.initTransition(i, transition);
|
||||
for (int j = 0; j < numTransitions; j++) {
|
||||
b.append(" ").append(i);
|
||||
b.append(" -> ");
|
||||
|
@ -392,7 +394,7 @@ public class CompiledAutomaton {
|
|||
b.append("-");
|
||||
b.append(transition.max);
|
||||
}
|
||||
lightAutomaton.getNextTransition(transition);
|
||||
automaton.getNextTransition(transition);
|
||||
}
|
||||
}
|
||||
return b.append("}\n").toString();
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
* @see #build(Collection)
|
||||
* @see BasicAutomata#makeStringUnion(Collection)
|
||||
*/
|
||||
final class DaciukMihovAutomatonBuilderLight {
|
||||
final class DaciukMihovAutomatonBuilder {
|
||||
/**
|
||||
* DFSA state with <code>char</code> labels on transitions.
|
||||
*/
|
||||
|
@ -249,7 +249,7 @@ final class DaciukMihovAutomatonBuilderLight {
|
|||
/**
|
||||
* Internal recursive traversal for conversion.
|
||||
*/
|
||||
private static int convert(LightAutomaton.Builder a, State s,
|
||||
private static int convert(Automaton.Builder a, State s,
|
||||
IdentityHashMap<State,Integer> visited) {
|
||||
|
||||
Integer converted = visited.get(s);
|
||||
|
@ -263,7 +263,7 @@ final class DaciukMihovAutomatonBuilderLight {
|
|||
visited.put(s, converted);
|
||||
int i = 0;
|
||||
int[] labels = s.labels;
|
||||
for (DaciukMihovAutomatonBuilderLight.State target : s.states) {
|
||||
for (DaciukMihovAutomatonBuilder.State target : s.states) {
|
||||
a.addTransition(converted, convert(a, target, visited), labels[i++]);
|
||||
}
|
||||
|
||||
|
@ -274,8 +274,8 @@ final class DaciukMihovAutomatonBuilderLight {
|
|||
* Build a minimal, deterministic automaton from a sorted list of {@link BytesRef} representing
|
||||
* strings in UTF-8. These strings must be binary-sorted.
|
||||
*/
|
||||
public static LightAutomaton build(Collection<BytesRef> input) {
|
||||
final DaciukMihovAutomatonBuilderLight builder = new DaciukMihovAutomatonBuilderLight();
|
||||
public static Automaton build(Collection<BytesRef> input) {
|
||||
final DaciukMihovAutomatonBuilder builder = new DaciukMihovAutomatonBuilder();
|
||||
|
||||
CharsRef scratch = new CharsRef();
|
||||
for (BytesRef b : input) {
|
||||
|
@ -283,7 +283,7 @@ final class DaciukMihovAutomatonBuilderLight {
|
|||
builder.add(scratch);
|
||||
}
|
||||
|
||||
LightAutomaton.Builder a = new LightAutomaton.Builder();
|
||||
Automaton.Builder a = new Automaton.Builder();
|
||||
convert(a,
|
||||
builder.complete(),
|
||||
new IdentityHashMap<State,Integer>());
|
|
@ -126,14 +126,14 @@ public class LevenshteinAutomata {
|
|||
* </ul>
|
||||
* </p>
|
||||
*/
|
||||
public LightAutomaton toAutomaton(int n) {
|
||||
public Automaton toAutomaton(int n) {
|
||||
return toAutomaton(n, "");
|
||||
}
|
||||
|
||||
public LightAutomaton toAutomaton(int n, String prefix) {
|
||||
public Automaton toAutomaton(int n, String prefix) {
|
||||
assert prefix != null;
|
||||
if (n == 0) {
|
||||
return BasicAutomata.makeStringLight(prefix + UnicodeUtil.newString(word, 0, word.length));
|
||||
return Automata.makeString(prefix + UnicodeUtil.newString(word, 0, word.length));
|
||||
}
|
||||
|
||||
if (n >= descriptions.length)
|
||||
|
@ -144,7 +144,7 @@ public class LevenshteinAutomata {
|
|||
// the number of states is based on the length of the word and n
|
||||
int numStates = description.size();
|
||||
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int lastState;
|
||||
if (prefix != null) {
|
||||
// Insert prefix
|
||||
|
|
|
@ -39,9 +39,9 @@ import java.util.LinkedList;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final public class MinimizationOperationsLight {
|
||||
final public class MinimizationOperations {
|
||||
|
||||
private MinimizationOperationsLight() {}
|
||||
private MinimizationOperations() {}
|
||||
|
||||
/**
|
||||
* Minimizes (and determinizes if not already deterministic) the given
|
||||
|
@ -49,19 +49,19 @@ final public class MinimizationOperationsLight {
|
|||
*
|
||||
* @see Automaton#setMinimization(int)
|
||||
*/
|
||||
public static LightAutomaton minimize(LightAutomaton a) {
|
||||
public static Automaton minimize(Automaton a) {
|
||||
return minimizeHopcroft(a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Minimizes the given automaton using Hopcroft's algorithm.
|
||||
*/
|
||||
public static LightAutomaton minimizeHopcroft(LightAutomaton a) {
|
||||
public static Automaton minimizeHopcroft(Automaton a) {
|
||||
if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) {
|
||||
// Fastmatch for common case
|
||||
return new LightAutomaton();
|
||||
return new Automaton();
|
||||
}
|
||||
a = BasicOperations.determinize(a);
|
||||
a = Operations.determinize(a);
|
||||
//a.writeDot("adet");
|
||||
if (a.getNumTransitions(0) == 1) {
|
||||
Transition t = new Transition();
|
||||
|
@ -200,7 +200,7 @@ final public class MinimizationOperationsLight {
|
|||
refine.clear();
|
||||
}
|
||||
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
Automaton result = new Automaton();
|
||||
|
||||
Transition t = new Transition();
|
||||
|
||||
|
@ -254,7 +254,7 @@ final public class MinimizationOperationsLight {
|
|||
result.finishState();
|
||||
//System.out.println(result.getNumStates() + " states");
|
||||
|
||||
return BasicOperations.removeDeadStates(result);
|
||||
return Operations.removeDeadStates(result);
|
||||
}
|
||||
|
||||
static final class IntPair {
|
|
@ -41,17 +41,18 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Basic automata operations.
|
||||
* Automata operations.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final public class BasicOperations {
|
||||
final public class Operations {
|
||||
|
||||
private BasicOperations() {}
|
||||
private Operations() {}
|
||||
|
||||
/**
|
||||
* Returns an automaton that accepts the concatenation of the languages of the
|
||||
|
@ -59,8 +60,8 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in total number of states.
|
||||
*/
|
||||
static public LightAutomaton concatenateLight(LightAutomaton a1, LightAutomaton a2) {
|
||||
return concatenateLight(Arrays.asList(a1, a2));
|
||||
static public Automaton concatenate(Automaton a1, Automaton a2) {
|
||||
return concatenate(Arrays.asList(a1, a2));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -69,11 +70,11 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in total number of states.
|
||||
*/
|
||||
static public LightAutomaton concatenateLight(List<LightAutomaton> l) {
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
static public Automaton concatenate(List<Automaton> l) {
|
||||
Automaton result = new Automaton();
|
||||
|
||||
// First pass: create all states
|
||||
for(LightAutomaton a : l) {
|
||||
for(Automaton a : l) {
|
||||
if (a.getNumStates() == 0) {
|
||||
result.finishState();
|
||||
return result;
|
||||
|
@ -89,10 +90,10 @@ final public class BasicOperations {
|
|||
int stateOffset = 0;
|
||||
Transition t = new Transition();
|
||||
for(int i=0;i<l.size();i++) {
|
||||
LightAutomaton a = l.get(i);
|
||||
Automaton a = l.get(i);
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
LightAutomaton nextA = (i == l.size()-1) ? null : l.get(i+1);
|
||||
Automaton nextA = (i == l.size()-1) ? null : l.get(i+1);
|
||||
|
||||
for(int s=0;s<numStates;s++) {
|
||||
int numTransitions = a.initTransition(s, t);
|
||||
|
@ -102,7 +103,7 @@ final public class BasicOperations {
|
|||
}
|
||||
|
||||
if (a.isAccept(s)) {
|
||||
LightAutomaton followA = nextA;
|
||||
Automaton followA = nextA;
|
||||
int followOffset = stateOffset;
|
||||
int upto = i+1;
|
||||
while (true) {
|
||||
|
@ -147,8 +148,8 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in number of states.
|
||||
*/
|
||||
static public LightAutomaton optionalLight(LightAutomaton a) {
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
static public Automaton optional(Automaton a) {
|
||||
Automaton result = new Automaton();
|
||||
result.createState();
|
||||
result.setAccept(0, true);
|
||||
if (a.getNumStates() > 0) {
|
||||
|
@ -166,8 +167,8 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in number of states.
|
||||
*/
|
||||
static public LightAutomaton repeatLight(LightAutomaton a) {
|
||||
LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
static public Automaton repeat(Automaton a) {
|
||||
Automaton.Builder builder = new Automaton.Builder();
|
||||
builder.createState();
|
||||
builder.setAccept(0, true);
|
||||
builder.copy(a);
|
||||
|
@ -199,16 +200,16 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in number of states and in <code>min</code>.
|
||||
*/
|
||||
static public LightAutomaton repeatLight(LightAutomaton a, int min) {
|
||||
static public Automaton repeat(Automaton a, int min) {
|
||||
if (min == 0) {
|
||||
return repeatLight(a);
|
||||
return repeat(a);
|
||||
}
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
while (min-- > 0) {
|
||||
as.add(a);
|
||||
}
|
||||
as.add(repeatLight(a));
|
||||
return concatenateLight(as);
|
||||
as.add(repeat(a));
|
||||
return concatenate(as);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -219,23 +220,23 @@ final public class BasicOperations {
|
|||
* Complexity: linear in number of states and in <code>min</code> and
|
||||
* <code>max</code>.
|
||||
*/
|
||||
static public LightAutomaton repeatLight(LightAutomaton a, int min, int max) {
|
||||
static public Automaton repeat(Automaton a, int min, int max) {
|
||||
if (min > max) {
|
||||
return BasicAutomata.makeEmptyLight();
|
||||
return Automata.makeEmpty();
|
||||
}
|
||||
|
||||
LightAutomaton b;
|
||||
Automaton b;
|
||||
if (min == 0) {
|
||||
b = BasicAutomata.makeEmptyStringLight();
|
||||
b = Automata.makeEmptyString();
|
||||
} else if (min == 1) {
|
||||
b = new LightAutomaton();
|
||||
b = new Automaton();
|
||||
b.copy(a);
|
||||
} else {
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
for(int i=0;i<min;i++) {
|
||||
as.add(a);
|
||||
}
|
||||
b = concatenateLight(as);
|
||||
b = concatenate(as);
|
||||
}
|
||||
|
||||
Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
|
||||
|
@ -263,7 +264,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in number of states (if already deterministic).
|
||||
*/
|
||||
static public LightAutomaton complementLight(LightAutomaton a) {
|
||||
static public Automaton complement(Automaton a) {
|
||||
a = determinize(a).totalize();
|
||||
int numStates = a.getNumStates();
|
||||
for (int p=0;p<numStates;p++) {
|
||||
|
@ -280,14 +281,14 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: quadratic in number of states (if already deterministic).
|
||||
*/
|
||||
static public LightAutomaton minusLight(LightAutomaton a1, LightAutomaton a2) {
|
||||
if (BasicOperations.isEmpty(a1) || a1 == a2) {
|
||||
return BasicAutomata.makeEmptyLight();
|
||||
static public Automaton minus(Automaton a1, Automaton a2) {
|
||||
if (Operations.isEmpty(a1) || a1 == a2) {
|
||||
return Automata.makeEmpty();
|
||||
}
|
||||
if (BasicOperations.isEmpty(a2)) {
|
||||
if (Operations.isEmpty(a2)) {
|
||||
return a1;
|
||||
}
|
||||
return intersectionLight(a1, complementLight(a2));
|
||||
return intersection(a1, complement(a2));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -296,7 +297,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: quadratic in number of states.
|
||||
*/
|
||||
static public LightAutomaton intersectionLight(LightAutomaton a1, LightAutomaton a2) {
|
||||
static public Automaton intersection(Automaton a1, Automaton a2) {
|
||||
if (a1 == a2) {
|
||||
return a1;
|
||||
}
|
||||
|
@ -308,11 +309,11 @@ final public class BasicOperations {
|
|||
}
|
||||
Transition[][] transitions1 = a1.getSortedTransitions();
|
||||
Transition[][] transitions2 = a2.getSortedTransitions();
|
||||
LightAutomaton c = new LightAutomaton();
|
||||
Automaton c = new Automaton();
|
||||
c.createState();
|
||||
LinkedList<LightStatePair> worklist = new LinkedList<>();
|
||||
HashMap<LightStatePair,LightStatePair> newstates = new HashMap<>();
|
||||
LightStatePair p = new LightStatePair(0, 0, 0);
|
||||
LinkedList<StatePair> worklist = new LinkedList<>();
|
||||
HashMap<StatePair,StatePair> newstates = new HashMap<>();
|
||||
StatePair p = new StatePair(0, 0, 0);
|
||||
worklist.add(p);
|
||||
newstates.put(p, p);
|
||||
while (worklist.size() > 0) {
|
||||
|
@ -325,8 +326,8 @@ final public class BasicOperations {
|
|||
b2++;
|
||||
for (int n2 = b2; n2 < t2.length && t1[n1].max >= t2[n2].min; n2++)
|
||||
if (t2[n2].max >= t1[n1].min) {
|
||||
LightStatePair q = new LightStatePair(t1[n1].dest, t2[n2].dest);
|
||||
LightStatePair r = newstates.get(q);
|
||||
StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
|
||||
StatePair r = newstates.get(q);
|
||||
if (r == null) {
|
||||
q.s = c.createState();
|
||||
worklist.add(q);
|
||||
|
@ -349,7 +350,7 @@ final public class BasicOperations {
|
|||
* also that a1 and a2 will be determinized as a side
|
||||
* effect. Both automata must be determinized and have
|
||||
* no dead states! */
|
||||
public static boolean sameLanguage(LightAutomaton a1, LightAutomaton a2) {
|
||||
public static boolean sameLanguage(Automaton a1, Automaton a2) {
|
||||
if (a1 == a2) {
|
||||
return true;
|
||||
}
|
||||
|
@ -360,7 +361,7 @@ final public class BasicOperations {
|
|||
/** Returns true if this automaton has any states that cannot
|
||||
* be reached from the initial state or cannot reach an accept state.
|
||||
* Cost is O(numTransitions+numStates). */
|
||||
public static boolean hasDeadStates(LightAutomaton a) {
|
||||
public static boolean hasDeadStates(Automaton a) {
|
||||
BitSet liveStates = getLiveStates(a);
|
||||
int numLive = liveStates.cardinality();
|
||||
int numStates = a.getNumStates();
|
||||
|
@ -370,7 +371,7 @@ final public class BasicOperations {
|
|||
|
||||
// TODO: move to test-framework?
|
||||
/** Returns true if there are dead states reachable from an initial state. */
|
||||
public static boolean hasDeadStatesFromInitial(LightAutomaton a) {
|
||||
public static boolean hasDeadStatesFromInitial(Automaton a) {
|
||||
BitSet reachableFromInitial = getLiveStatesFromInitial(a);
|
||||
BitSet reachableFromAccept = getLiveStatesToAccept(a);
|
||||
reachableFromInitial.andNot(reachableFromAccept);
|
||||
|
@ -379,7 +380,7 @@ final public class BasicOperations {
|
|||
|
||||
// TODO: move to test-framework?
|
||||
/** Returns true if there are dead states that reach an accept state. */
|
||||
public static boolean hasDeadStatesToAccept(LightAutomaton a) {
|
||||
public static boolean hasDeadStatesToAccept(Automaton a) {
|
||||
BitSet reachableFromInitial = getLiveStatesFromInitial(a);
|
||||
BitSet reachableFromAccept = getLiveStatesToAccept(a);
|
||||
reachableFromAccept.andNot(reachableFromInitial);
|
||||
|
@ -393,7 +394,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: quadratic in number of states.
|
||||
*/
|
||||
public static boolean subsetOf(LightAutomaton a1, LightAutomaton a2) {
|
||||
public static boolean subsetOf(Automaton a1, Automaton a2) {
|
||||
if (a1.isDeterministic() == false) {
|
||||
throw new IllegalArgumentException("a1 must be deterministic");
|
||||
}
|
||||
|
@ -412,9 +413,9 @@ final public class BasicOperations {
|
|||
// TODO: cutover to iterators instead
|
||||
Transition[][] transitions1 = a1.getSortedTransitions();
|
||||
Transition[][] transitions2 = a2.getSortedTransitions();
|
||||
LinkedList<LightStatePair> worklist = new LinkedList<>();
|
||||
HashSet<LightStatePair> visited = new HashSet<>();
|
||||
LightStatePair p = new LightStatePair(0, 0);
|
||||
LinkedList<StatePair> worklist = new LinkedList<>();
|
||||
HashSet<StatePair> visited = new HashSet<>();
|
||||
StatePair p = new StatePair(0, 0);
|
||||
worklist.add(p);
|
||||
visited.add(p);
|
||||
while (worklist.size() > 0) {
|
||||
|
@ -440,7 +441,7 @@ final public class BasicOperations {
|
|||
min1 = Character.MAX_CODE_POINT;
|
||||
max1 = Character.MIN_CODE_POINT;
|
||||
}
|
||||
LightStatePair q = new LightStatePair(t1[n1].dest, t2[n2].dest);
|
||||
StatePair q = new StatePair(t1[n1].dest, t2[n2].dest);
|
||||
if (!visited.contains(q)) {
|
||||
worklist.add(q);
|
||||
visited.add(q);
|
||||
|
@ -460,25 +461,25 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Complexity: linear in number of states.
|
||||
*/
|
||||
public static LightAutomaton unionLight(LightAutomaton a1, LightAutomaton a2) {
|
||||
return unionLight(Arrays.asList(a1, a2));
|
||||
public static Automaton union(Automaton a1, Automaton a2) {
|
||||
return union(Arrays.asList(a1, a2));
|
||||
}
|
||||
|
||||
public static LightAutomaton unionLight(Collection<LightAutomaton> l) {
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
public static Automaton union(Collection<Automaton> l) {
|
||||
Automaton result = new Automaton();
|
||||
|
||||
// Create initial state:
|
||||
result.createState();
|
||||
|
||||
// Copy over all automata
|
||||
Transition t = new Transition();
|
||||
for(LightAutomaton a : l) {
|
||||
for(Automaton a : l) {
|
||||
result.copy(a);
|
||||
}
|
||||
|
||||
// Add epsilon transition from new initial state
|
||||
int stateOffset = 1;
|
||||
for(LightAutomaton a : l) {
|
||||
for(Automaton a : l) {
|
||||
if (a.getNumStates() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -492,7 +493,7 @@ final public class BasicOperations {
|
|||
}
|
||||
|
||||
// Simple custom ArrayList<Transition>
|
||||
private final static class TransitionListLight {
|
||||
private final static class TransitionList {
|
||||
// dest, min, max
|
||||
int[] transitions = new int[3];
|
||||
int next;
|
||||
|
@ -510,13 +511,13 @@ final public class BasicOperations {
|
|||
|
||||
// Holds all transitions that start on this int point, or
|
||||
// end at this point-1
|
||||
private final static class PointTransitionsLight implements Comparable<PointTransitionsLight> {
|
||||
private final static class PointTransitions implements Comparable<PointTransitions> {
|
||||
int point;
|
||||
final TransitionListLight ends = new TransitionListLight();
|
||||
final TransitionListLight starts = new TransitionListLight();
|
||||
final TransitionList ends = new TransitionList();
|
||||
final TransitionList starts = new TransitionList();
|
||||
|
||||
@Override
|
||||
public int compareTo(PointTransitionsLight other) {
|
||||
public int compareTo(PointTransitions other) {
|
||||
return point - other.point;
|
||||
}
|
||||
|
||||
|
@ -528,7 +529,7 @@ final public class BasicOperations {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return ((PointTransitionsLight) other).point == point;
|
||||
return ((PointTransitions) other).point == point;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -537,34 +538,34 @@ final public class BasicOperations {
|
|||
}
|
||||
}
|
||||
|
||||
private final static class PointTransitionSetLight {
|
||||
private final static class PointTransitionSet {
|
||||
int count;
|
||||
PointTransitionsLight[] points = new PointTransitionsLight[5];
|
||||
PointTransitions[] points = new PointTransitions[5];
|
||||
|
||||
private final static int HASHMAP_CUTOVER = 30;
|
||||
private final HashMap<Integer,PointTransitionsLight> map = new HashMap<>();
|
||||
private final HashMap<Integer,PointTransitions> map = new HashMap<>();
|
||||
private boolean useHash = false;
|
||||
|
||||
private PointTransitionsLight next(int point) {
|
||||
private PointTransitions next(int point) {
|
||||
// 1st time we are seeing this point
|
||||
if (count == points.length) {
|
||||
final PointTransitionsLight[] newArray = new PointTransitionsLight[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
final PointTransitions[] newArray = new PointTransitions[ArrayUtil.oversize(1+count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(points, 0, newArray, 0, count);
|
||||
points = newArray;
|
||||
}
|
||||
PointTransitionsLight points0 = points[count];
|
||||
PointTransitions points0 = points[count];
|
||||
if (points0 == null) {
|
||||
points0 = points[count] = new PointTransitionsLight();
|
||||
points0 = points[count] = new PointTransitions();
|
||||
}
|
||||
points0.reset(point);
|
||||
count++;
|
||||
return points0;
|
||||
}
|
||||
|
||||
private PointTransitionsLight find(int point) {
|
||||
private PointTransitions find(int point) {
|
||||
if (useHash) {
|
||||
final Integer pi = point;
|
||||
PointTransitionsLight p = map.get(pi);
|
||||
PointTransitions p = map.get(pi);
|
||||
if (p == null) {
|
||||
p = next(point);
|
||||
map.put(pi, p);
|
||||
|
@ -577,7 +578,7 @@ final public class BasicOperations {
|
|||
}
|
||||
}
|
||||
|
||||
final PointTransitionsLight p = next(point);
|
||||
final PointTransitions p = next(point);
|
||||
if (count == HASHMAP_CUTOVER) {
|
||||
// switch to HashMap on the fly
|
||||
assert map.size() == 0;
|
||||
|
@ -626,7 +627,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* Worst case complexity: exponential in number of states.
|
||||
*/
|
||||
public static LightAutomaton determinize(LightAutomaton a) {
|
||||
public static Automaton determinize(Automaton a) {
|
||||
if (a.isDeterministic()) {
|
||||
// Already determinized
|
||||
return a;
|
||||
|
@ -637,18 +638,18 @@ final public class BasicOperations {
|
|||
}
|
||||
|
||||
// subset construction
|
||||
LightAutomaton.Builder b = new LightAutomaton.Builder();
|
||||
Automaton.Builder b = new Automaton.Builder();
|
||||
|
||||
//System.out.println("DET:");
|
||||
//a.writeDot("/l/la/lucene/core/detin.dot");
|
||||
|
||||
SortedIntSetLight.FrozenIntSetLight initialset = new SortedIntSetLight.FrozenIntSetLight(0, 0);
|
||||
SortedIntSet.FrozenIntSet initialset = new SortedIntSet.FrozenIntSet(0, 0);
|
||||
|
||||
// Create state 0:
|
||||
b.createState();
|
||||
|
||||
LinkedList<SortedIntSetLight.FrozenIntSetLight> worklist = new LinkedList<>();
|
||||
Map<SortedIntSetLight.FrozenIntSetLight,Integer> newstate = new HashMap<>();
|
||||
LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<>();
|
||||
Map<SortedIntSet.FrozenIntSet,Integer> newstate = new HashMap<>();
|
||||
|
||||
worklist.add(initialset);
|
||||
|
||||
|
@ -661,15 +662,15 @@ final public class BasicOperations {
|
|||
newStateUpto++;
|
||||
|
||||
// like Set<Integer,PointTransitions>
|
||||
final PointTransitionSetLight points = new PointTransitionSetLight();
|
||||
final PointTransitionSet points = new PointTransitionSet();
|
||||
|
||||
// like SortedMap<Integer,Integer>
|
||||
final SortedIntSetLight statesSet = new SortedIntSetLight(5);
|
||||
final SortedIntSet statesSet = new SortedIntSet(5);
|
||||
|
||||
Transition t = new Transition();
|
||||
|
||||
while (worklist.size() > 0) {
|
||||
SortedIntSetLight.FrozenIntSetLight s = worklist.removeFirst();
|
||||
SortedIntSet.FrozenIntSet s = worklist.removeFirst();
|
||||
//System.out.println("det: pop set=" + s);
|
||||
|
||||
// Collate all outgoing transitions by min/1+max:
|
||||
|
@ -707,7 +708,7 @@ final public class BasicOperations {
|
|||
Integer q = newstate.get(statesSet);
|
||||
if (q == null) {
|
||||
q = b.createState();
|
||||
final SortedIntSetLight.FrozenIntSetLight p = statesSet.freeze(q);
|
||||
final SortedIntSet.FrozenIntSet p = statesSet.freeze(q);
|
||||
//System.out.println(" make new state=" + q + " -> " + p + " accCount=" + accCount);
|
||||
worklist.add(p);
|
||||
b.setAccept(q, accCount > 0);
|
||||
|
@ -749,7 +750,7 @@ final public class BasicOperations {
|
|||
assert statesSet.upto == 0: "upto=" + statesSet.upto;
|
||||
}
|
||||
|
||||
LightAutomaton result = b.finish();
|
||||
Automaton result = b.finish();
|
||||
assert result.isDeterministic();
|
||||
return result;
|
||||
}
|
||||
|
@ -757,7 +758,7 @@ final public class BasicOperations {
|
|||
/**
|
||||
* Returns true if the given automaton accepts no strings.
|
||||
*/
|
||||
public static boolean isEmpty(LightAutomaton a) {
|
||||
public static boolean isEmpty(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
// Common case: no states
|
||||
return true;
|
||||
|
@ -798,7 +799,7 @@ final public class BasicOperations {
|
|||
/**
|
||||
* Returns true if the given automaton accepts all strings. The automaton must be minimized.
|
||||
*/
|
||||
public static boolean isTotal(LightAutomaton a) {
|
||||
public static boolean isTotal(Automaton a) {
|
||||
if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
|
||||
Transition t = new Transition();
|
||||
a.getTransition(0, 0, t);
|
||||
|
@ -815,7 +816,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
|
||||
*/
|
||||
public static boolean run(LightAutomaton a, String s) {
|
||||
public static boolean run(Automaton a, String s) {
|
||||
assert a.isDeterministic();
|
||||
int state = 0;
|
||||
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
|
||||
|
@ -835,7 +836,7 @@ final public class BasicOperations {
|
|||
* <p>
|
||||
* <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
|
||||
*/
|
||||
public static boolean run(LightAutomaton a, IntsRef s) {
|
||||
public static boolean run(Automaton a, IntsRef s) {
|
||||
assert a.isDeterministic();
|
||||
int state = 0;
|
||||
for (int i=0;i<s.length;i++) {
|
||||
|
@ -852,14 +853,14 @@ final public class BasicOperations {
|
|||
* Returns the set of live states. A state is "live" if an accept state is
|
||||
* reachable from it and if it is reachable from the initial state.
|
||||
*/
|
||||
private static BitSet getLiveStates(LightAutomaton a) {
|
||||
private static BitSet getLiveStates(Automaton a) {
|
||||
BitSet live = getLiveStatesFromInitial(a);
|
||||
live.and(getLiveStatesToAccept(a));
|
||||
return live;
|
||||
}
|
||||
|
||||
/** Returns bitset marking states reachable from the initial state. */
|
||||
private static BitSet getLiveStatesFromInitial(LightAutomaton a) {
|
||||
private static BitSet getLiveStatesFromInitial(Automaton a) {
|
||||
int numStates = a.getNumStates();
|
||||
BitSet live = new BitSet(numStates);
|
||||
if (numStates == 0) {
|
||||
|
@ -886,8 +887,8 @@ final public class BasicOperations {
|
|||
}
|
||||
|
||||
/** Returns bitset marking states that can reach an accept state. */
|
||||
private static BitSet getLiveStatesToAccept(LightAutomaton a) {
|
||||
LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
private static BitSet getLiveStatesToAccept(Automaton a) {
|
||||
Automaton.Builder builder = new Automaton.Builder();
|
||||
|
||||
// NOTE: not quite the same thing as what SpecialOperations.reverse does:
|
||||
Transition t = new Transition();
|
||||
|
@ -902,7 +903,7 @@ final public class BasicOperations {
|
|||
builder.addTransition(t.dest, s, t.min, t.max);
|
||||
}
|
||||
}
|
||||
LightAutomaton a2 = builder.finish();
|
||||
Automaton a2 = builder.finish();
|
||||
|
||||
LinkedList<Integer> workList = new LinkedList<>();
|
||||
BitSet live = new BitSet(numStates);
|
||||
|
@ -930,13 +931,13 @@ final public class BasicOperations {
|
|||
* Removes transitions to dead states (a state is "dead" if it is not
|
||||
* reachable from the initial state or no accept state is reachable from it.)
|
||||
*/
|
||||
public static LightAutomaton removeDeadStates(LightAutomaton a) {
|
||||
public static Automaton removeDeadStates(Automaton a) {
|
||||
int numStates = a.getNumStates();
|
||||
BitSet liveSet = getLiveStates(a);
|
||||
|
||||
int[] map = new int[numStates];
|
||||
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
Automaton result = new Automaton();
|
||||
//System.out.println("liveSet: " + liveSet + " numStates=" + numStates);
|
||||
for(int i=0;i<numStates;i++) {
|
||||
if (liveSet.get(i)) {
|
||||
|
@ -964,4 +965,321 @@ final public class BasicOperations {
|
|||
assert hasDeadStates(result) == false;
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* Finds the largest entry whose value is less than or equal to c, or 0 if
|
||||
* there is no such entry.
|
||||
*/
|
||||
static int findIndex(int c, int[] points) {
|
||||
int a = 0;
|
||||
int b = points.length;
|
||||
while (b - a > 1) {
|
||||
int d = (a + b) >>> 1;
|
||||
if (points[d] > c) b = d;
|
||||
else if (points[d] < c) a = d;
|
||||
else return d;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the language of this automaton is finite.
|
||||
*/
|
||||
public static boolean isFinite(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether there is a loop containing s. (This is sufficient since
|
||||
* there are never transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack
|
||||
private static boolean isFinite(Transition scratch, Automaton a, int state, BitSet path, BitSet visited) {
|
||||
path.set(state);
|
||||
int numTransitions = a.initTransition(state, scratch);
|
||||
for(int t=0;t<numTransitions;t++) {
|
||||
a.getTransition(state, t, scratch);
|
||||
if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch, a, scratch.dest, path, visited))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
path.clear(state);
|
||||
visited.set(state);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the longest string that is a prefix of all accepted strings and
|
||||
* visits each state at most once. The automaton must be deterministic.
|
||||
*
|
||||
* @return common prefix
|
||||
*/
|
||||
public static String getCommonPrefix(Automaton a) {
|
||||
if (a.isDeterministic() == false) {
|
||||
throw new IllegalArgumentException("input automaton must be deterministic");
|
||||
}
|
||||
StringBuilder b = new StringBuilder();
|
||||
HashSet<Integer> visited = new HashSet<>();
|
||||
int s = 0;
|
||||
boolean done;
|
||||
Transition t = new Transition();
|
||||
do {
|
||||
done = true;
|
||||
visited.add(s);
|
||||
if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
|
||||
a.getTransition(s, 0, t);
|
||||
if (t.min == t.max && !visited.contains(t.dest)) {
|
||||
b.appendCodePoint(t.min);
|
||||
s = t.dest;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public static BytesRef getCommonPrefixBytesRef(Automaton a) {
|
||||
BytesRef ref = new BytesRef(10);
|
||||
HashSet<Integer> visited = new HashSet<>();
|
||||
int s = 0;
|
||||
boolean done;
|
||||
Transition t = new Transition();
|
||||
do {
|
||||
done = true;
|
||||
visited.add(s);
|
||||
if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
|
||||
a.getTransition(s, 0, t);
|
||||
if (t.min == t.max && !visited.contains(t.dest)) {
|
||||
ref.grow(++ref.length);
|
||||
ref.bytes[ref.length - 1] = (byte) t.min;
|
||||
s = t.dest;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
public static BytesRef getCommonSuffixBytesRef(Automaton a) {
|
||||
// reverse the language of the automaton, then reverse its common prefix.
|
||||
Automaton r = Operations.determinize(reverse(a));
|
||||
BytesRef ref = getCommonPrefixBytesRef(r);
|
||||
reverseBytes(ref);
|
||||
return ref;
|
||||
}
|
||||
|
||||
private static void reverseBytes(BytesRef ref) {
|
||||
if (ref.length <= 1) return;
|
||||
int num = ref.length >> 1;
|
||||
for (int i = ref.offset; i < ( ref.offset + num ); i++) {
|
||||
byte b = ref.bytes[i];
|
||||
ref.bytes[i] = ref.bytes[ref.offset * 2 + ref.length - i - 1];
|
||||
ref.bytes[ref.offset * 2 + ref.length - i - 1] = b;
|
||||
}
|
||||
}
|
||||
|
||||
public static Automaton reverse(Automaton a) {
|
||||
return reverse(a, null);
|
||||
}
|
||||
|
||||
public static Automaton reverse(Automaton a, Set<Integer> initialStates) {
|
||||
|
||||
if (Operations.isEmpty(a)) {
|
||||
return new Automaton();
|
||||
}
|
||||
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
// Build a new automaton with all edges reversed
|
||||
Automaton.Builder builder = new Automaton.Builder();
|
||||
|
||||
// Initial node; we'll add epsilon transitions in the end:
|
||||
builder.createState();
|
||||
|
||||
for(int s=0;s<numStates;s++) {
|
||||
builder.createState();
|
||||
}
|
||||
|
||||
// Old initial state becomes new accept state:
|
||||
builder.setAccept(1, true);
|
||||
|
||||
Transition t = new Transition();
|
||||
for (int s=0;s<numStates;s++) {
|
||||
int numTransitions = a.getNumTransitions(s);
|
||||
a.initTransition(s, t);
|
||||
for(int i=0;i<numTransitions;i++) {
|
||||
a.getNextTransition(t);
|
||||
builder.addTransition(t.dest+1, s+1, t.min, t.max);
|
||||
}
|
||||
}
|
||||
|
||||
Automaton result = builder.finish();
|
||||
|
||||
for(int s : a.getAcceptStates()) {
|
||||
assert s < numStates;
|
||||
result.addEpsilon(0, s+1);
|
||||
if (initialStates != null) {
|
||||
initialStates.add(s+1);
|
||||
}
|
||||
}
|
||||
|
||||
result.finishState();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class PathNode {
|
||||
|
||||
/** Which state the path node ends on, whose
|
||||
* transitions we are enumerating. */
|
||||
public int state;
|
||||
|
||||
/** Which state the current transition leads to. */
|
||||
public int to;
|
||||
|
||||
/** Which transition we are on. */
|
||||
public int transition;
|
||||
|
||||
/** Which label we are on, in the min-max range of the
|
||||
* current Transition */
|
||||
public int label;
|
||||
|
||||
private final Transition t = new Transition();
|
||||
|
||||
public void resetState(Automaton a, int state) {
|
||||
assert a.getNumTransitions(state) != 0;
|
||||
this.state = state;
|
||||
transition = 0;
|
||||
a.getTransition(state, 0, t);
|
||||
label = t.min;
|
||||
to = t.dest;
|
||||
}
|
||||
|
||||
/** Returns next label of current transition, or
|
||||
* advances to next transition and returns its first
|
||||
* label, if current one is exhausted. If there are
|
||||
* no more transitions, returns -1. */
|
||||
public int nextLabel(Automaton a) {
|
||||
if (label > t.max) {
|
||||
// We've exhaused the current transition's labels;
|
||||
// move to next transitions:
|
||||
transition++;
|
||||
if (transition >= a.getNumTransitions(state)) {
|
||||
// We're done iterating transitions leaving this state
|
||||
return -1;
|
||||
}
|
||||
a.getTransition(state, transition, t);
|
||||
label = t.min;
|
||||
to = t.dest;
|
||||
}
|
||||
return label++;
|
||||
}
|
||||
}
|
||||
|
||||
private static PathNode getNode(PathNode[] nodes, int index) {
|
||||
assert index < nodes.length;
|
||||
if (nodes[index] == null) {
|
||||
nodes[index] = new PathNode();
|
||||
}
|
||||
return nodes[index];
|
||||
}
|
||||
|
||||
// TODO: this is a dangerous method ... Automaton could be
|
||||
// huge ... and it's better in general for caller to
|
||||
// enumerate & process in a single walk:
|
||||
|
||||
/** Returns the set of accepted strings, up to at most
|
||||
* <code>limit</code> strings. If more than <code>limit</code>
|
||||
* strings are accepted, the first limit strings found are returned. If <code>limit</code> == -1, then
|
||||
* the limit is infinite. If the {@link Automaton} has
|
||||
* cycles then this method might throw {@code
|
||||
* IllegalArgumentException} but that is not guaranteed
|
||||
* when the limit is set. */
|
||||
public static Set<IntsRef> getFiniteStrings(Automaton a, int limit) {
|
||||
Set<IntsRef> results = new HashSet<>();
|
||||
|
||||
if (limit == -1 || limit > 0) {
|
||||
// OK
|
||||
} else {
|
||||
throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
|
||||
}
|
||||
|
||||
if (a.isAccept(0)) {
|
||||
// Special case the empty string, as usual:
|
||||
results.add(new IntsRef());
|
||||
}
|
||||
|
||||
if (a.getNumTransitions(0) > 0 && (limit == -1 || results.size() < limit)) {
|
||||
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
// Tracks which states are in the current path, for
|
||||
// cycle detection:
|
||||
BitSet pathStates = new BitSet(numStates);
|
||||
|
||||
// Stack to hold our current state in the
|
||||
// recursion/iteration:
|
||||
PathNode[] nodes = new PathNode[4];
|
||||
|
||||
pathStates.set(0);
|
||||
PathNode root = getNode(nodes, 0);
|
||||
root.resetState(a, 0);
|
||||
|
||||
IntsRef string = new IntsRef(1);
|
||||
string.length = 1;
|
||||
|
||||
while (string.length > 0) {
|
||||
|
||||
PathNode node = nodes[string.length-1];
|
||||
|
||||
// Get next label leaving the current node:
|
||||
int label = node.nextLabel(a);
|
||||
|
||||
if (label != -1) {
|
||||
string.ints[string.length-1] = label;
|
||||
|
||||
if (a.isAccept(node.to)) {
|
||||
// This transition leads to an accept state,
|
||||
// so we save the current string:
|
||||
results.add(IntsRef.deepCopyOf(string));
|
||||
if (results.size() == limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (a.getNumTransitions(node.to) != 0) {
|
||||
// Now recurse: the destination of this transition has
|
||||
// outgoing transitions:
|
||||
if (pathStates.get(node.to)) {
|
||||
throw new IllegalArgumentException("automaton has cycles");
|
||||
}
|
||||
pathStates.set(node.to);
|
||||
|
||||
// Push node onto stack:
|
||||
if (nodes.length == string.length) {
|
||||
PathNode[] newNodes = new PathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
|
||||
nodes = newNodes;
|
||||
}
|
||||
getNode(nodes, string.length).resetState(a, node.to);
|
||||
string.length++;
|
||||
string.grow(string.length);
|
||||
}
|
||||
} else {
|
||||
// No more transitions leaving this state,
|
||||
// pop/return back to previous state:
|
||||
assert pathStates.get(node.state);
|
||||
pathStates.clear(node.state);
|
||||
string.length--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
|
@ -422,12 +422,12 @@ public class RegExp {
|
|||
* Constructs new <code>Automaton</code> from this <code>RegExp</code>. Same
|
||||
* as <code>toAutomaton(null)</code> (empty automaton map).
|
||||
*/
|
||||
public LightAutomaton toLightAutomaton() {
|
||||
return toLightAutomaton(null, null);
|
||||
public Automaton toAutomaton() {
|
||||
return toAutomaton(null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs new <code>LightAutomaton</code> from this <code>RegExp</code>. The
|
||||
* Constructs new <code>Automaton</code> from this <code>RegExp</code>. The
|
||||
* constructed automaton is minimal and deterministic and has no transitions
|
||||
* to dead states.
|
||||
*
|
||||
|
@ -435,9 +435,9 @@ public class RegExp {
|
|||
* @exception IllegalArgumentException if this regular expression uses a named
|
||||
* identifier that is not available from the automaton provider
|
||||
*/
|
||||
public LightAutomaton toLightAutomaton(LightAutomatonProvider automaton_provider)
|
||||
public Automaton toAutomaton(AutomatonProvider automaton_provider)
|
||||
throws IllegalArgumentException {
|
||||
return toLightAutomaton(null, automaton_provider);
|
||||
return toAutomaton(null, automaton_provider);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -446,26 +446,26 @@ public class RegExp {
|
|||
* to dead states.
|
||||
*
|
||||
* @param automata a map from automaton identifiers to automata (of type
|
||||
* <code>LightAutomaton</code>).
|
||||
* <code>Automaton</code>).
|
||||
* @exception IllegalArgumentException if this regular expression uses a named
|
||||
* identifier that does not occur in the automaton map
|
||||
*/
|
||||
public LightAutomaton toLightAutomaton(Map<String,LightAutomaton> automata)
|
||||
public Automaton toAutomaton(Map<String,Automaton> automata)
|
||||
throws IllegalArgumentException {
|
||||
return toLightAutomaton(automata, null);
|
||||
return toAutomaton(automata, null);
|
||||
}
|
||||
|
||||
private LightAutomaton toLightAutomaton(Map<String,LightAutomaton> automata,
|
||||
LightAutomatonProvider automaton_provider) throws IllegalArgumentException {
|
||||
List<LightAutomaton> list;
|
||||
LightAutomaton a = null;
|
||||
private Automaton toAutomaton(Map<String,Automaton> automata,
|
||||
AutomatonProvider automaton_provider) throws IllegalArgumentException {
|
||||
List<Automaton> list;
|
||||
Automaton a = null;
|
||||
switch (kind) {
|
||||
case REGEXP_UNION:
|
||||
list = new ArrayList<>();
|
||||
findLeaves(exp1, Kind.REGEXP_UNION, list, automata, automaton_provider);
|
||||
findLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider);
|
||||
a = BasicOperations.unionLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.union(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_CONCATENATION:
|
||||
list = new ArrayList<>();
|
||||
|
@ -473,55 +473,55 @@ public class RegExp {
|
|||
automaton_provider);
|
||||
findLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata,
|
||||
automaton_provider);
|
||||
a = BasicOperations.concatenateLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.concatenate(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_INTERSECTION:
|
||||
a = BasicOperations.intersectionLight(
|
||||
exp1.toLightAutomaton(automata, automaton_provider),
|
||||
exp2.toLightAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.intersection(
|
||||
exp1.toAutomaton(automata, automaton_provider),
|
||||
exp2.toAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_OPTIONAL:
|
||||
a = BasicOperations.optionalLight(exp1.toLightAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.optional(exp1.toAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_REPEAT:
|
||||
a = BasicOperations.repeatLight(exp1.toLightAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_REPEAT_MIN:
|
||||
a = BasicOperations.repeatLight(exp1.toLightAutomaton(automata, automaton_provider), min);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider), min);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_REPEAT_MINMAX:
|
||||
a = BasicOperations.repeatLight(exp1.toLightAutomaton(automata, automaton_provider), min, max);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.repeat(exp1.toAutomaton(automata, automaton_provider), min, max);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_COMPLEMENT:
|
||||
a = BasicOperations.complementLight(exp1.toLightAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = Operations.complement(exp1.toAutomaton(automata, automaton_provider));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
case REGEXP_CHAR:
|
||||
a = BasicAutomata.makeCharLight(c);
|
||||
a = Automata.makeChar(c);
|
||||
break;
|
||||
case REGEXP_CHAR_RANGE:
|
||||
a = BasicAutomata.makeCharRangeLight(from, to);
|
||||
a = Automata.makeCharRange(from, to);
|
||||
break;
|
||||
case REGEXP_ANYCHAR:
|
||||
a = BasicAutomata.makeAnyCharLight();
|
||||
a = Automata.makeAnyChar();
|
||||
break;
|
||||
case REGEXP_EMPTY:
|
||||
a = BasicAutomata.makeEmptyLight();
|
||||
a = Automata.makeEmpty();
|
||||
break;
|
||||
case REGEXP_STRING:
|
||||
a = BasicAutomata.makeStringLight(s);
|
||||
a = Automata.makeString(s);
|
||||
break;
|
||||
case REGEXP_ANYSTRING:
|
||||
a = BasicAutomata.makeAnyStringLight();
|
||||
a = Automata.makeAnyString();
|
||||
break;
|
||||
case REGEXP_AUTOMATON:
|
||||
LightAutomaton aa = null;
|
||||
Automaton aa = null;
|
||||
if (automata != null) {
|
||||
aa = automata.get(s);
|
||||
}
|
||||
|
@ -538,19 +538,19 @@ public class RegExp {
|
|||
a = aa;
|
||||
break;
|
||||
case REGEXP_INTERVAL:
|
||||
a = BasicAutomata.makeIntervalLight(min, max, digits);
|
||||
a = Automata.makeInterval(min, max, digits);
|
||||
break;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
private void findLeaves(RegExp exp, Kind kind, List<LightAutomaton> list,
|
||||
Map<String,LightAutomaton> automata, LightAutomatonProvider automaton_provider) {
|
||||
private void findLeaves(RegExp exp, Kind kind, List<Automaton> list,
|
||||
Map<String,Automaton> automata, AutomatonProvider automaton_provider) {
|
||||
if (exp.kind == kind) {
|
||||
findLeaves(exp.exp1, kind, list, automata, automaton_provider);
|
||||
findLeaves(exp.exp2, kind, list, automata, automaton_provider);
|
||||
} else {
|
||||
list.add(exp.toLightAutomaton(automata, automaton_provider));
|
||||
list.add(exp.toAutomaton(automata, automaton_provider));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ import java.util.Arrays;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class RunAutomaton {
|
||||
final LightAutomaton automaton;
|
||||
final Automaton automaton;
|
||||
final int maxInterval;
|
||||
final int size;
|
||||
final boolean[] accept;
|
||||
|
@ -66,10 +66,10 @@ public abstract class RunAutomaton {
|
|||
if (j + 1 < points.length) max = (points[j + 1] - 1);
|
||||
else max = maxInterval;
|
||||
b.append(" ");
|
||||
LightAutomaton.appendCharString(min, b);
|
||||
Automaton.appendCharString(min, b);
|
||||
if (min != max) {
|
||||
b.append("-");
|
||||
LightAutomaton.appendCharString(max, b);
|
||||
Automaton.appendCharString(max, b);
|
||||
}
|
||||
b.append(" -> ").append(k).append("\n");
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ public abstract class RunAutomaton {
|
|||
* Gets character class of given codepoint
|
||||
*/
|
||||
final int getCharClass(int c) {
|
||||
return SpecialOperations.findIndex(c, points);
|
||||
return Operations.findIndex(c, points);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -120,10 +120,10 @@ public abstract class RunAutomaton {
|
|||
*
|
||||
* @param a an automaton
|
||||
*/
|
||||
public RunAutomaton(LightAutomaton a, int maxInterval, boolean tableize) {
|
||||
public RunAutomaton(Automaton a, int maxInterval, boolean tableize) {
|
||||
this.maxInterval = maxInterval;
|
||||
//System.out.println("before det a=" + a.getNumStates());
|
||||
a = BasicOperations.determinize(a);
|
||||
a = Operations.determinize(a);
|
||||
this.automaton = a;
|
||||
//System.out.println("AFTER DET tableize= " + tableize + ": ");
|
||||
//System.out.println(a.toDot());
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
// Just holds a set of int[] states, plus a corresponding
|
||||
// int[] count per state. Used by
|
||||
// BasicOperations.determinize
|
||||
final class SortedIntSetLight {
|
||||
final class SortedIntSet {
|
||||
int[] values;
|
||||
int[] counts;
|
||||
int upto;
|
||||
|
@ -41,7 +41,7 @@ final class SortedIntSetLight {
|
|||
|
||||
int state;
|
||||
|
||||
public SortedIntSetLight(int capacity) {
|
||||
public SortedIntSet(int capacity) {
|
||||
values = new int[capacity];
|
||||
counts = new int[capacity];
|
||||
}
|
||||
|
@ -153,10 +153,10 @@ final class SortedIntSetLight {
|
|||
}
|
||||
}
|
||||
|
||||
public FrozenIntSetLight freeze(int state) {
|
||||
public FrozenIntSet freeze(int state) {
|
||||
final int[] c = new int[upto];
|
||||
System.arraycopy(values, 0, c, 0, upto);
|
||||
return new FrozenIntSetLight(c, hashCode, state);
|
||||
return new FrozenIntSet(c, hashCode, state);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -169,10 +169,10 @@ final class SortedIntSetLight {
|
|||
if (_other == null) {
|
||||
return false;
|
||||
}
|
||||
if (!(_other instanceof FrozenIntSetLight)) {
|
||||
if (!(_other instanceof FrozenIntSet)) {
|
||||
return false;
|
||||
}
|
||||
FrozenIntSetLight other = (FrozenIntSetLight) _other;
|
||||
FrozenIntSet other = (FrozenIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
|
@ -201,18 +201,18 @@ final class SortedIntSetLight {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
public final static class FrozenIntSetLight {
|
||||
public final static class FrozenIntSet {
|
||||
final int[] values;
|
||||
final int hashCode;
|
||||
final int state;
|
||||
|
||||
public FrozenIntSetLight(int[] values, int hashCode, int state) {
|
||||
public FrozenIntSet(int[] values, int hashCode, int state) {
|
||||
this.values = values;
|
||||
this.hashCode = hashCode;
|
||||
this.state = state;
|
||||
}
|
||||
|
||||
public FrozenIntSetLight(int num, int state) {
|
||||
public FrozenIntSet(int num, int state) {
|
||||
this.values = new int[] {num};
|
||||
this.state = state;
|
||||
this.hashCode = 683+num;
|
||||
|
@ -228,8 +228,8 @@ final class SortedIntSetLight {
|
|||
if (_other == null) {
|
||||
return false;
|
||||
}
|
||||
if (_other instanceof FrozenIntSetLight) {
|
||||
FrozenIntSetLight other = (FrozenIntSetLight) _other;
|
||||
if (_other instanceof FrozenIntSet) {
|
||||
FrozenIntSet other = (FrozenIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
|
@ -242,8 +242,8 @@ final class SortedIntSetLight {
|
|||
}
|
||||
}
|
||||
return true;
|
||||
} else if (_other instanceof SortedIntSetLight) {
|
||||
SortedIntSetLight other = (SortedIntSetLight) _other;
|
||||
} else if (_other instanceof SortedIntSet) {
|
||||
SortedIntSet other = (SortedIntSet) _other;
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
|
@ -1,369 +0,0 @@
|
|||
/*
|
||||
* dk.brics.automaton
|
||||
*
|
||||
* Copyright (c) 2001-2009 Anders Moeller
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util.automaton;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
|
||||
/**
|
||||
* Special automata operations.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
final public class SpecialOperations {
|
||||
|
||||
/** Not instantiable: this class only exposes static methods. */
private SpecialOperations() {
}
|
||||
|
||||
/**
|
||||
* Finds the largest entry whose value is less than or equal to c, or 0 if
|
||||
* there is no such entry.
|
||||
*/
|
||||
static int findIndex(int c, int[] points) {
|
||||
int a = 0;
|
||||
int b = points.length;
|
||||
while (b - a > 1) {
|
||||
int d = (a + b) >>> 1;
|
||||
if (points[d] > c) b = d;
|
||||
else if (points[d] < c) a = d;
|
||||
else return d;
|
||||
}
|
||||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the language of this automaton is finite.
|
||||
*/
|
||||
public static boolean isFinite(LightAutomaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
return isFinite(new Transition(), a, 0, new BitSet(a.getNumStates()), new BitSet(a.getNumStates()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether there is a loop containing s. (This is sufficient since
|
||||
* there are never transitions to dead states.)
|
||||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automata could exceed java's stack
|
||||
private static boolean isFinite(Transition scratch, LightAutomaton a, int state, BitSet path, BitSet visited) {
|
||||
path.set(state);
|
||||
int numTransitions = a.initTransition(state, scratch);
|
||||
for(int t=0;t<numTransitions;t++) {
|
||||
a.getTransition(state, t, scratch);
|
||||
if (path.get(scratch.dest) || (!visited.get(scratch.dest) && !isFinite(scratch, a, scratch.dest, path, visited))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
path.clear(state);
|
||||
visited.set(state);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the longest string that is a prefix of all accepted strings and
|
||||
* visits each state at most once. The automaton must be deterministic.
|
||||
*
|
||||
* @return common prefix
|
||||
*/
|
||||
public static String getCommonPrefix(LightAutomaton a) {
|
||||
if (a.isDeterministic() == false) {
|
||||
throw new IllegalArgumentException("input automaton must be deterministic");
|
||||
}
|
||||
StringBuilder b = new StringBuilder();
|
||||
HashSet<Integer> visited = new HashSet<>();
|
||||
int s = 0;
|
||||
boolean done;
|
||||
Transition t = new Transition();
|
||||
do {
|
||||
done = true;
|
||||
visited.add(s);
|
||||
if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
|
||||
a.getTransition(s, 0, t);
|
||||
if (t.min == t.max && !visited.contains(t.dest)) {
|
||||
b.appendCodePoint(t.min);
|
||||
s = t.dest;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
public static BytesRef getCommonPrefixBytesRef(LightAutomaton a) {
|
||||
BytesRef ref = new BytesRef(10);
|
||||
HashSet<Integer> visited = new HashSet<>();
|
||||
int s = 0;
|
||||
boolean done;
|
||||
Transition t = new Transition();
|
||||
do {
|
||||
done = true;
|
||||
visited.add(s);
|
||||
if (a.isAccept(s) == false && a.getNumTransitions(s) == 1) {
|
||||
a.getTransition(s, 0, t);
|
||||
if (t.min == t.max && !visited.contains(t.dest)) {
|
||||
ref.grow(++ref.length);
|
||||
ref.bytes[ref.length - 1] = (byte) t.min;
|
||||
s = t.dest;
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
} while (!done);
|
||||
|
||||
return ref;
|
||||
}
|
||||
|
||||
public static BytesRef getCommonSuffixBytesRef(LightAutomaton a) {
|
||||
// reverse the language of the automaton, then reverse its common prefix.
|
||||
LightAutomaton r = BasicOperations.determinize(reverse(a));
|
||||
BytesRef ref = getCommonPrefixBytesRef(r);
|
||||
reverseBytes(ref);
|
||||
return ref;
|
||||
}
|
||||
|
||||
private static void reverseBytes(BytesRef ref) {
|
||||
if (ref.length <= 1) return;
|
||||
int num = ref.length >> 1;
|
||||
for (int i = ref.offset; i < ( ref.offset + num ); i++) {
|
||||
byte b = ref.bytes[i];
|
||||
ref.bytes[i] = ref.bytes[ref.offset * 2 + ref.length - i - 1];
|
||||
ref.bytes[ref.offset * 2 + ref.length - i - 1] = b;
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit merge Special/Basic operations
|
||||
|
||||
public static LightAutomaton reverse(LightAutomaton a) {
|
||||
return reverse(a, null);
|
||||
}
|
||||
|
||||
public static LightAutomaton reverse(LightAutomaton a, Set<Integer> initialStates) {
|
||||
|
||||
if (BasicOperations.isEmpty(a)) {
|
||||
return new LightAutomaton();
|
||||
}
|
||||
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
// Build a new automaton with all edges reversed
|
||||
LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
|
||||
// Initial node; we'll add epsilon transitions in the end:
|
||||
builder.createState();
|
||||
|
||||
for(int s=0;s<numStates;s++) {
|
||||
builder.createState();
|
||||
}
|
||||
|
||||
// Old initial state becomes new accept state:
|
||||
builder.setAccept(1, true);
|
||||
|
||||
Transition t = new Transition();
|
||||
for (int s=0;s<numStates;s++) {
|
||||
int numTransitions = a.getNumTransitions(s);
|
||||
a.initTransition(s, t);
|
||||
for(int i=0;i<numTransitions;i++) {
|
||||
a.getNextTransition(t);
|
||||
builder.addTransition(t.dest+1, s+1, t.min, t.max);
|
||||
}
|
||||
}
|
||||
|
||||
LightAutomaton result = builder.finish();
|
||||
|
||||
for(int s : a.getAcceptStates()) {
|
||||
assert s < numStates;
|
||||
result.addEpsilon(0, s+1);
|
||||
if (initialStates != null) {
|
||||
initialStates.add(s+1);
|
||||
}
|
||||
}
|
||||
|
||||
result.finishState();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class LightPathNode {
|
||||
|
||||
/** Which state the path node ends on, whose
|
||||
* transitions we are enumerating. */
|
||||
public int state;
|
||||
|
||||
/** Which state the current transition leads to. */
|
||||
public int to;
|
||||
|
||||
/** Which transition we are on. */
|
||||
public int transition;
|
||||
|
||||
/** Which label we are on, in the min-max range of the
|
||||
* current Transition */
|
||||
public int label;
|
||||
|
||||
private final Transition t = new Transition();
|
||||
|
||||
public void resetState(LightAutomaton a, int state) {
|
||||
assert a.getNumTransitions(state) != 0;
|
||||
this.state = state;
|
||||
transition = 0;
|
||||
a.getTransition(state, 0, t);
|
||||
label = t.min;
|
||||
to = t.dest;
|
||||
}
|
||||
|
||||
/** Returns next label of current transition, or
|
||||
* advances to next transition and returns its first
|
||||
* label, if current one is exhausted. If there are
|
||||
* no more transitions, returns -1. */
|
||||
public int nextLabel(LightAutomaton a) {
|
||||
if (label > t.max) {
|
||||
// We've exhaused the current transition's labels;
|
||||
// move to next transitions:
|
||||
transition++;
|
||||
if (transition >= a.getNumTransitions(state)) {
|
||||
// We're done iterating transitions leaving this state
|
||||
return -1;
|
||||
}
|
||||
a.getTransition(state, transition, t);
|
||||
label = t.min;
|
||||
to = t.dest;
|
||||
}
|
||||
return label++;
|
||||
}
|
||||
}
|
||||
|
||||
private static LightPathNode getNode(LightPathNode[] nodes, int index) {
|
||||
assert index < nodes.length;
|
||||
if (nodes[index] == null) {
|
||||
nodes[index] = new LightPathNode();
|
||||
}
|
||||
return nodes[index];
|
||||
}
|
||||
|
||||
// TODO: this is a dangerous method ... Automaton could be
|
||||
// huge ... and it's better in general for caller to
|
||||
// enumerate & process in a single walk:
|
||||
|
||||
/** Returns the set of accepted strings, up to at most
|
||||
* <code>limit</code> strings. If more than <code>limit</code>
|
||||
* strings are accepted, the first limit strings found are returned. If <code>limit</code> == -1, then
|
||||
* the limit is infinite. If the {@link Automaton} has
|
||||
* cycles then this method might throw {@code
|
||||
* IllegalArgumentException} but that is not guaranteed
|
||||
* when the limit is set. */
|
||||
public static Set<IntsRef> getFiniteStrings(LightAutomaton a, int limit) {
|
||||
Set<IntsRef> results = new HashSet<>();
|
||||
|
||||
if (limit == -1 || limit > 0) {
|
||||
// OK
|
||||
} else {
|
||||
throw new IllegalArgumentException("limit must be -1 (which means no limit), or > 0; got: " + limit);
|
||||
}
|
||||
|
||||
if (a.isAccept(0)) {
|
||||
// Special case the empty string, as usual:
|
||||
results.add(new IntsRef());
|
||||
}
|
||||
|
||||
if (a.getNumTransitions(0) > 0 && (limit == -1 || results.size() < limit)) {
|
||||
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
// Tracks which states are in the current path, for
|
||||
// cycle detection:
|
||||
BitSet pathStates = new BitSet(numStates);
|
||||
|
||||
// Stack to hold our current state in the
|
||||
// recursion/iteration:
|
||||
LightPathNode[] nodes = new LightPathNode[4];
|
||||
|
||||
pathStates.set(0);
|
||||
LightPathNode root = getNode(nodes, 0);
|
||||
root.resetState(a, 0);
|
||||
|
||||
IntsRef string = new IntsRef(1);
|
||||
string.length = 1;
|
||||
|
||||
while (string.length > 0) {
|
||||
|
||||
LightPathNode node = nodes[string.length-1];
|
||||
|
||||
// Get next label leaving the current node:
|
||||
int label = node.nextLabel(a);
|
||||
|
||||
if (label != -1) {
|
||||
string.ints[string.length-1] = label;
|
||||
|
||||
if (a.isAccept(node.to)) {
|
||||
// This transition leads to an accept state,
|
||||
// so we save the current string:
|
||||
results.add(IntsRef.deepCopyOf(string));
|
||||
if (results.size() == limit) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (a.getNumTransitions(node.to) != 0) {
|
||||
// Now recurse: the destination of this transition has
|
||||
// outgoing transitions:
|
||||
if (pathStates.get(node.to)) {
|
||||
throw new IllegalArgumentException("automaton has cycles");
|
||||
}
|
||||
pathStates.set(node.to);
|
||||
|
||||
// Push node onto stack:
|
||||
if (nodes.length == string.length) {
|
||||
LightPathNode[] newNodes = new LightPathNode[ArrayUtil.oversize(nodes.length+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
|
||||
System.arraycopy(nodes, 0, newNodes, 0, nodes.length);
|
||||
nodes = newNodes;
|
||||
}
|
||||
getNode(nodes, string.length).resetState(a, node.to);
|
||||
string.length++;
|
||||
string.grow(string.length);
|
||||
}
|
||||
} else {
|
||||
// No more transitions leaving this state,
|
||||
// pop/return back to previous state:
|
||||
assert pathStates.get(node.state);
|
||||
pathStates.clear(node.state);
|
||||
string.length--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
|
@ -34,12 +34,12 @@ package org.apache.lucene.util.automaton;
|
|||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class LightStatePair {
|
||||
public class StatePair {
|
||||
int s;
|
||||
int s1;
|
||||
int s2;
|
||||
|
||||
LightStatePair(int s, int s1, int s2) {
|
||||
StatePair(int s, int s1, int s2) {
|
||||
this.s = s;
|
||||
this.s1 = s1;
|
||||
this.s2 = s2;
|
||||
|
@ -51,7 +51,7 @@ public class LightStatePair {
|
|||
* @param s1 first state
|
||||
* @param s2 second state
|
||||
*/
|
||||
public LightStatePair(int s1, int s2) {
|
||||
public StatePair(int s1, int s2) {
|
||||
this.s1 = s1;
|
||||
this.s2 = s2;
|
||||
this.s = -1;
|
||||
|
@ -66,8 +66,8 @@ public class LightStatePair {
|
|||
*/
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj instanceof LightStatePair) {
|
||||
LightStatePair p = (LightStatePair) obj;
|
||||
if (obj instanceof StatePair) {
|
||||
StatePair p = (StatePair) obj;
|
||||
return p.s1 == s1 && p.s2 == s2;
|
||||
} else return false;
|
||||
}
|
||||
|
@ -84,6 +84,6 @@ public class LightStatePair {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "LightStatePair(s1=" + s1 + " s2=" + s2 + ")";
|
||||
return "StatePair(s1=" + s1 + " s2=" + s2 + ")";
|
||||
}
|
||||
}
|
|
@ -28,7 +28,7 @@ import java.util.List;
|
|||
* Converts UTF-32 automata to the equivalent UTF-8 representation.
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class UTF32ToUTF8Light {
|
||||
public final class UTF32ToUTF8 {
|
||||
|
||||
// Unicode boundaries for UTF8 bytes 1,2,3,4
|
||||
private static final int[] startCodes = new int[] {0, 128, 2048, 65536};
|
||||
|
@ -255,14 +255,14 @@ public final class UTF32ToUTF8Light {
|
|||
}
|
||||
}
|
||||
|
||||
LightAutomaton.Builder utf8;
|
||||
Automaton.Builder utf8;
|
||||
|
||||
/** Converts an incoming utf32 automaton to an equivalent
|
||||
* utf8 one. The incoming automaton need not be
|
||||
* deterministic. Note that the returned automaton will
|
||||
* not in general be deterministic, so you must
|
||||
* determinize it if that's needed. */
|
||||
public LightAutomaton convert(LightAutomaton utf32) {
|
||||
public Automaton convert(Automaton utf32) {
|
||||
if (utf32.getNumStates() == 0) {
|
||||
return utf32;
|
||||
}
|
||||
|
@ -273,7 +273,7 @@ public final class UTF32ToUTF8Light {
|
|||
List<Integer> pending = new ArrayList<>();
|
||||
int utf32State = 0;
|
||||
pending.add(utf32State);
|
||||
utf8 = new LightAutomaton.Builder();
|
||||
utf8 = new Automaton.Builder();
|
||||
|
||||
int utf8State = utf8.createState();
|
||||
|
|
@ -29,9 +29,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -409,10 +409,10 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
new Token[] {
|
||||
token("abc", 1, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = s2a("abc");
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = s2a("abc");
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testMultipleHoles() throws Exception {
|
||||
|
@ -421,10 +421,10 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("a", 1, 1),
|
||||
token("b", 3, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testSynOverMultipleHoles() throws Exception {
|
||||
|
@ -434,12 +434,12 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("x", 0, 3),
|
||||
token("b", 3, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton a1 = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
|
||||
final LightAutomaton a2 = join(s2a("x"), SEP_A, s2a("b"));
|
||||
final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton a1 = join(s2a("a"), SEP_A, HOLE_A, SEP_A, HOLE_A, SEP_A, s2a("b"));
|
||||
final Automaton a2 = join(s2a("x"), SEP_A, s2a("b"));
|
||||
final Automaton expected = Operations.union(a1, a2);
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
// for debugging!
|
||||
|
@ -453,25 +453,25 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
}
|
||||
*/
|
||||
|
||||
private static final LightAutomaton SEP_A = BasicAutomata.makeCharLight(TokenStreamToAutomaton.POS_SEP);
|
||||
private static final LightAutomaton HOLE_A = BasicAutomata.makeCharLight(TokenStreamToAutomaton.HOLE);
|
||||
private static final Automaton SEP_A = Automata.makeChar(TokenStreamToAutomaton.POS_SEP);
|
||||
private static final Automaton HOLE_A = Automata.makeChar(TokenStreamToAutomaton.HOLE);
|
||||
|
||||
private LightAutomaton join(String ... strings) {
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
private Automaton join(String ... strings) {
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
for(String s : strings) {
|
||||
as.add(s2a(s));
|
||||
as.add(SEP_A);
|
||||
}
|
||||
as.remove(as.size()-1);
|
||||
return BasicOperations.concatenateLight(as);
|
||||
return Operations.concatenate(as);
|
||||
}
|
||||
|
||||
private LightAutomaton join(LightAutomaton ... as) {
|
||||
return BasicOperations.concatenateLight(Arrays.asList(as));
|
||||
private Automaton join(Automaton ... as) {
|
||||
return Operations.concatenate(Arrays.asList(as));
|
||||
}
|
||||
|
||||
private LightAutomaton s2a(String s) {
|
||||
return BasicAutomata.makeStringLight(s);
|
||||
private Automaton s2a(String s) {
|
||||
return Automata.makeString(s);
|
||||
}
|
||||
|
||||
public void testTwoTokens() throws Exception {
|
||||
|
@ -481,12 +481,12 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("abc", 1, 1),
|
||||
token("def", 1, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = join("abc", "def");
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = join("abc", "def");
|
||||
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testHole() throws Exception {
|
||||
|
@ -496,13 +496,13 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("abc", 1, 1),
|
||||
token("def", 2, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
|
||||
final LightAutomaton expected = join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"));
|
||||
final Automaton expected = join(s2a("abc"), SEP_A, HOLE_A, SEP_A, s2a("def"));
|
||||
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testOverlappedTokensSausage() throws Exception {
|
||||
|
@ -513,12 +513,12 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("abc", 1, 1),
|
||||
token("xyz", 0, 1)
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton a1 = s2a("abc");
|
||||
final LightAutomaton a2 = s2a("xyz");
|
||||
final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton a1 = s2a("abc");
|
||||
final Automaton a2 = s2a("xyz");
|
||||
final Automaton expected = Operations.union(a1, a2);
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testOverlappedTokensLattice() throws Exception {
|
||||
|
@ -529,14 +529,14 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("xyz", 0, 2),
|
||||
token("def", 1, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton a1 = s2a("xyz");
|
||||
final LightAutomaton a2 = join("abc", "def");
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton a1 = s2a("xyz");
|
||||
final Automaton a2 = join("abc", "def");
|
||||
|
||||
final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
|
||||
final Automaton expected = Operations.union(a1, a2);
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testSynOverHole() throws Exception {
|
||||
|
@ -547,15 +547,15 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("X", 0, 2),
|
||||
token("b", 2, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton a1 = BasicOperations.unionLight(
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton a1 = Operations.union(
|
||||
join(s2a("a"), SEP_A, HOLE_A),
|
||||
s2a("X"));
|
||||
final LightAutomaton expected = BasicOperations.concatenateLight(a1,
|
||||
final Automaton expected = Operations.concatenate(a1,
|
||||
join(SEP_A, s2a("b")));
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testSynOverHole2() throws Exception {
|
||||
|
@ -566,12 +566,12 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("abc", 0, 3),
|
||||
token("def", 2, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = BasicOperations.unionLight(
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = Operations.union(
|
||||
join(s2a("xyz"), SEP_A, HOLE_A, SEP_A, s2a("def")),
|
||||
s2a("abc"));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testOverlappedTokensLattice2() throws Exception {
|
||||
|
@ -583,13 +583,13 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("def", 1, 1),
|
||||
token("ghi", 1, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton a1 = s2a("xyz");
|
||||
final LightAutomaton a2 = join("abc", "def", "ghi");
|
||||
final LightAutomaton expected = BasicOperations.unionLight(a1, a2);
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton a1 = s2a("xyz");
|
||||
final Automaton a2 = join("abc", "def", "ghi");
|
||||
final Automaton expected = Operations.union(a1, a2);
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
public void testToDot() throws Exception {
|
||||
|
@ -604,11 +604,11 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
new Token[] {
|
||||
token("abc", 2, 1),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = join(HOLE_A, SEP_A, s2a("abc"));
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = join(HOLE_A, SEP_A, s2a("abc"));
|
||||
//toDot(actual);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
|
||||
// TODO: testEndsWithHole... but we need posInc to set in TS.end()
|
||||
|
@ -619,10 +619,10 @@ public class TestGraphTokenizers extends BaseTokenStreamTestCase {
|
|||
token("a", 1, 1),
|
||||
token("X", 0, 10),
|
||||
});
|
||||
final LightAutomaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final LightAutomaton expected = BasicOperations.unionLight(s2a("a"),
|
||||
final Automaton actual = (new TokenStreamToAutomaton()).toAutomaton(ts);
|
||||
final Automaton expected = Operations.union(s2a("a"),
|
||||
s2a("X"));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(expected)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(actual))));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(expected)),
|
||||
Operations.determinize(Operations.removeDeadStates(actual))));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,8 +35,8 @@ import org.apache.lucene.index.TermsEnum;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.AutomatonTestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
|
@ -82,7 +82,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** Test a configuration where each character is a term */
|
||||
public void testSingleChar() throws Exception {
|
||||
CharacterRunAutomaton single =
|
||||
new CharacterRunAutomaton(new RegExp(".").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp(".").toAutomaton());
|
||||
Analyzer a = new MockAnalyzer(random(), single, false);
|
||||
assertAnalyzesTo(a, "foobar",
|
||||
new String[] { "f", "o", "o", "b", "a", "r" },
|
||||
|
@ -95,7 +95,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** Test a configuration where two characters makes a term */
|
||||
public void testTwoChars() throws Exception {
|
||||
CharacterRunAutomaton single =
|
||||
new CharacterRunAutomaton(new RegExp("..").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("..").toAutomaton());
|
||||
Analyzer a = new MockAnalyzer(random(), single, false);
|
||||
assertAnalyzesTo(a, "foobar",
|
||||
new String[] { "fo", "ob", "ar"},
|
||||
|
@ -116,7 +116,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** Test a configuration where three characters makes a term */
|
||||
public void testThreeChars() throws Exception {
|
||||
CharacterRunAutomaton single =
|
||||
new CharacterRunAutomaton(new RegExp("...").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("...").toAutomaton());
|
||||
Analyzer a = new MockAnalyzer(random(), single, false);
|
||||
assertAnalyzesTo(a, "foobar",
|
||||
new String[] { "foo", "bar"},
|
||||
|
@ -137,7 +137,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
/** Test a configuration where word starts with one uppercase */
|
||||
public void testUppercase() throws Exception {
|
||||
CharacterRunAutomaton single =
|
||||
new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
|
||||
Analyzer a = new MockAnalyzer(random(), single, false);
|
||||
assertAnalyzesTo(a, "FooBarBAZ",
|
||||
new String[] { "Foo", "Bar", "B", "A", "Z"},
|
||||
|
@ -164,9 +164,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
public void testKeep() throws Exception {
|
||||
CharacterRunAutomaton keepWords =
|
||||
new CharacterRunAutomaton(
|
||||
BasicOperations.complementLight(
|
||||
BasicOperations.unionLight(
|
||||
Arrays.asList(BasicAutomata.makeStringLight("foo"), BasicAutomata.makeStringLight("bar")))));
|
||||
Operations.complement(
|
||||
Operations.union(
|
||||
Arrays.asList(Automata.makeString("foo"), Automata.makeString("bar")))));
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);
|
||||
assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
|
||||
new String[] { "foo", "bar", "bar", "foo" },
|
||||
|
@ -175,7 +175,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
|||
|
||||
/** Test a configuration that behaves a lot like LengthFilter */
|
||||
public void testLength() throws Exception {
|
||||
CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toLightAutomaton());
|
||||
CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
|
||||
Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, length5);
|
||||
assertAnalyzesTo(a, "ok toolong fine notfine",
|
||||
new String[] { "ok", "fine" },
|
||||
|
|
|
@ -183,7 +183,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
|
|||
int numIntersections = atLeast(3);
|
||||
for (int i = 0; i < numIntersections; i++) {
|
||||
String re = AutomatonTestUtil.randomRegexp(random());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toLightAutomaton());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
|
||||
if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
// TODO: test start term too
|
||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||
|
|
|
@ -84,9 +84,9 @@ import org.apache.lucene.util.SetOnce;
|
|||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.packed.PackedInts;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -2006,7 +2006,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
public void testStopwordsPosIncHole2() throws Exception {
|
||||
// use two stopfilters for testing here
|
||||
Directory dir = newDirectory();
|
||||
final LightAutomaton secondSet = BasicAutomata.makeStringLight("foobar");
|
||||
final Automaton secondSet = Automata.makeString("foobar");
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
|
|
|
@ -33,9 +33,9 @@ import org.apache.lucene.util.LineFileDocs;
|
|||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
@SuppressCodecs({ "SimpleText", "Memory", "Direct" })
|
||||
|
@ -239,12 +239,12 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
final Set<String> acceptTerms = new HashSet<>();
|
||||
final TreeSet<BytesRef> sortedAcceptTerms = new TreeSet<>();
|
||||
final double keepPct = random().nextDouble();
|
||||
LightAutomaton a;
|
||||
Automaton a;
|
||||
if (iter == 0) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: empty automaton");
|
||||
}
|
||||
a = BasicAutomata.makeEmptyLight();
|
||||
a = Automata.makeEmpty();
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: keepPct=" + keepPct);
|
||||
|
@ -259,7 +259,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
acceptTerms.add(s2);
|
||||
sortedAcceptTerms.add(new BytesRef(s2));
|
||||
}
|
||||
a = BasicAutomata.makeStringUnionLight(sortedAcceptTerms);
|
||||
a = Automata.makeStringUnion(sortedAcceptTerms);
|
||||
}
|
||||
|
||||
final CompiledAutomaton c = new CompiledAutomaton(a, true, false);
|
||||
|
@ -738,7 +738,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
w.shutdown();
|
||||
AtomicReader sub = getOnlySegmentReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
LightAutomaton automaton = new RegExp(".*", RegExp.NONE).toLightAutomaton();
|
||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
TermsEnum te = terms.intersect(ca, null);
|
||||
assertEquals("aaa", te.next().utf8ToString());
|
||||
|
@ -793,7 +793,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
AtomicReader sub = getOnlySegmentReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
|
||||
LightAutomaton automaton = new RegExp(".*d", RegExp.NONE).toLightAutomaton();
|
||||
Automaton automaton = new RegExp(".*d", RegExp.NONE).toAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
TermsEnum te;
|
||||
|
||||
|
@ -847,7 +847,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
AtomicReader sub = getOnlySegmentReader(r);
|
||||
Terms terms = sub.fields().terms("field");
|
||||
|
||||
LightAutomaton automaton = new RegExp(".*", RegExp.NONE).toLightAutomaton(); // accept ALL
|
||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton(); // accept ALL
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
|
||||
|
||||
TermsEnum te = terms.intersect(ca, null);
|
||||
|
|
|
@ -44,7 +44,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
private IndexReader reader;
|
||||
private IndexSearcher searcher;
|
||||
private SortedSet<BytesRef> terms; // the terms we put in the index
|
||||
private LightAutomaton termsAutomaton; // automata of the same
|
||||
private Automaton termsAutomaton; // automata of the same
|
||||
int numIterations;
|
||||
|
||||
@Override
|
||||
|
@ -69,7 +69,7 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
termsAutomaton = BasicAutomata.makeStringUnionLight(terms);
|
||||
termsAutomaton = Automata.makeStringUnion(terms);
|
||||
|
||||
reader = writer.getReader();
|
||||
searcher = newSearcher(reader);
|
||||
|
@ -88,15 +88,15 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
String reg = AutomatonTestUtil.randomRegexp(random());
|
||||
LightAutomaton automaton = BasicOperations.determinize(new RegExp(reg, RegExp.NONE).toLightAutomaton());
|
||||
Automaton automaton = Operations.determinize(new RegExp(reg, RegExp.NONE).toAutomaton());
|
||||
final List<BytesRef> matchedTerms = new ArrayList<>();
|
||||
for(BytesRef t : terms) {
|
||||
if (BasicOperations.run(automaton, t.utf8ToString())) {
|
||||
if (Operations.run(automaton, t.utf8ToString())) {
|
||||
matchedTerms.add(t);
|
||||
}
|
||||
}
|
||||
|
||||
LightAutomaton alternate = BasicAutomata.makeStringUnionLight(matchedTerms);
|
||||
Automaton alternate = Automata.makeStringUnion(matchedTerms);
|
||||
//System.out.println("match " + matchedTerms.size() + " " + alternate.getNumberOfStates() + " states, sigma=" + alternate.getStartPoints().length);
|
||||
//AutomatonTestUtil.minimizeSimple(alternate);
|
||||
//System.out.println("minmize done");
|
||||
|
@ -113,13 +113,13 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
public void testSeeking() throws Exception {
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
String reg = AutomatonTestUtil.randomRegexp(random());
|
||||
LightAutomaton automaton = BasicOperations.determinize(new RegExp(reg, RegExp.NONE).toLightAutomaton());
|
||||
Automaton automaton = Operations.determinize(new RegExp(reg, RegExp.NONE).toAutomaton());
|
||||
TermsEnum te = MultiFields.getTerms(reader, "field").iterator(null);
|
||||
ArrayList<BytesRef> unsortedTerms = new ArrayList<>(terms);
|
||||
Collections.shuffle(unsortedTerms, random());
|
||||
|
||||
for (BytesRef term : unsortedTerms) {
|
||||
if (BasicOperations.run(automaton, term.utf8ToString())) {
|
||||
if (Operations.run(automaton, term.utf8ToString())) {
|
||||
// term is accepted
|
||||
if (random().nextBoolean()) {
|
||||
// seek exact
|
||||
|
@ -157,17 +157,17 @@ public class TestTermsEnum2 extends LuceneTestCase {
|
|||
public void testIntersect() throws Exception {
|
||||
for (int i = 0; i < numIterations; i++) {
|
||||
String reg = AutomatonTestUtil.randomRegexp(random());
|
||||
LightAutomaton automaton = new RegExp(reg, RegExp.NONE).toLightAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton), false);
|
||||
Automaton automaton = new RegExp(reg, RegExp.NONE).toAutomaton();
|
||||
CompiledAutomaton ca = new CompiledAutomaton(automaton, Operations.isFinite(automaton), false);
|
||||
TermsEnum te = MultiFields.getTerms(reader, "field").intersect(ca, null);
|
||||
LightAutomaton expected = BasicOperations.determinize(BasicOperations.intersectionLight(termsAutomaton, automaton));
|
||||
Automaton expected = Operations.determinize(Operations.intersection(termsAutomaton, automaton));
|
||||
TreeSet<BytesRef> found = new TreeSet<>();
|
||||
while (te.next() != null) {
|
||||
found.add(BytesRef.deepCopyOf(te.term()));
|
||||
}
|
||||
|
||||
LightAutomaton actual = BasicOperations.determinize(BasicAutomata.makeStringUnionLight(found));
|
||||
assertTrue(BasicOperations.sameLanguage(expected, actual));
|
||||
Automaton actual = Operations.determinize(Automata.makeStringUnion(found));
|
||||
assertTrue(Operations.sameLanguage(expected, actual));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,9 +34,9 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.Rethrow;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.AutomatonTestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
public class TestAutomatonQuery extends LuceneTestCase {
|
||||
private Directory directory;
|
||||
|
@ -86,7 +86,7 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
return searcher.search(query, 5).totalHits;
|
||||
}
|
||||
|
||||
private void assertAutomatonHits(int expected, LightAutomaton automaton)
|
||||
private void assertAutomatonHits(int expected, Automaton automaton)
|
||||
throws IOException {
|
||||
AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
|
||||
|
||||
|
@ -106,26 +106,24 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
/**
|
||||
* Test some very simple automata.
|
||||
*/
|
||||
public void testBasicAutomata() throws IOException {
|
||||
assertAutomatonHits(0, BasicAutomata.makeEmptyLight());
|
||||
assertAutomatonHits(0, BasicAutomata.makeEmptyStringLight());
|
||||
assertAutomatonHits(2, BasicAutomata.makeAnyCharLight());
|
||||
assertAutomatonHits(3, BasicAutomata.makeAnyStringLight());
|
||||
assertAutomatonHits(2, BasicAutomata.makeStringLight("doc"));
|
||||
assertAutomatonHits(1, BasicAutomata.makeCharLight('a'));
|
||||
assertAutomatonHits(2, BasicAutomata.makeCharRangeLight('a', 'b'));
|
||||
assertAutomatonHits(2, BasicAutomata.makeIntervalLight(1233, 2346, 0));
|
||||
assertAutomatonHits(1, BasicAutomata.makeIntervalLight(0, 2000, 0));
|
||||
assertAutomatonHits(2, BasicOperations.unionLight(BasicAutomata.makeCharLight('a'),
|
||||
BasicAutomata.makeCharLight('b')));
|
||||
assertAutomatonHits(0, BasicOperations.intersectionLight(BasicAutomata
|
||||
.makeCharLight('a'), BasicAutomata.makeCharLight('b')));
|
||||
assertAutomatonHits(1, BasicOperations.minusLight(BasicAutomata.makeCharRangeLight('a', 'b'),
|
||||
BasicAutomata.makeCharLight('a')));
|
||||
public void testAutomata() throws IOException {
|
||||
assertAutomatonHits(0, Automata.makeEmpty());
|
||||
assertAutomatonHits(0, Automata.makeEmptyString());
|
||||
assertAutomatonHits(2, Automata.makeAnyChar());
|
||||
assertAutomatonHits(3, Automata.makeAnyString());
|
||||
assertAutomatonHits(2, Automata.makeString("doc"));
|
||||
assertAutomatonHits(1, Automata.makeChar('a'));
|
||||
assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
|
||||
assertAutomatonHits(2, Automata.makeInterval(1233, 2346, 0));
|
||||
assertAutomatonHits(1, Automata.makeInterval(0, 2000, 0));
|
||||
assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
|
||||
Automata.makeChar('b')));
|
||||
assertAutomatonHits(0, Operations.intersection(Automata
|
||||
.makeChar('a'), Automata.makeChar('b')));
|
||||
assertAutomatonHits(1, Operations.minus(Automata.makeCharRange('a', 'b'),
|
||||
Automata.makeChar('a')));
|
||||
}
|
||||
|
||||
// nocommit make a testRandomAutomaton like TestRR2
|
||||
|
||||
/**
|
||||
* Test that a nondeterministic automaton works correctly. (It will be
|
||||
* determinized)
|
||||
|
@ -133,27 +131,27 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
public void testNFA() throws IOException {
|
||||
// accept this or three, the union is an NFA (two transitions for 't' from
|
||||
// initial state)
|
||||
LightAutomaton nfa = BasicOperations.unionLight(BasicAutomata.makeStringLight("this"),
|
||||
BasicAutomata.makeStringLight("three"));
|
||||
Automaton nfa = Operations.union(Automata.makeString("this"),
|
||||
Automata.makeString("three"));
|
||||
assertAutomatonHits(2, nfa);
|
||||
}
|
||||
|
||||
public void testEquals() {
|
||||
AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), BasicAutomata
|
||||
.makeStringLight("foobar"));
|
||||
AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata
|
||||
.makeString("foobar"));
|
||||
// reference to a1
|
||||
AutomatonQuery a2 = a1;
|
||||
// same as a1 (accepts the same language, same term)
|
||||
AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
|
||||
BasicOperations.concatenateLight(
|
||||
BasicAutomata.makeStringLight("foo"),
|
||||
BasicAutomata.makeStringLight("bar")));
|
||||
Operations.concatenate(
|
||||
Automata.makeString("foo"),
|
||||
Automata.makeString("bar")));
|
||||
// different than a1 (same term, but different language)
|
||||
AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
|
||||
BasicAutomata.makeStringLight("different"));
|
||||
Automata.makeString("different"));
|
||||
// different than a1 (different term, same language)
|
||||
AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
|
||||
BasicAutomata.makeStringLight("foobar"));
|
||||
Automata.makeString("foobar"));
|
||||
|
||||
assertEquals(a1.hashCode(), a2.hashCode());
|
||||
assertEquals(a1, a2);
|
||||
|
@ -179,7 +177,7 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
* MultiTermQuery semantics.
|
||||
*/
|
||||
public void testRewriteSingleTerm() throws IOException {
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata.makeStringLight("piece"));
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeString("piece"));
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
|
||||
assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
|
||||
assertEquals(1, automatonQueryNrHits(aq));
|
||||
|
@ -190,8 +188,8 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
* MultiTermQuery semantics.
|
||||
*/
|
||||
public void testRewritePrefix() throws IOException {
|
||||
LightAutomaton pfx = BasicAutomata.makeStringLight("do");
|
||||
LightAutomaton prefixAutomaton = BasicOperations.concatenateLight(pfx, BasicAutomata.makeAnyStringLight());
|
||||
Automaton pfx = Automata.makeString("do");
|
||||
Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
|
||||
assertTrue(aq.getTermsEnum(terms) instanceof PrefixTermsEnum);
|
||||
|
@ -202,7 +200,7 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
* Test handling of the empty language
|
||||
*/
|
||||
public void testEmptyOptimization() throws IOException {
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), BasicAutomata.makeEmptyLight());
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeEmpty());
|
||||
// not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
|
||||
// instanceof EmptyTermEnum);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/**
|
||||
|
@ -98,7 +98,7 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
|
|||
return searcher.search(query, 5).totalHits;
|
||||
}
|
||||
|
||||
private void assertAutomatonHits(int expected, LightAutomaton automaton)
|
||||
private void assertAutomatonHits(int expected, Automaton automaton)
|
||||
throws IOException {
|
||||
AutomatonQuery query = new AutomatonQuery(newTerm("bogus"), automaton);
|
||||
|
||||
|
@ -122,7 +122,7 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
|
|||
* presentation forms block, or a supplementary character.
|
||||
*/
|
||||
public void testSortOrder() throws IOException {
|
||||
LightAutomaton a = new RegExp("((\uD866\uDF05)|\uFB94).*").toLightAutomaton();
|
||||
Automaton a = new RegExp("((\uD866\uDF05)|\uFB94).*").toAutomaton();
|
||||
assertAutomatonHits(2, a);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,10 +27,10 @@ import org.apache.lucene.index.RandomIndexWriter;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomatonProvider;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.AutomatonProvider;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/**
|
||||
|
@ -95,15 +95,15 @@ public class TestRegexpQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testCustomProvider() throws IOException {
|
||||
LightAutomatonProvider myProvider = new LightAutomatonProvider() {
|
||||
AutomatonProvider myProvider = new AutomatonProvider() {
|
||||
// automaton that matches quick, brown, or bob
|
||||
private LightAutomaton quickBrownAutomaton = BasicOperations.unionLight(Arrays
|
||||
.asList(BasicAutomata.makeStringLight("quick"),
|
||||
BasicAutomata.makeStringLight("brown"),
|
||||
BasicAutomata.makeStringLight("bob")));
|
||||
private Automaton quickBrownAutomaton = Operations.union(Arrays
|
||||
.asList(Automata.makeString("quick"),
|
||||
Automata.makeString("brown"),
|
||||
Automata.makeString("bob")));
|
||||
|
||||
@Override
|
||||
public LightAutomaton getAutomaton(String name) {
|
||||
public Automaton getAutomaton(String name) {
|
||||
if (name.equals("quickBrown")) return quickBrownAutomaton;
|
||||
else return null;
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.AutomatonTestUtil;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
|
||||
/**
|
||||
|
@ -103,12 +103,12 @@ public class TestRegexpRandom2 extends LuceneTestCase {
|
|||
|
||||
/** a stupid regexp query that just blasts thru the terms */
|
||||
private class DumbRegexpQuery extends MultiTermQuery {
|
||||
private final LightAutomaton automaton;
|
||||
private final Automaton automaton;
|
||||
|
||||
DumbRegexpQuery(Term term, int flags) {
|
||||
super(term.field());
|
||||
RegExp re = new RegExp(term.text(), flags);
|
||||
automaton = re.toLightAutomaton();
|
||||
automaton = re.toAutomaton();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -36,7 +36,7 @@ public class TestSpanFirstQuery extends LuceneTestCase {
|
|||
Directory dir = newDirectory();
|
||||
|
||||
// mimic StopAnalyzer
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toLightAutomaton());
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
|
||||
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);
|
||||
|
|
|
@ -103,7 +103,7 @@ public class TestQueryBuilder extends LuceneTestCase {
|
|||
expected.add(new Term("field", "1"));
|
||||
expected.add(new Term("field", "2"), 2);
|
||||
|
||||
CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toLightAutomaton());
|
||||
CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopList);
|
||||
|
||||
|
|
|
@ -34,10 +34,10 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
import org.apache.lucene.util.automaton.AutomatonTestUtil.RandomAcceptedStrings;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
public class TestLightAutomaton extends LuceneTestCase {
|
||||
public class TestAutomaton extends LuceneTestCase {
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int start = a.createState();
|
||||
int x = a.createState();
|
||||
int y = a.createState();
|
||||
|
@ -52,7 +52,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testReduceBasic() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int start = a.createState();
|
||||
int end = a.createState();
|
||||
a.setAccept(end, true);
|
||||
|
@ -80,156 +80,156 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSameLanguage() throws Exception {
|
||||
LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
|
||||
LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.concatenateLight(
|
||||
BasicAutomata.makeStringLight("foo"),
|
||||
BasicAutomata.makeStringLight("bar")));
|
||||
assertTrue(BasicOperations.sameLanguage(a1, a2));
|
||||
Automaton a1 = Automata.makeString("foobar");
|
||||
Automaton a2 = Operations.removeDeadStates(Operations.concatenate(
|
||||
Automata.makeString("foo"),
|
||||
Automata.makeString("bar")));
|
||||
assertTrue(Operations.sameLanguage(a1, a2));
|
||||
}
|
||||
|
||||
public void testCommonPrefix() throws Exception {
|
||||
LightAutomaton a = BasicOperations.concatenateLight(
|
||||
BasicAutomata.makeStringLight("foobar"),
|
||||
BasicAutomata.makeAnyStringLight());
|
||||
assertEquals("foobar", SpecialOperations.getCommonPrefix(a));
|
||||
Automaton a = Operations.concatenate(
|
||||
Automata.makeString("foobar"),
|
||||
Automata.makeAnyString());
|
||||
assertEquals("foobar", Operations.getCommonPrefix(a));
|
||||
}
|
||||
|
||||
public void testConcatenate1() throws Exception {
|
||||
LightAutomaton a = BasicOperations.concatenateLight(
|
||||
BasicAutomata.makeStringLight("m"),
|
||||
BasicAutomata.makeAnyStringLight());
|
||||
assertTrue(BasicOperations.run(a, "m"));
|
||||
assertTrue(BasicOperations.run(a, "me"));
|
||||
assertTrue(BasicOperations.run(a, "me too"));
|
||||
Automaton a = Operations.concatenate(
|
||||
Automata.makeString("m"),
|
||||
Automata.makeAnyString());
|
||||
assertTrue(Operations.run(a, "m"));
|
||||
assertTrue(Operations.run(a, "me"));
|
||||
assertTrue(Operations.run(a, "me too"));
|
||||
}
|
||||
|
||||
public void testConcatenate2() throws Exception {
|
||||
LightAutomaton a = BasicOperations.concatenateLight(Arrays.asList(
|
||||
BasicAutomata.makeStringLight("m"),
|
||||
BasicAutomata.makeAnyStringLight(),
|
||||
BasicAutomata.makeStringLight("n"),
|
||||
BasicAutomata.makeAnyStringLight()));
|
||||
a = BasicOperations.determinize(a);
|
||||
assertTrue(BasicOperations.run(a, "mn"));
|
||||
assertTrue(BasicOperations.run(a, "mone"));
|
||||
assertFalse(BasicOperations.run(a, "m"));
|
||||
assertFalse(SpecialOperations.isFinite(a));
|
||||
Automaton a = Operations.concatenate(Arrays.asList(
|
||||
Automata.makeString("m"),
|
||||
Automata.makeAnyString(),
|
||||
Automata.makeString("n"),
|
||||
Automata.makeAnyString()));
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(Operations.run(a, "mn"));
|
||||
assertTrue(Operations.run(a, "mone"));
|
||||
assertFalse(Operations.run(a, "m"));
|
||||
assertFalse(Operations.isFinite(a));
|
||||
}
|
||||
|
||||
public void testUnion1() throws Exception {
|
||||
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(
|
||||
BasicAutomata.makeStringLight("foobar"),
|
||||
BasicAutomata.makeStringLight("barbaz")));
|
||||
a = BasicOperations.determinize(a);
|
||||
assertTrue(BasicOperations.run(a, "foobar"));
|
||||
assertTrue(BasicOperations.run(a, "barbaz"));
|
||||
Automaton a = Operations.union(Arrays.asList(
|
||||
Automata.makeString("foobar"),
|
||||
Automata.makeString("barbaz")));
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(Operations.run(a, "foobar"));
|
||||
assertTrue(Operations.run(a, "barbaz"));
|
||||
|
||||
assertMatches(a, "foobar", "barbaz");
|
||||
}
|
||||
|
||||
public void testUnion2() throws Exception {
|
||||
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(
|
||||
BasicAutomata.makeStringLight("foobar"),
|
||||
BasicAutomata.makeStringLight(""),
|
||||
BasicAutomata.makeStringLight("barbaz")));
|
||||
a = BasicOperations.determinize(a);
|
||||
assertTrue(BasicOperations.run(a, "foobar"));
|
||||
assertTrue(BasicOperations.run(a, "barbaz"));
|
||||
assertTrue(BasicOperations.run(a, ""));
|
||||
Automaton a = Operations.union(Arrays.asList(
|
||||
Automata.makeString("foobar"),
|
||||
Automata.makeString(""),
|
||||
Automata.makeString("barbaz")));
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(Operations.run(a, "foobar"));
|
||||
assertTrue(Operations.run(a, "barbaz"));
|
||||
assertTrue(Operations.run(a, ""));
|
||||
|
||||
assertMatches(a, "", "foobar", "barbaz");
|
||||
}
|
||||
|
||||
public void testMinimizeSimple() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("foobar");
|
||||
LightAutomaton aMin = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Automata.makeString("foobar");
|
||||
Automaton aMin = MinimizationOperations.minimize(a);
|
||||
|
||||
assertTrue(BasicOperations.sameLanguage(a, aMin));
|
||||
assertTrue(Operations.sameLanguage(a, aMin));
|
||||
}
|
||||
|
||||
public void testMinimize2() throws Exception {
|
||||
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(BasicAutomata.makeStringLight("foobar"),
|
||||
BasicAutomata.makeStringLight("boobar")));
|
||||
LightAutomaton aMin = MinimizationOperationsLight.minimize(a);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)), aMin));
|
||||
Automaton a = Operations.union(Arrays.asList(Automata.makeString("foobar"),
|
||||
Automata.makeString("boobar")));
|
||||
Automaton aMin = MinimizationOperations.minimize(a);
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(a)), aMin));
|
||||
}
|
||||
|
||||
public void testReverse() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("foobar");
|
||||
LightAutomaton ra = SpecialOperations.reverse(a);
|
||||
LightAutomaton a2 = BasicOperations.determinize(SpecialOperations.reverse(ra));
|
||||
Automaton a = Automata.makeString("foobar");
|
||||
Automaton ra = Operations.reverse(a);
|
||||
Automaton a2 = Operations.determinize(Operations.reverse(ra));
|
||||
|
||||
assertTrue(BasicOperations.sameLanguage(a, a2));
|
||||
assertTrue(Operations.sameLanguage(a, a2));
|
||||
}
|
||||
|
||||
public void testOptional() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("foobar");
|
||||
LightAutomaton a2 = BasicOperations.optionalLight(a);
|
||||
a2 = BasicOperations.determinize(a2);
|
||||
Automaton a = Automata.makeString("foobar");
|
||||
Automaton a2 = Operations.optional(a);
|
||||
a2 = Operations.determinize(a2);
|
||||
|
||||
assertTrue(BasicOperations.run(a, "foobar"));
|
||||
assertFalse(BasicOperations.run(a, ""));
|
||||
assertTrue(BasicOperations.run(a2, "foobar"));
|
||||
assertTrue(BasicOperations.run(a2, ""));
|
||||
assertTrue(Operations.run(a, "foobar"));
|
||||
assertFalse(Operations.run(a, ""));
|
||||
assertTrue(Operations.run(a2, "foobar"));
|
||||
assertTrue(Operations.run(a2, ""));
|
||||
}
|
||||
|
||||
public void testRepeatAny() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("zee");
|
||||
LightAutomaton a2 = BasicOperations.determinize(BasicOperations.repeatLight(a));
|
||||
assertTrue(BasicOperations.run(a2, ""));
|
||||
assertTrue(BasicOperations.run(a2, "zee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezeezee"));
|
||||
Automaton a = Automata.makeString("zee");
|
||||
Automaton a2 = Operations.determinize(Operations.repeat(a));
|
||||
assertTrue(Operations.run(a2, ""));
|
||||
assertTrue(Operations.run(a2, "zee"));
|
||||
assertTrue(Operations.run(a2, "zeezee"));
|
||||
assertTrue(Operations.run(a2, "zeezeezee"));
|
||||
}
|
||||
|
||||
public void testRepeatMin() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("zee");
|
||||
LightAutomaton a2 = BasicOperations.determinize(BasicOperations.repeatLight(a, 2));
|
||||
assertFalse(BasicOperations.run(a2, ""));
|
||||
assertFalse(BasicOperations.run(a2, "zee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezeezee"));
|
||||
Automaton a = Automata.makeString("zee");
|
||||
Automaton a2 = Operations.determinize(Operations.repeat(a, 2));
|
||||
assertFalse(Operations.run(a2, ""));
|
||||
assertFalse(Operations.run(a2, "zee"));
|
||||
assertTrue(Operations.run(a2, "zeezee"));
|
||||
assertTrue(Operations.run(a2, "zeezeezee"));
|
||||
}
|
||||
|
||||
public void testRepeatMinMax1() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("zee");
|
||||
LightAutomaton a2 = BasicOperations.determinize(BasicOperations.repeatLight(a, 0, 2));
|
||||
assertTrue(BasicOperations.run(a2, ""));
|
||||
assertTrue(BasicOperations.run(a2, "zee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezee"));
|
||||
assertFalse(BasicOperations.run(a2, "zeezeezee"));
|
||||
Automaton a = Automata.makeString("zee");
|
||||
Automaton a2 = Operations.determinize(Operations.repeat(a, 0, 2));
|
||||
assertTrue(Operations.run(a2, ""));
|
||||
assertTrue(Operations.run(a2, "zee"));
|
||||
assertTrue(Operations.run(a2, "zeezee"));
|
||||
assertFalse(Operations.run(a2, "zeezeezee"));
|
||||
}
|
||||
|
||||
public void testRepeatMinMax2() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("zee");
|
||||
LightAutomaton a2 = BasicOperations.determinize(BasicOperations.repeatLight(a, 2, 4));
|
||||
assertFalse(BasicOperations.run(a2, ""));
|
||||
assertFalse(BasicOperations.run(a2, "zee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezeezee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezeezeezee"));
|
||||
assertFalse(BasicOperations.run(a2, "zeezeezeezeezee"));
|
||||
Automaton a = Automata.makeString("zee");
|
||||
Automaton a2 = Operations.determinize(Operations.repeat(a, 2, 4));
|
||||
assertFalse(Operations.run(a2, ""));
|
||||
assertFalse(Operations.run(a2, "zee"));
|
||||
assertTrue(Operations.run(a2, "zeezee"));
|
||||
assertTrue(Operations.run(a2, "zeezeezee"));
|
||||
assertTrue(Operations.run(a2, "zeezeezeezee"));
|
||||
assertFalse(Operations.run(a2, "zeezeezeezeezee"));
|
||||
}
|
||||
|
||||
public void testComplement() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("zee");
|
||||
LightAutomaton a2 = BasicOperations.determinize(BasicOperations.complementLight(a));
|
||||
assertTrue(BasicOperations.run(a2, ""));
|
||||
assertFalse(BasicOperations.run(a2, "zee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezee"));
|
||||
assertTrue(BasicOperations.run(a2, "zeezeezee"));
|
||||
Automaton a = Automata.makeString("zee");
|
||||
Automaton a2 = Operations.determinize(Operations.complement(a));
|
||||
assertTrue(Operations.run(a2, ""));
|
||||
assertFalse(Operations.run(a2, "zee"));
|
||||
assertTrue(Operations.run(a2, "zeezee"));
|
||||
assertTrue(Operations.run(a2, "zeezeezee"));
|
||||
}
|
||||
|
||||
public void testInterval() throws Exception {
|
||||
LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(17, 100, 3));
|
||||
assertFalse(BasicOperations.run(a, ""));
|
||||
assertTrue(BasicOperations.run(a, "017"));
|
||||
assertTrue(BasicOperations.run(a, "100"));
|
||||
assertTrue(BasicOperations.run(a, "073"));
|
||||
Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3));
|
||||
assertFalse(Operations.run(a, ""));
|
||||
assertTrue(Operations.run(a, "017"));
|
||||
assertTrue(Operations.run(a, "100"));
|
||||
assertTrue(Operations.run(a, "073"));
|
||||
}
|
||||
|
||||
public void testCommonSuffix() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int init = a.createState();
|
||||
int fini = a.createState();
|
||||
a.setAccept(init, true);
|
||||
|
@ -237,17 +237,17 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
a.addTransition(init, fini, 'm');
|
||||
a.addTransition(fini, fini, 'm');
|
||||
a.finishState();
|
||||
assertEquals(0, SpecialOperations.getCommonSuffixBytesRef(a).length);
|
||||
assertEquals(0, Operations.getCommonSuffixBytesRef(a).length);
|
||||
}
|
||||
|
||||
public void testReverseRandom1() throws Exception {
|
||||
int ITERS = atLeast(100);
|
||||
for(int i=0;i<ITERS;i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
LightAutomaton ra = SpecialOperations.reverse(a);
|
||||
LightAutomaton rra = SpecialOperations.reverse(ra);
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(rra))));
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton ra = Operations.reverse(a);
|
||||
Automaton rra = Operations.reverse(ra);
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(Operations.removeDeadStates(a)),
|
||||
Operations.determinize(Operations.removeDeadStates(rra))));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,15 +255,15 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
//System.out.println("TEST: iter=" + iter);
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
if (random().nextBoolean()) {
|
||||
a = BasicOperations.removeDeadStates(a);
|
||||
a = Operations.removeDeadStates(a);
|
||||
}
|
||||
LightAutomaton ra = SpecialOperations.reverse(a);
|
||||
LightAutomaton rda = BasicOperations.determinize(ra);
|
||||
Automaton ra = Operations.reverse(a);
|
||||
Automaton rda = Operations.determinize(ra);
|
||||
|
||||
if (BasicOperations.isEmpty(a)) {
|
||||
assertTrue(BasicOperations.isEmpty(rda));
|
||||
if (Operations.isEmpty(a)) {
|
||||
assertTrue(Operations.isEmpty(rda));
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -282,30 +282,30 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
//System.out.println("TEST: iter2=" + iter2 + " s=" + Arrays.toString(s));
|
||||
|
||||
// Make sure reversed automaton accepts it
|
||||
assertTrue(BasicOperations.run(rda, new IntsRef(s, 0, s.length)));
|
||||
assertTrue(Operations.run(rda, new IntsRef(s, 0, s.length)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testAnyStringEmptyString() throws Exception {
|
||||
LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeAnyStringLight());
|
||||
assertTrue(BasicOperations.run(a, ""));
|
||||
Automaton a = Operations.determinize(Automata.makeAnyString());
|
||||
assertTrue(Operations.run(a, ""));
|
||||
}
|
||||
|
||||
public void testBasicIsEmpty() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
a.createState();
|
||||
assertTrue(BasicOperations.isEmpty(a));
|
||||
assertTrue(Operations.isEmpty(a));
|
||||
}
|
||||
|
||||
public void testRemoveDeadTransitionsEmpty() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeEmptyLight();
|
||||
LightAutomaton a2 = BasicOperations.removeDeadStates(a);
|
||||
assertTrue(BasicOperations.isEmpty(a2));
|
||||
Automaton a = Automata.makeEmpty();
|
||||
Automaton a2 = Operations.removeDeadStates(a);
|
||||
assertTrue(Operations.isEmpty(a2));
|
||||
}
|
||||
|
||||
public void testInvalidAddTransition() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int s1 = a.createState();
|
||||
int s2 = a.createState();
|
||||
a.addTransition(s1, s2, 'a');
|
||||
|
@ -321,7 +321,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
public void testBuilderRandom() throws Exception {
|
||||
int ITERS = atLeast(100);
|
||||
for(int iter=0;iter<ITERS;iter++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
|
||||
// Just get all transitions, shuffle, and build a new automaton with the same transitions:
|
||||
List<Transition> allTrans = new ArrayList<>();
|
||||
|
@ -335,7 +335,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
LightAutomaton.Builder builder = new LightAutomaton.Builder();
|
||||
Automaton.Builder builder = new Automaton.Builder();
|
||||
for(int i=0;i<numStates;i++) {
|
||||
int s = builder.createState();
|
||||
builder.setAccept(s, a.isAccept(s));
|
||||
|
@ -346,83 +346,83 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
builder.addTransition(t.source, t.dest, t.min, t.max);
|
||||
}
|
||||
|
||||
assertTrue(BasicOperations.sameLanguage(
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(a)),
|
||||
BasicOperations.determinize(BasicOperations.removeDeadStates(builder.finish()))));
|
||||
assertTrue(Operations.sameLanguage(
|
||||
Operations.determinize(Operations.removeDeadStates(a)),
|
||||
Operations.determinize(Operations.removeDeadStates(builder.finish()))));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public void testIsTotal() throws Exception {
|
||||
assertFalse(BasicOperations.isTotal(new LightAutomaton()));
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
assertFalse(Operations.isTotal(new Automaton()));
|
||||
Automaton a = new Automaton();
|
||||
int init = a.createState();
|
||||
int fini = a.createState();
|
||||
a.setAccept(fini, true);
|
||||
a.addTransition(init, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
a.finishState();
|
||||
assertFalse(BasicOperations.isTotal(a));
|
||||
assertFalse(Operations.isTotal(a));
|
||||
a.addTransition(fini, fini, Character.MIN_CODE_POINT, Character.MAX_CODE_POINT);
|
||||
a.finishState();
|
||||
assertFalse(BasicOperations.isTotal(a));
|
||||
assertFalse(Operations.isTotal(a));
|
||||
a.setAccept(init, true);
|
||||
assertTrue(BasicOperations.isTotal(MinimizationOperationsLight.minimize(a)));
|
||||
assertTrue(Operations.isTotal(MinimizationOperations.minimize(a)));
|
||||
}
|
||||
|
||||
public void testMinimizeEmpty() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int init = a.createState();
|
||||
int fini = a.createState();
|
||||
a.addTransition(init, fini, 'a');
|
||||
a.finishState();
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
assertEquals(0, a.getNumStates());
|
||||
}
|
||||
|
||||
public void testMinus() throws Exception {
|
||||
LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
|
||||
LightAutomaton a2 = BasicAutomata.makeStringLight("boobar");
|
||||
LightAutomaton a3 = BasicAutomata.makeStringLight("beebar");
|
||||
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(a1, a2, a3));
|
||||
Automaton a1 = Automata.makeString("foobar");
|
||||
Automaton a2 = Automata.makeString("boobar");
|
||||
Automaton a3 = Automata.makeString("beebar");
|
||||
Automaton a = Operations.union(Arrays.asList(a1, a2, a3));
|
||||
if (random().nextBoolean()) {
|
||||
a = BasicOperations.determinize(a);
|
||||
a = Operations.determinize(a);
|
||||
} else if (random().nextBoolean()) {
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
}
|
||||
assertMatches(a, "foobar", "beebar", "boobar");
|
||||
|
||||
LightAutomaton a4 = BasicOperations.determinize(BasicOperations.minusLight(a, a2));
|
||||
Automaton a4 = Operations.determinize(Operations.minus(a, a2));
|
||||
|
||||
assertTrue(BasicOperations.run(a4, "foobar"));
|
||||
assertFalse(BasicOperations.run(a4, "boobar"));
|
||||
assertTrue(BasicOperations.run(a4, "beebar"));
|
||||
assertTrue(Operations.run(a4, "foobar"));
|
||||
assertFalse(Operations.run(a4, "boobar"));
|
||||
assertTrue(Operations.run(a4, "beebar"));
|
||||
assertMatches(a4, "foobar", "beebar");
|
||||
|
||||
a4 = BasicOperations.determinize(BasicOperations.minusLight(a4, a1));
|
||||
assertFalse(BasicOperations.run(a4, "foobar"));
|
||||
assertFalse(BasicOperations.run(a4, "boobar"));
|
||||
assertTrue(BasicOperations.run(a4, "beebar"));
|
||||
a4 = Operations.determinize(Operations.minus(a4, a1));
|
||||
assertFalse(Operations.run(a4, "foobar"));
|
||||
assertFalse(Operations.run(a4, "boobar"));
|
||||
assertTrue(Operations.run(a4, "beebar"));
|
||||
assertMatches(a4, "beebar");
|
||||
|
||||
a4 = BasicOperations.determinize(BasicOperations.minusLight(a4, a3));
|
||||
assertFalse(BasicOperations.run(a4, "foobar"));
|
||||
assertFalse(BasicOperations.run(a4, "boobar"));
|
||||
assertFalse(BasicOperations.run(a4, "beebar"));
|
||||
a4 = Operations.determinize(Operations.minus(a4, a3));
|
||||
assertFalse(Operations.run(a4, "foobar"));
|
||||
assertFalse(Operations.run(a4, "boobar"));
|
||||
assertFalse(Operations.run(a4, "beebar"));
|
||||
assertMatches(a4);
|
||||
}
|
||||
|
||||
public void testOneInterval() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeIntervalLight(999, 1032, 0);
|
||||
a = BasicOperations.determinize(a);
|
||||
assertTrue(BasicOperations.run(a, "0999"));
|
||||
assertTrue(BasicOperations.run(a, "00999"));
|
||||
assertTrue(BasicOperations.run(a, "000999"));
|
||||
Automaton a = Automata.makeInterval(999, 1032, 0);
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(Operations.run(a, "0999"));
|
||||
assertTrue(Operations.run(a, "00999"));
|
||||
assertTrue(Operations.run(a, "000999"));
|
||||
}
|
||||
|
||||
public void testAnotherInterval() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeIntervalLight(1, 2, 0);
|
||||
a = BasicOperations.determinize(a);
|
||||
assertTrue(BasicOperations.run(a, "01"));
|
||||
Automaton a = Automata.makeInterval(1, 2, 0);
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(Operations.run(a, "01"));
|
||||
}
|
||||
|
||||
public void testIntervalRandom() throws Exception {
|
||||
|
@ -443,9 +443,9 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
String prefix = b.toString();
|
||||
|
||||
LightAutomaton a = BasicOperations.determinize(BasicAutomata.makeIntervalLight(min, max, digits));
|
||||
Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits));
|
||||
if (random().nextBoolean()) {
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
}
|
||||
String mins = Integer.toString(min);
|
||||
String maxs = Integer.toString(max);
|
||||
|
@ -453,8 +453,8 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
mins = prefix.substring(mins.length()) + mins;
|
||||
maxs = prefix.substring(maxs.length()) + maxs;
|
||||
}
|
||||
assertTrue(BasicOperations.run(a, mins));
|
||||
assertTrue(BasicOperations.run(a, maxs));
|
||||
assertTrue(Operations.run(a, mins));
|
||||
assertTrue(Operations.run(a, maxs));
|
||||
|
||||
for(int iter2=0;iter2<100;iter2++) {
|
||||
int x = random().nextInt(2*max);
|
||||
|
@ -473,83 +473,83 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
sb.append(sx);
|
||||
sx = sb.toString();
|
||||
}
|
||||
assertEquals(expected, BasicOperations.run(a, sx));
|
||||
assertEquals(expected, Operations.run(a, sx));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void assertMatches(LightAutomaton a, String... strings) {
|
||||
private void assertMatches(Automaton a, String... strings) {
|
||||
Set<IntsRef> expected = new HashSet<>();
|
||||
for(String s : strings) {
|
||||
IntsRef ints = new IntsRef();
|
||||
expected.add(Util.toUTF32(s, ints));
|
||||
}
|
||||
|
||||
assertEquals(expected, SpecialOperations.getFiniteStrings(BasicOperations.determinize(a), -1));
|
||||
assertEquals(expected, Operations.getFiniteStrings(Operations.determinize(a), -1));
|
||||
}
|
||||
|
||||
public void testConcatenatePreservesDet() throws Exception {
|
||||
LightAutomaton a1 = BasicAutomata.makeStringLight("foobar");
|
||||
Automaton a1 = Automata.makeString("foobar");
|
||||
assertTrue(a1.isDeterministic());
|
||||
LightAutomaton a2 = BasicAutomata.makeStringLight("baz");
|
||||
Automaton a2 = Automata.makeString("baz");
|
||||
assertTrue(a2.isDeterministic());
|
||||
assertTrue((BasicOperations.concatenateLight(Arrays.asList(a1, a2)).isDeterministic()));
|
||||
assertTrue((Operations.concatenate(Arrays.asList(a1, a2)).isDeterministic()));
|
||||
}
|
||||
|
||||
public void testRemoveDeadStates() throws Exception {
|
||||
LightAutomaton a = BasicOperations.concatenateLight(Arrays.asList(BasicAutomata.makeStringLight("x"),
|
||||
BasicAutomata.makeStringLight("y")));
|
||||
Automaton a = Operations.concatenate(Arrays.asList(Automata.makeString("x"),
|
||||
Automata.makeString("y")));
|
||||
assertEquals(4, a.getNumStates());
|
||||
a = BasicOperations.removeDeadStates(a);
|
||||
a = Operations.removeDeadStates(a);
|
||||
assertEquals(3, a.getNumStates());
|
||||
}
|
||||
|
||||
public void testRemoveDeadStatesEmpty1() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
a.finishState();
|
||||
assertTrue(BasicOperations.isEmpty(a));
|
||||
assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
|
||||
assertTrue(Operations.isEmpty(a));
|
||||
assertTrue(Operations.isEmpty(Operations.removeDeadStates(a)));
|
||||
}
|
||||
|
||||
public void testRemoveDeadStatesEmpty2() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
a.finishState();
|
||||
assertTrue(BasicOperations.isEmpty(a));
|
||||
assertTrue(BasicOperations.isEmpty(BasicOperations.removeDeadStates(a)));
|
||||
assertTrue(Operations.isEmpty(a));
|
||||
assertTrue(Operations.isEmpty(Operations.removeDeadStates(a)));
|
||||
}
|
||||
|
||||
public void testRemoveDeadStatesEmpty3() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int init = a.createState();
|
||||
int fini = a.createState();
|
||||
a.addTransition(init, fini, 'a');
|
||||
LightAutomaton a2 = BasicOperations.removeDeadStates(a);
|
||||
Automaton a2 = Operations.removeDeadStates(a);
|
||||
assertEquals(0, a2.getNumStates());
|
||||
}
|
||||
|
||||
public void testConcatEmpty() throws Exception {
|
||||
// If you concat empty automaton to anything the result should still be empty:
|
||||
LightAutomaton a = BasicOperations.concatenateLight(BasicAutomata.makeEmptyLight(),
|
||||
BasicAutomata.makeStringLight("foo"));
|
||||
assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
|
||||
Automaton a = Operations.concatenate(Automata.makeEmpty(),
|
||||
Automata.makeString("foo"));
|
||||
assertEquals(new HashSet<IntsRef>(), Operations.getFiniteStrings(a, -1));
|
||||
|
||||
a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight("foo"),
|
||||
BasicAutomata.makeEmptyLight());
|
||||
assertEquals(new HashSet<IntsRef>(), SpecialOperations.getFiniteStrings(a, -1));
|
||||
a = Operations.concatenate(Automata.makeString("foo"),
|
||||
Automata.makeEmpty());
|
||||
assertEquals(new HashSet<IntsRef>(), Operations.getFiniteStrings(a, -1));
|
||||
}
|
||||
|
||||
public void testSeemsNonEmptyButIsNot1() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
// Init state has a transition but doesn't lead to accept
|
||||
int init = a.createState();
|
||||
int s = a.createState();
|
||||
a.addTransition(init, s, 'a');
|
||||
a.finishState();
|
||||
assertTrue(BasicOperations.isEmpty(a));
|
||||
assertTrue(Operations.isEmpty(a));
|
||||
}
|
||||
|
||||
public void testSeemsNonEmptyButIsNot2() throws Exception {
|
||||
LightAutomaton a = new LightAutomaton();
|
||||
Automaton a = new Automaton();
|
||||
int init = a.createState();
|
||||
int s = a.createState();
|
||||
a.addTransition(init, s, 'a');
|
||||
|
@ -557,76 +557,76 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
s = a.createState();
|
||||
a.setAccept(s, true);
|
||||
a.finishState();
|
||||
assertTrue(BasicOperations.isEmpty(a));
|
||||
assertTrue(Operations.isEmpty(a));
|
||||
}
|
||||
|
||||
public void testSameLanguage1() throws Exception {
|
||||
LightAutomaton a = BasicAutomata.makeEmptyStringLight();
|
||||
LightAutomaton a2 = BasicAutomata.makeEmptyStringLight();
|
||||
Automaton a = Automata.makeEmptyString();
|
||||
Automaton a2 = Automata.makeEmptyString();
|
||||
int state = a2.createState();
|
||||
a2.addTransition(0, state, 'a');
|
||||
a2.finishState();
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.removeDeadStates(a),
|
||||
BasicOperations.removeDeadStates(a2)));
|
||||
assertTrue(Operations.sameLanguage(Operations.removeDeadStates(a),
|
||||
Operations.removeDeadStates(a2)));
|
||||
}
|
||||
|
||||
private LightAutomaton randomNoOp(LightAutomaton a) {
|
||||
private Automaton randomNoOp(Automaton a) {
|
||||
switch (random().nextInt(5)) {
|
||||
case 0:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: determinize");
|
||||
}
|
||||
return BasicOperations.determinize(a);
|
||||
return Operations.determinize(a);
|
||||
case 1:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: minimize");
|
||||
}
|
||||
return MinimizationOperationsLight.minimize(a);
|
||||
return MinimizationOperations.minimize(a);
|
||||
case 2:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: removeDeadStates");
|
||||
}
|
||||
return BasicOperations.removeDeadStates(a);
|
||||
return Operations.removeDeadStates(a);
|
||||
case 3:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: reverse reverse");
|
||||
}
|
||||
a = SpecialOperations.reverse(a);
|
||||
a = Operations.reverse(a);
|
||||
a = randomNoOp(a);
|
||||
return SpecialOperations.reverse(a);
|
||||
return Operations.reverse(a);
|
||||
case 4:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: concat empty string");
|
||||
}
|
||||
return BasicOperations.concatenateLight(a, BasicAutomata.makeEmptyStringLight());
|
||||
return Operations.concatenate(a, Automata.makeEmptyString());
|
||||
case 5:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" randomNoOp: union empty automaton");
|
||||
}
|
||||
return BasicOperations.unionLight(a, BasicAutomata.makeEmptyLight());
|
||||
return Operations.union(a, Automata.makeEmpty());
|
||||
}
|
||||
assert false;
|
||||
return null;
|
||||
}
|
||||
|
||||
private LightAutomaton unionTerms(Collection<BytesRef> terms) {
|
||||
LightAutomaton a;
|
||||
private Automaton unionTerms(Collection<BytesRef> terms) {
|
||||
Automaton a;
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: unionTerms: use union");
|
||||
}
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
for(BytesRef term : terms) {
|
||||
as.add(BasicAutomata.makeStringLight(term.utf8ToString()));
|
||||
as.add(Automata.makeString(term.utf8ToString()));
|
||||
}
|
||||
a = BasicOperations.unionLight(as);
|
||||
a = Operations.union(as);
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: unionTerms: use makeStringUnion");
|
||||
}
|
||||
List<BytesRef> termsList = new ArrayList<>(terms);
|
||||
Collections.sort(termsList);
|
||||
a = BasicAutomata.makeStringUnionLight(termsList);
|
||||
a = Automata.makeStringUnion(termsList);
|
||||
}
|
||||
|
||||
return randomNoOp(a);
|
||||
|
@ -657,7 +657,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
terms.add(new BytesRef(getRandomString(isAscii)));
|
||||
}
|
||||
|
||||
LightAutomaton a = unionTerms(terms);
|
||||
Automaton a = unionTerms(terms);
|
||||
assertSame(terms, a);
|
||||
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
|
@ -685,7 +685,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
terms = newTerms;
|
||||
boolean wasDeterministic1 = a.isDeterministic();
|
||||
a = BasicOperations.concatenateLight(BasicAutomata.makeStringLight(prefix.utf8ToString()), a);
|
||||
a = Operations.concatenate(Automata.makeString(prefix.utf8ToString()), a);
|
||||
assertEquals(wasDeterministic1, a.isDeterministic());
|
||||
}
|
||||
break;
|
||||
|
@ -704,7 +704,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
newTerms.add(newTerm);
|
||||
}
|
||||
terms = newTerms;
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(suffix.utf8ToString()));
|
||||
a = Operations.concatenate(a, Automata.makeString(suffix.utf8ToString()));
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -715,7 +715,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=determinize");
|
||||
}
|
||||
a = BasicOperations.determinize(a);
|
||||
a = Operations.determinize(a);
|
||||
assertTrue(a.isDeterministic());
|
||||
break;
|
||||
|
||||
|
@ -724,7 +724,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
System.out.println(" op=minimize");
|
||||
}
|
||||
// minimize
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
|
@ -739,8 +739,8 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
newTerms.add(new BytesRef(getRandomString(isAscii)));
|
||||
}
|
||||
terms.addAll(newTerms);
|
||||
LightAutomaton newA = unionTerms(newTerms);
|
||||
a = BasicOperations.unionLight(a, newA);
|
||||
Automaton newA = unionTerms(newTerms);
|
||||
a = Operations.union(a, newA);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -750,7 +750,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=optional");
|
||||
}
|
||||
a = BasicOperations.optionalLight(a);
|
||||
a = Operations.optional(a);
|
||||
terms.add(new BytesRef());
|
||||
}
|
||||
break;
|
||||
|
@ -762,7 +762,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
System.out.println(" op=minus finite");
|
||||
}
|
||||
if (terms.size() > 0) {
|
||||
RandomAcceptedStrings rasl = new RandomAcceptedStrings(BasicOperations.removeDeadStates(a));
|
||||
RandomAcceptedStrings rasl = new RandomAcceptedStrings(Operations.removeDeadStates(a));
|
||||
Set<BytesRef> toRemove = new HashSet<>();
|
||||
int numToRemove = TestUtil.nextInt(random(), 1, (terms.size()+1)/2);
|
||||
while (toRemove.size() < numToRemove) {
|
||||
|
@ -776,8 +776,8 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
boolean removed = terms.remove(term);
|
||||
assertTrue(removed);
|
||||
}
|
||||
LightAutomaton a2 = unionTerms(toRemove);
|
||||
a = BasicOperations.minusLight(a, a2);
|
||||
Automaton a2 = unionTerms(toRemove);
|
||||
a = Operations.minus(a, a2);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -785,7 +785,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
case 7:
|
||||
{
|
||||
// minus infinite
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
int count = TestUtil.nextInt(random(), 1, 5);
|
||||
Set<Integer> prefixes = new HashSet<>();
|
||||
while(prefixes.size() < count) {
|
||||
|
@ -800,7 +800,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
|
||||
for(int prefix : prefixes) {
|
||||
// prefix is a leading ascii byte; we remove <prefix>* from a
|
||||
LightAutomaton a2 = new LightAutomaton();
|
||||
Automaton a2 = new Automaton();
|
||||
int init = a2.createState();
|
||||
int state = a2.createState();
|
||||
a2.addTransition(init, state, prefix);
|
||||
|
@ -816,8 +816,8 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
LightAutomaton a2 = randomNoOp(BasicOperations.unionLight(as));
|
||||
a = BasicOperations.minusLight(a, a2);
|
||||
Automaton a2 = randomNoOp(Operations.union(as));
|
||||
a = Operations.minus(a, a2);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -828,7 +828,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
System.out.println(" op=intersect infinite count=" + count);
|
||||
}
|
||||
// intersect infinite
|
||||
List<LightAutomaton> as = new ArrayList<>();
|
||||
List<Automaton> as = new ArrayList<>();
|
||||
|
||||
Set<Integer> prefixes = new HashSet<>();
|
||||
while(prefixes.size() < count) {
|
||||
|
@ -841,7 +841,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
|
||||
for(int prefix : prefixes) {
|
||||
// prefix is a leading ascii byte; we retain <prefix>* in a
|
||||
LightAutomaton a2 = new LightAutomaton();
|
||||
Automaton a2 = new Automaton();
|
||||
int init = a2.createState();
|
||||
int state = a2.createState();
|
||||
a2.addTransition(init, state, prefix);
|
||||
|
@ -852,13 +852,13 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
prefixes.add(prefix);
|
||||
}
|
||||
|
||||
LightAutomaton a2 = BasicOperations.unionLight(as);
|
||||
Automaton a2 = Operations.union(as);
|
||||
if (random().nextBoolean()) {
|
||||
a2 = BasicOperations.determinize(a2);
|
||||
a2 = Operations.determinize(a2);
|
||||
} else if (random().nextBoolean()) {
|
||||
a2 = MinimizationOperationsLight.minimize(a2);
|
||||
a2 = MinimizationOperations.minimize(a2);
|
||||
}
|
||||
a = BasicOperations.intersectionLight(a, a2);
|
||||
a = Operations.intersection(a, a2);
|
||||
|
||||
Iterator<BytesRef> it = terms.iterator();
|
||||
while (it.hasNext()) {
|
||||
|
@ -882,7 +882,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=reverse");
|
||||
}
|
||||
a = SpecialOperations.reverse(a);
|
||||
a = Operations.reverse(a);
|
||||
Set<BytesRef> newTerms = new HashSet<>();
|
||||
for(BytesRef term : terms) {
|
||||
newTerms.add(new BytesRef(new StringBuilder(term.utf8ToString()).reverse().toString()));
|
||||
|
@ -906,7 +906,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
|
||||
}
|
||||
a = BasicOperations.unionLight(a, BasicAutomata.makeIntervalLight(min, max, digits));
|
||||
a = Operations.union(a, Automata.makeInterval(min, max, digits));
|
||||
StringBuilder b = new StringBuilder();
|
||||
for(int i=0;i<digits;i++) {
|
||||
b.append('0');
|
||||
|
@ -926,7 +926,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=remove the empty string");
|
||||
}
|
||||
a = BasicOperations.minusLight(a, BasicAutomata.makeEmptyStringLight());
|
||||
a = Operations.minus(a, Automata.makeEmptyString());
|
||||
terms.remove(new BytesRef());
|
||||
break;
|
||||
|
||||
|
@ -934,7 +934,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=add the empty string");
|
||||
}
|
||||
a = BasicOperations.unionLight(a, BasicAutomata.makeEmptyStringLight());
|
||||
a = Operations.union(a, Automata.makeEmptyString());
|
||||
terms.add(new BytesRef());
|
||||
break;
|
||||
}
|
||||
|
@ -945,19 +945,19 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
assertSame(terms, a);
|
||||
}
|
||||
|
||||
private void assertSame(Collection<BytesRef> terms, LightAutomaton a) {
|
||||
private void assertSame(Collection<BytesRef> terms, Automaton a) {
|
||||
|
||||
try {
|
||||
assertTrue(SpecialOperations.isFinite(a));
|
||||
assertFalse(BasicOperations.isTotal(a));
|
||||
assertTrue(Operations.isFinite(a));
|
||||
assertFalse(Operations.isTotal(a));
|
||||
|
||||
LightAutomaton detA = BasicOperations.determinize(a);
|
||||
Automaton detA = Operations.determinize(a);
|
||||
|
||||
// Make sure all terms are accepted:
|
||||
IntsRef scratch = new IntsRef();
|
||||
for(BytesRef term : terms) {
|
||||
Util.toIntsRef(term, scratch);
|
||||
assertTrue("failed to accept term=" + term.utf8ToString(), BasicOperations.run(detA, term.utf8ToString()));
|
||||
assertTrue("failed to accept term=" + term.utf8ToString(), Operations.run(detA, term.utf8ToString()));
|
||||
}
|
||||
|
||||
// Use getFiniteStrings:
|
||||
|
@ -967,7 +967,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
Util.toUTF32(term.utf8ToString(), intsRef);
|
||||
expected.add(intsRef);
|
||||
}
|
||||
Set<IntsRef> actual = SpecialOperations.getFiniteStrings(a, -1);
|
||||
Set<IntsRef> actual = Operations.getFiniteStrings(a, -1);
|
||||
|
||||
if (expected.equals(actual) == false) {
|
||||
System.out.println("FAILED:");
|
||||
|
@ -985,11 +985,11 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// Use sameLanguage:
|
||||
LightAutomaton a2 = BasicOperations.removeDeadStates(BasicOperations.determinize(unionTerms(terms)));
|
||||
assertTrue(BasicOperations.sameLanguage(a2, BasicOperations.removeDeadStates(BasicOperations.determinize(a))));
|
||||
Automaton a2 = Operations.removeDeadStates(Operations.determinize(unionTerms(terms)));
|
||||
assertTrue(Operations.sameLanguage(a2, Operations.removeDeadStates(Operations.determinize(a))));
|
||||
|
||||
// Do same check, in UTF8 space
|
||||
LightAutomaton utf8 = randomNoOp(new UTF32ToUTF8Light().convert(a));
|
||||
Automaton utf8 = randomNoOp(new UTF32ToUTF8().convert(a));
|
||||
|
||||
Set<IntsRef> expected2 = new HashSet<>();
|
||||
for(BytesRef term : terms) {
|
||||
|
@ -997,7 +997,7 @@ public class TestLightAutomaton extends LuceneTestCase {
|
|||
Util.toIntsRef(term, intsRef);
|
||||
expected2.add(intsRef);
|
||||
}
|
||||
assertEquals(expected2, SpecialOperations.getFiniteStrings(utf8, -1));
|
||||
assertEquals(expected2, Operations.getFiniteStrings(utf8, -1));
|
||||
} catch (AssertionError ae) {
|
||||
System.out.println("TEST: FAILED: not same");
|
||||
System.out.println(" terms (count=" + terms.size() + "):");
|
|
@ -1,106 +0,0 @@
|
|||
package org.apache.lucene.util.automaton;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.util.*;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
public class TestBasicOperations extends LuceneTestCase {
|
||||
/** Test string union. */
|
||||
public void testStringUnion() {
|
||||
List<BytesRef> strings = new ArrayList<>();
|
||||
for (int i = RandomInts.randomIntBetween(random(), 0, 1000); --i >= 0;) {
|
||||
strings.add(new BytesRef(TestUtil.randomUnicodeString(random())));
|
||||
}
|
||||
|
||||
Collections.sort(strings);
|
||||
LightAutomaton union = BasicAutomata.makeStringUnionLight(strings);
|
||||
assertTrue(union.isDeterministic());
|
||||
assertTrue(BasicOperations.sameLanguage(union, naiveUnion(strings)));
|
||||
}
|
||||
|
||||
private static LightAutomaton naiveUnion(List<BytesRef> strings) {
|
||||
LightAutomaton[] eachIndividual = new LightAutomaton[strings.size()];
|
||||
int i = 0;
|
||||
for (BytesRef bref : strings) {
|
||||
eachIndividual[i++] = BasicAutomata.makeStringLight(bref.utf8ToString());
|
||||
}
|
||||
return BasicOperations.determinize(BasicOperations.unionLight(Arrays.asList(eachIndividual)));
|
||||
}
|
||||
|
||||
/** Test concatenation with empty language returns empty */
|
||||
public void testEmptyLanguageConcatenate() {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight("a");
|
||||
LightAutomaton concat = BasicOperations.concatenateLight(a, BasicAutomata.makeEmptyLight());
|
||||
assertTrue(BasicOperations.isEmpty(concat));
|
||||
}
|
||||
|
||||
/** Test optimization to concatenate() with empty String to an NFA */
|
||||
public void testEmptySingletonNFAConcatenate() {
|
||||
LightAutomaton singleton = BasicAutomata.makeStringLight("");
|
||||
LightAutomaton expandedSingleton = singleton;
|
||||
// an NFA (two transitions for 't' from initial state)
|
||||
LightAutomaton nfa = BasicOperations.unionLight(BasicAutomata.makeStringLight("this"),
|
||||
BasicAutomata.makeStringLight("three"));
|
||||
LightAutomaton concat1 = BasicOperations.concatenateLight(expandedSingleton, nfa);
|
||||
LightAutomaton concat2 = BasicOperations.concatenateLight(singleton, nfa);
|
||||
assertFalse(concat2.isDeterministic());
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(concat1),
|
||||
BasicOperations.determinize(concat2)));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(nfa),
|
||||
BasicOperations.determinize(concat1)));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicOperations.determinize(nfa),
|
||||
BasicOperations.determinize(concat2)));
|
||||
}
|
||||
|
||||
public void testGetRandomAcceptedString() throws Throwable {
|
||||
final int ITER1 = atLeast(100);
|
||||
final int ITER2 = atLeast(100);
|
||||
for(int i=0;i<ITER1;i++) {
|
||||
|
||||
final RegExp re = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
|
||||
//System.out.println("TEST i=" + i + " re=" + re);
|
||||
final LightAutomaton a = BasicOperations.determinize(re.toLightAutomaton());
|
||||
assertFalse(BasicOperations.isEmpty(a));
|
||||
|
||||
final AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
|
||||
for(int j=0;j<ITER2;j++) {
|
||||
//System.out.println("TEST: j=" + j);
|
||||
int[] acc = null;
|
||||
try {
|
||||
acc = rx.getRandomAcceptedString(random());
|
||||
final String s = UnicodeUtil.newString(acc, 0, acc.length);
|
||||
//a.writeDot("adot");
|
||||
assertTrue(BasicOperations.run(a, s));
|
||||
} catch (Throwable t) {
|
||||
System.out.println("regexp: " + re);
|
||||
if (acc != null) {
|
||||
System.out.println("fail acc re=" + re + " count=" + acc.length);
|
||||
for(int k=0;k<acc.length;k++) {
|
||||
System.out.println(" " + Integer.toHexString(acc[k]));
|
||||
}
|
||||
}
|
||||
throw t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -36,7 +36,7 @@ public class TestCompiledAutomaton extends LuceneTestCase {
|
|||
terms.add(new BytesRef(s));
|
||||
}
|
||||
Collections.sort(terms);
|
||||
final LightAutomaton a = DaciukMihovAutomatonBuilderLight.build(terms);
|
||||
final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
|
||||
return new CompiledAutomaton(a, true, false);
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ public class TestDeterminism extends LuceneTestCase {
|
|||
public void testRegexps() throws Exception {
|
||||
int num = atLeast(500);
|
||||
for (int i = 0; i < num; i++) {
|
||||
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toLightAutomaton());
|
||||
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,42 +37,42 @@ public class TestDeterminism extends LuceneTestCase {
|
|||
public void testAgainstSimple() throws Exception {
|
||||
int num = atLeast(200);
|
||||
for (int i = 0; i < num; i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
a = AutomatonTestUtil.determinizeSimpleLight(a);
|
||||
LightAutomaton b = BasicOperations.determinize(a);
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
a = AutomatonTestUtil.determinizeSimple(a);
|
||||
Automaton b = Operations.determinize(a);
|
||||
// TODO: more verifications possible?
|
||||
assertTrue(BasicOperations.sameLanguage(a, b));
|
||||
assertTrue(Operations.sameLanguage(a, b));
|
||||
}
|
||||
}
|
||||
|
||||
private static void assertAutomaton(LightAutomaton a) {
|
||||
a = BasicOperations.determinize(BasicOperations.removeDeadStates(a));
|
||||
private static void assertAutomaton(Automaton a) {
|
||||
a = Operations.determinize(Operations.removeDeadStates(a));
|
||||
|
||||
// complement(complement(a)) = a
|
||||
LightAutomaton equivalent = BasicOperations.complementLight(BasicOperations.complementLight(a));
|
||||
assertTrue(BasicOperations.sameLanguage(a, equivalent));
|
||||
Automaton equivalent = Operations.complement(Operations.complement(a));
|
||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
||||
|
||||
// a union a = a
|
||||
equivalent = BasicOperations.determinize(BasicOperations.removeDeadStates(BasicOperations.unionLight(a, a)));
|
||||
assertTrue(BasicOperations.sameLanguage(a, equivalent));
|
||||
equivalent = Operations.determinize(Operations.removeDeadStates(Operations.union(a, a)));
|
||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
||||
|
||||
// a intersect a = a
|
||||
equivalent = BasicOperations.determinize(BasicOperations.removeDeadStates(BasicOperations.intersectionLight(a, a)));
|
||||
assertTrue(BasicOperations.sameLanguage(a, equivalent));
|
||||
equivalent = Operations.determinize(Operations.removeDeadStates(Operations.intersection(a, a)));
|
||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
||||
|
||||
// a minus a = empty
|
||||
LightAutomaton empty = BasicOperations.minusLight(a, a);
|
||||
assertTrue(BasicOperations.isEmpty(empty));
|
||||
Automaton empty = Operations.minus(a, a);
|
||||
assertTrue(Operations.isEmpty(empty));
|
||||
|
||||
// as long as don't accept the empty string
|
||||
// then optional(a) - empty = a
|
||||
if (!BasicOperations.run(a, "")) {
|
||||
if (!Operations.run(a, "")) {
|
||||
//System.out.println("test " + a);
|
||||
LightAutomaton optional = BasicOperations.optionalLight(a);
|
||||
Automaton optional = Operations.optional(a);
|
||||
//System.out.println("optional " + optional);
|
||||
equivalent = BasicOperations.minusLight(optional, BasicAutomata.makeEmptyStringLight());
|
||||
equivalent = Operations.minus(optional, Automata.makeEmptyString());
|
||||
//System.out.println("equiv " + equivalent);
|
||||
assertTrue(BasicOperations.sameLanguage(a, equivalent));
|
||||
assertTrue(Operations.sameLanguage(a, equivalent));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ import org.apache.lucene.util.TestUtil;
|
|||
* somewhat randomly, by determinizing a huge random lexicon.
|
||||
*/
|
||||
public class TestDeterminizeLexicon extends LuceneTestCase {
|
||||
private List<LightAutomaton> automata = new ArrayList<>();
|
||||
private List<Automaton> automata = new ArrayList<>();
|
||||
private List<String> terms = new ArrayList<>();
|
||||
|
||||
public void testLexicon() throws Exception {
|
||||
|
@ -41,7 +41,7 @@ public class TestDeterminizeLexicon extends LuceneTestCase {
|
|||
for (int j = 0; j < 5000; j++) {
|
||||
String randomString = TestUtil.randomUnicodeString(random());
|
||||
terms.add(randomString);
|
||||
automata.add(BasicAutomata.makeStringLight(randomString));
|
||||
automata.add(Automata.makeString(randomString));
|
||||
}
|
||||
assertLexicon();
|
||||
}
|
||||
|
@ -49,11 +49,11 @@ public class TestDeterminizeLexicon extends LuceneTestCase {
|
|||
|
||||
public void assertLexicon() throws Exception {
|
||||
Collections.shuffle(automata, random());
|
||||
LightAutomaton lex = BasicOperations.unionLight(automata);
|
||||
lex = BasicOperations.determinize(lex);
|
||||
assertTrue(SpecialOperations.isFinite(lex));
|
||||
Automaton lex = Operations.union(automata);
|
||||
lex = Operations.determinize(lex);
|
||||
assertTrue(Operations.isFinite(lex));
|
||||
for (String s : terms) {
|
||||
assertTrue(BasicOperations.run(lex, s));
|
||||
assertTrue(Operations.run(lex, s));
|
||||
}
|
||||
final ByteRunAutomaton lexByte = new ByteRunAutomaton(lex);
|
||||
for (String s : terms) {
|
||||
|
|
|
@ -41,7 +41,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
|
||||
// LUCENE-3094
|
||||
public void testNoWastedStates() throws Exception {
|
||||
assertFalse(BasicOperations.hasDeadStatesFromInitial(new LevenshteinAutomata("abc", false).toAutomaton(1)));
|
||||
assertFalse(Operations.hasDeadStatesFromInitial(new LevenshteinAutomata("abc", false).toAutomaton(1)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -66,8 +66,8 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
private void assertLev(String s, int maxDistance) {
|
||||
LevenshteinAutomata builder = new LevenshteinAutomata(s, false);
|
||||
LevenshteinAutomata tbuilder = new LevenshteinAutomata(s, true);
|
||||
LightAutomaton automata[] = new LightAutomaton[maxDistance + 1];
|
||||
LightAutomaton tautomata[] = new LightAutomaton[maxDistance + 1];
|
||||
Automaton automata[] = new Automaton[maxDistance + 1];
|
||||
Automaton tautomata[] = new Automaton[maxDistance + 1];
|
||||
for (int n = 0; n < automata.length; n++) {
|
||||
automata[n] = builder.toAutomaton(n);
|
||||
tautomata[n] = tbuilder.toAutomaton(n);
|
||||
|
@ -75,36 +75,36 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
assertNotNull(tautomata[n]);
|
||||
assertTrue(automata[n].isDeterministic());
|
||||
assertTrue(tautomata[n].isDeterministic());
|
||||
assertTrue(SpecialOperations.isFinite(automata[n]));
|
||||
assertTrue(SpecialOperations.isFinite(tautomata[n]));
|
||||
assertFalse(BasicOperations.hasDeadStatesFromInitial(automata[n]));
|
||||
assertFalse(BasicOperations.hasDeadStatesFromInitial(tautomata[n]));
|
||||
assertTrue(Operations.isFinite(automata[n]));
|
||||
assertTrue(Operations.isFinite(tautomata[n]));
|
||||
assertFalse(Operations.hasDeadStatesFromInitial(automata[n]));
|
||||
assertFalse(Operations.hasDeadStatesFromInitial(tautomata[n]));
|
||||
// check that the dfa for n-1 accepts a subset of the dfa for n
|
||||
if (n > 0) {
|
||||
assertTrue(BasicOperations.subsetOf(BasicOperations.removeDeadStates(automata[n-1]),
|
||||
BasicOperations.removeDeadStates(automata[n])));
|
||||
assertTrue(BasicOperations.subsetOf(BasicOperations.removeDeadStates(automata[n-1]),
|
||||
BasicOperations.removeDeadStates(tautomata[n])));
|
||||
assertTrue(BasicOperations.subsetOf(BasicOperations.removeDeadStates(tautomata[n-1]),
|
||||
BasicOperations.removeDeadStates(automata[n])));
|
||||
assertTrue(BasicOperations.subsetOf(BasicOperations.removeDeadStates(tautomata[n-1]),
|
||||
BasicOperations.removeDeadStates(tautomata[n])));
|
||||
assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n-1]),
|
||||
Operations.removeDeadStates(automata[n])));
|
||||
assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n-1]),
|
||||
Operations.removeDeadStates(tautomata[n])));
|
||||
assertTrue(Operations.subsetOf(Operations.removeDeadStates(tautomata[n-1]),
|
||||
Operations.removeDeadStates(automata[n])));
|
||||
assertTrue(Operations.subsetOf(Operations.removeDeadStates(tautomata[n-1]),
|
||||
Operations.removeDeadStates(tautomata[n])));
|
||||
assertNotSame(automata[n-1], automata[n]);
|
||||
}
|
||||
// check that Lev(N) is a subset of LevT(N)
|
||||
assertTrue(BasicOperations.subsetOf(BasicOperations.removeDeadStates(automata[n]),
|
||||
BasicOperations.removeDeadStates(tautomata[n])));
|
||||
assertTrue(Operations.subsetOf(Operations.removeDeadStates(automata[n]),
|
||||
Operations.removeDeadStates(tautomata[n])));
|
||||
// special checks for specific n
|
||||
switch(n) {
|
||||
case 0:
|
||||
// easy, matches the string itself
|
||||
assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeStringLight(s), BasicOperations.removeDeadStates(automata[0])));
|
||||
assertTrue(BasicOperations.sameLanguage(BasicAutomata.makeStringLight(s), BasicOperations.removeDeadStates(tautomata[0])));
|
||||
assertTrue(Operations.sameLanguage(Automata.makeString(s), Operations.removeDeadStates(automata[0])));
|
||||
assertTrue(Operations.sameLanguage(Automata.makeString(s), Operations.removeDeadStates(tautomata[0])));
|
||||
break;
|
||||
case 1:
|
||||
// generate a lev1 naively, and check the accepted lang is the same.
|
||||
assertTrue(BasicOperations.sameLanguage(naiveLev1(s), BasicOperations.removeDeadStates(automata[1])));
|
||||
assertTrue(BasicOperations.sameLanguage(naiveLev1T(s), BasicOperations.removeDeadStates(tautomata[1])));
|
||||
assertTrue(Operations.sameLanguage(naiveLev1(s), Operations.removeDeadStates(automata[1])));
|
||||
assertTrue(Operations.sameLanguage(naiveLev1T(s), Operations.removeDeadStates(tautomata[1])));
|
||||
break;
|
||||
default:
|
||||
assertBruteForce(s, automata[n], n);
|
||||
|
@ -118,14 +118,14 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all 1-character insertions, deletions, and
|
||||
* substitutions of s.
|
||||
*/
|
||||
private LightAutomaton naiveLev1(String s) {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight(s);
|
||||
a = BasicOperations.unionLight(a, insertionsOf(s));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = BasicOperations.unionLight(a, deletionsOf(s));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = BasicOperations.unionLight(a, substitutionsOf(s));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
private Automaton naiveLev1(String s) {
|
||||
Automaton a = Automata.makeString(s);
|
||||
a = Operations.union(a, insertionsOf(s));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
a = Operations.union(a, deletionsOf(s));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
a = Operations.union(a, substitutionsOf(s));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
|
||||
return a;
|
||||
}
|
||||
|
@ -134,10 +134,10 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all 1-character insertions, deletions,
|
||||
* substitutions, and transpositions of s.
|
||||
*/
|
||||
private LightAutomaton naiveLev1T(String s) {
|
||||
LightAutomaton a = naiveLev1(s);
|
||||
a = BasicOperations.unionLight(a, transpositionsOf(s));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
private Automaton naiveLev1T(String s) {
|
||||
Automaton a = naiveLev1(s);
|
||||
a = Operations.union(a, transpositionsOf(s));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -145,18 +145,18 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all 1-character insertions of s (inserting
|
||||
* one character)
|
||||
*/
|
||||
private LightAutomaton insertionsOf(String s) {
|
||||
List<LightAutomaton> list = new ArrayList<>();
|
||||
private Automaton insertionsOf(String s) {
|
||||
List<Automaton> list = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i <= s.length(); i++) {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeAnyCharLight());
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i)));
|
||||
Automaton a = Automata.makeString(s.substring(0, i));
|
||||
a = Operations.concatenate(a, Automata.makeAnyChar());
|
||||
a = Operations.concatenate(a, Automata.makeString(s.substring(i)));
|
||||
list.add(a);
|
||||
}
|
||||
|
||||
LightAutomaton a = BasicOperations.unionLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Operations.union(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -164,17 +164,17 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all 1-character deletions of s (deleting
|
||||
* one character).
|
||||
*/
|
||||
private LightAutomaton deletionsOf(String s) {
|
||||
List<LightAutomaton> list = new ArrayList<>();
|
||||
private Automaton deletionsOf(String s) {
|
||||
List<Automaton> list = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i + 1)));
|
||||
Automaton a = Automata.makeString(s.substring(0, i));
|
||||
a = Operations.concatenate(a, Automata.makeString(s.substring(i + 1)));
|
||||
list.add(a);
|
||||
}
|
||||
|
||||
LightAutomaton a = BasicOperations.unionLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Operations.union(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -182,18 +182,18 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all 1-character substitutions of s
|
||||
* (replacing one character)
|
||||
*/
|
||||
private LightAutomaton substitutionsOf(String s) {
|
||||
List<LightAutomaton> list = new ArrayList<>();
|
||||
private Automaton substitutionsOf(String s) {
|
||||
List<Automaton> list = new ArrayList<>();
|
||||
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
LightAutomaton a = BasicAutomata.makeStringLight(s.substring(0, i));
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeAnyCharLight());
|
||||
a = BasicOperations.concatenateLight(a, BasicAutomata.makeStringLight(s.substring(i + 1)));
|
||||
Automaton a = Automata.makeString(s.substring(0, i));
|
||||
a = Operations.concatenate(a, Automata.makeAnyChar());
|
||||
a = Operations.concatenate(a, Automata.makeString(s.substring(i + 1)));
|
||||
list.add(a);
|
||||
}
|
||||
|
||||
LightAutomaton a = BasicOperations.unionLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Operations.union(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -201,11 +201,11 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
* Return an automaton that accepts all transpositions of s
|
||||
* (transposing two adjacent characters)
|
||||
*/
|
||||
private LightAutomaton transpositionsOf(String s) {
|
||||
private Automaton transpositionsOf(String s) {
|
||||
if (s.length() < 2) {
|
||||
return BasicAutomata.makeEmptyLight();
|
||||
return Automata.makeEmpty();
|
||||
}
|
||||
List<LightAutomaton> list = new ArrayList<>();
|
||||
List<Automaton> list = new ArrayList<>();
|
||||
for (int i = 0; i < s.length()-1; i++) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(s.substring(0, i));
|
||||
|
@ -214,15 +214,15 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
sb.append(s.substring(i+2, s.length()));
|
||||
String st = sb.toString();
|
||||
if (!st.equals(s)) {
|
||||
list.add(BasicAutomata.makeStringLight(st));
|
||||
list.add(Automata.makeString(st));
|
||||
}
|
||||
}
|
||||
LightAutomaton a = BasicOperations.unionLight(list);
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Operations.union(list);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
return a;
|
||||
}
|
||||
|
||||
private void assertBruteForce(String input, LightAutomaton dfa, int distance) {
|
||||
private void assertBruteForce(String input, Automaton dfa, int distance) {
|
||||
CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa);
|
||||
int maxLen = input.length() + distance + 1;
|
||||
int maxNum = (int) Math.pow(2, maxLen);
|
||||
|
@ -237,7 +237,7 @@ public class TestLevenshteinAutomata extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertBruteForceT(String input, LightAutomaton dfa, int distance) {
|
||||
private void assertBruteForceT(String input, Automaton dfa, int distance) {
|
||||
CharacterRunAutomaton ra = new CharacterRunAutomaton(dfa);
|
||||
int maxLen = input.length() + distance + 1;
|
||||
int maxNum = (int) Math.pow(2, maxLen);
|
||||
|
|
|
@ -27,10 +27,10 @@ public class TestMinimize extends LuceneTestCase {
|
|||
public void testBasic() {
|
||||
int num = atLeast(200);
|
||||
for (int i = 0; i < num; i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
LightAutomaton la = BasicOperations.determinize(BasicOperations.removeDeadStates(a));
|
||||
LightAutomaton lb = MinimizationOperationsLight.minimize(a);
|
||||
assertTrue(BasicOperations.sameLanguage(la, lb));
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton la = Operations.determinize(Operations.removeDeadStates(a));
|
||||
Automaton lb = MinimizationOperations.minimize(a);
|
||||
assertTrue(Operations.sameLanguage(la, lb));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,10 +40,10 @@ public class TestMinimize extends LuceneTestCase {
|
|||
public void testAgainstBrzozowski() {
|
||||
int num = atLeast(200);
|
||||
for (int i = 0; i < num; i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
a = AutomatonTestUtil.minimizeSimple(a);
|
||||
LightAutomaton b = MinimizationOperationsLight.minimize(a);
|
||||
assertTrue(BasicOperations.sameLanguage(a, b));
|
||||
Automaton b = MinimizationOperations.minimize(a);
|
||||
assertTrue(Operations.sameLanguage(a, b));
|
||||
assertEquals(a.getNumStates(), b.getNumStates());
|
||||
int numStates = a.getNumStates();
|
||||
|
||||
|
@ -62,6 +62,6 @@ public class TestMinimize extends LuceneTestCase {
|
|||
|
||||
/** n^2 space usage in Hopcroft minimization? */
|
||||
public void testMinimizeHuge() {
|
||||
new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toLightAutomaton();
|
||||
new RegExp("+-*(A|.....|BC)*]", RegExp.NONE).toAutomaton();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,36 +17,109 @@ package org.apache.lucene.util.automaton;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomInts;
|
||||
|
||||
public class TestSpecialOperations extends LuceneTestCase {
|
||||
public class TestOperations extends LuceneTestCase {
|
||||
/** Test string union. */
|
||||
public void testStringUnion() {
|
||||
List<BytesRef> strings = new ArrayList<>();
|
||||
for (int i = RandomInts.randomIntBetween(random(), 0, 1000); --i >= 0;) {
|
||||
strings.add(new BytesRef(TestUtil.randomUnicodeString(random())));
|
||||
}
|
||||
|
||||
Collections.sort(strings);
|
||||
Automaton union = Automata.makeStringUnion(strings);
|
||||
assertTrue(union.isDeterministic());
|
||||
assertTrue(Operations.sameLanguage(union, naiveUnion(strings)));
|
||||
}
|
||||
|
||||
private static Automaton naiveUnion(List<BytesRef> strings) {
|
||||
Automaton[] eachIndividual = new Automaton[strings.size()];
|
||||
int i = 0;
|
||||
for (BytesRef bref : strings) {
|
||||
eachIndividual[i++] = Automata.makeString(bref.utf8ToString());
|
||||
}
|
||||
return Operations.determinize(Operations.union(Arrays.asList(eachIndividual)));
|
||||
}
|
||||
|
||||
/** Test concatenation with empty language returns empty */
|
||||
public void testEmptyLanguageConcatenate() {
|
||||
Automaton a = Automata.makeString("a");
|
||||
Automaton concat = Operations.concatenate(a, Automata.makeEmpty());
|
||||
assertTrue(Operations.isEmpty(concat));
|
||||
}
|
||||
|
||||
/** Test optimization to concatenate() with empty String to an NFA */
|
||||
public void testEmptySingletonNFAConcatenate() {
|
||||
Automaton singleton = Automata.makeString("");
|
||||
Automaton expandedSingleton = singleton;
|
||||
// an NFA (two transitions for 't' from initial state)
|
||||
Automaton nfa = Operations.union(Automata.makeString("this"),
|
||||
Automata.makeString("three"));
|
||||
Automaton concat1 = Operations.concatenate(expandedSingleton, nfa);
|
||||
Automaton concat2 = Operations.concatenate(singleton, nfa);
|
||||
assertFalse(concat2.isDeterministic());
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(concat1),
|
||||
Operations.determinize(concat2)));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
|
||||
Operations.determinize(concat1)));
|
||||
assertTrue(Operations.sameLanguage(Operations.determinize(nfa),
|
||||
Operations.determinize(concat2)));
|
||||
}
|
||||
|
||||
public void testGetRandomAcceptedString() throws Throwable {
|
||||
final int ITER1 = atLeast(100);
|
||||
final int ITER2 = atLeast(100);
|
||||
for(int i=0;i<ITER1;i++) {
|
||||
|
||||
final RegExp re = new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE);
|
||||
//System.out.println("TEST i=" + i + " re=" + re);
|
||||
final Automaton a = Operations.determinize(re.toAutomaton());
|
||||
assertFalse(Operations.isEmpty(a));
|
||||
|
||||
final AutomatonTestUtil.RandomAcceptedStrings rx = new AutomatonTestUtil.RandomAcceptedStrings(a);
|
||||
for(int j=0;j<ITER2;j++) {
|
||||
//System.out.println("TEST: j=" + j);
|
||||
int[] acc = null;
|
||||
try {
|
||||
acc = rx.getRandomAcceptedString(random());
|
||||
final String s = UnicodeUtil.newString(acc, 0, acc.length);
|
||||
//a.writeDot("adot");
|
||||
assertTrue(Operations.run(a, s));
|
||||
} catch (Throwable t) {
|
||||
System.out.println("regexp: " + re);
|
||||
if (acc != null) {
|
||||
System.out.println("fail acc re=" + re + " count=" + acc.length);
|
||||
for(int k=0;k<acc.length;k++) {
|
||||
System.out.println(" " + Integer.toHexString(acc[k]));
|
||||
}
|
||||
}
|
||||
throw t;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* tests against the original brics implementation.
|
||||
*/
|
||||
public void testIsFinite() {
|
||||
int num = atLeast(200);
|
||||
for (int i = 0; i < num; i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
assertEquals(AutomatonTestUtil.isFiniteSlow(a), SpecialOperations.isFinite(a));
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
assertEquals(AutomatonTestUtil.isFiniteSlow(a), Operations.isFinite(a));
|
||||
}
|
||||
}
|
||||
|
||||
/** Pass false for testRecursive if the expected strings
|
||||
* may be too long */
|
||||
private Set<IntsRef> getFiniteStrings(LightAutomaton a, int limit, boolean testRecursive) {
|
||||
Set<IntsRef> result = SpecialOperations.getFiniteStrings(a, limit);
|
||||
private Set<IntsRef> getFiniteStrings(Automaton a, int limit, boolean testRecursive) {
|
||||
Set<IntsRef> result = Operations.getFiniteStrings(a, limit);
|
||||
if (testRecursive) {
|
||||
assertEquals(AutomatonTestUtil.getFiniteStringsRecursiveLight(a, limit), result);
|
||||
assertEquals(AutomatonTestUtil.getFiniteStringsRecursive(a, limit), result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -55,8 +128,8 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
* Basic test for getFiniteStrings
|
||||
*/
|
||||
public void testFiniteStringsBasic() {
|
||||
LightAutomaton a = BasicOperations.unionLight(BasicAutomata.makeStringLight("dog"), BasicAutomata.makeStringLight("duck"));
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
Automaton a = Operations.union(Automata.makeString("dog"), Automata.makeString("duck"));
|
||||
a = MinimizationOperations.minimize(a);
|
||||
Set<IntsRef> strings = getFiniteStrings(a, -1, true);
|
||||
assertEquals(2, strings.size());
|
||||
IntsRef dog = new IntsRef();
|
||||
|
@ -73,7 +146,7 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
String bigString1 = new String(chars);
|
||||
TestUtil.randomFixedLengthUnicodeString(random(), chars, 0, chars.length);
|
||||
String bigString2 = new String(chars);
|
||||
LightAutomaton a = BasicOperations.unionLight(BasicAutomata.makeStringLight(bigString1), BasicAutomata.makeStringLight(bigString2));
|
||||
Automaton a = Operations.union(Automata.makeString(bigString1), Automata.makeString(bigString2));
|
||||
Set<IntsRef> strings = getFiniteStrings(a, -1, false);
|
||||
assertEquals(2, strings.size());
|
||||
IntsRef scratch = new IntsRef();
|
||||
|
@ -91,10 +164,10 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
}
|
||||
|
||||
Set<IntsRef> strings = new HashSet<IntsRef>();
|
||||
List<LightAutomaton> automata = new ArrayList<>();
|
||||
List<Automaton> automata = new ArrayList<>();
|
||||
for(int i=0;i<numStrings;i++) {
|
||||
String s = TestUtil.randomSimpleString(random(), 1, 200);
|
||||
automata.add(BasicAutomata.makeStringLight(s));
|
||||
automata.add(Automata.makeString(s));
|
||||
IntsRef scratch = new IntsRef();
|
||||
Util.toUTF32(s.toCharArray(), 0, s.length(), scratch);
|
||||
strings.add(scratch);
|
||||
|
@ -107,9 +180,9 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
// DaciukMihovAutomatonBuilder here
|
||||
|
||||
// TODO: what other random things can we do here...
|
||||
LightAutomaton a = BasicOperations.unionLight(automata);
|
||||
Automaton a = Operations.union(automata);
|
||||
if (random().nextBoolean()) {
|
||||
a = MinimizationOperationsLight.minimize(a);
|
||||
a = MinimizationOperations.minimize(a);
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: a.minimize numStates=" + a.getNumStates());
|
||||
}
|
||||
|
@ -117,12 +190,12 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println("TEST: a.determinize");
|
||||
}
|
||||
a = BasicOperations.determinize(a);
|
||||
a = Operations.determinize(a);
|
||||
} else if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: a.removeDeadStates");
|
||||
}
|
||||
a = BasicOperations.removeDeadStates(a);
|
||||
a = Operations.removeDeadStates(a);
|
||||
}
|
||||
|
||||
Set<IntsRef> actual = getFiniteStrings(a, -1, true);
|
||||
|
@ -152,7 +225,7 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
|
||||
public void testWithCycle() throws Exception {
|
||||
try {
|
||||
SpecialOperations.getFiniteStrings(new RegExp("abc.*", RegExp.NONE).toLightAutomaton(), -1);
|
||||
Operations.getFiniteStrings(new RegExp("abc.*", RegExp.NONE).toAutomaton(), -1);
|
||||
fail("did not hit exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
|
@ -164,24 +237,24 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
// automaton:
|
||||
int iters = atLeast(100);
|
||||
for(int i=0;i<iters;i++) {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
try {
|
||||
// Must pass a limit because the random automaton
|
||||
// can accept MANY strings:
|
||||
SpecialOperations.getFiniteStrings(a, TestUtil.nextInt(random(), 1, 1000));
|
||||
Operations.getFiniteStrings(a, TestUtil.nextInt(random(), 1, 1000));
|
||||
// NOTE: cannot do this, because the method is not
|
||||
// guaranteed to detect cycles when you have a limit
|
||||
//assertTrue(SpecialOperations.isFinite(a));
|
||||
//assertTrue(Operations.isFinite(a));
|
||||
} catch (IllegalArgumentException iae) {
|
||||
assertFalse(SpecialOperations.isFinite(a));
|
||||
assertFalse(Operations.isFinite(a));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testInvalidLimit() {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
try {
|
||||
SpecialOperations.getFiniteStrings(a, -7);
|
||||
Operations.getFiniteStrings(a, -7);
|
||||
fail("did not hit exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
|
@ -189,9 +262,9 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testInvalidLimit2() {
|
||||
LightAutomaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
Automaton a = AutomatonTestUtil.randomAutomaton(random());
|
||||
try {
|
||||
SpecialOperations.getFiniteStrings(a, 0);
|
||||
Operations.getFiniteStrings(a, 0);
|
||||
fail("did not hit exception");
|
||||
} catch (IllegalArgumentException iae) {
|
||||
// expected
|
||||
|
@ -199,7 +272,7 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSingletonNoLimit() {
|
||||
Set<IntsRef> result = SpecialOperations.getFiniteStrings(BasicAutomata.makeStringLight("foobar"), -1);
|
||||
Set<IntsRef> result = Operations.getFiniteStrings(Automata.makeString("foobar"), -1);
|
||||
assertEquals(1, result.size());
|
||||
IntsRef scratch = new IntsRef();
|
||||
Util.toUTF32("foobar".toCharArray(), 0, 6, scratch);
|
||||
|
@ -207,7 +280,7 @@ public class TestSpecialOperations extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testSingletonLimit1() {
|
||||
Set<IntsRef> result = SpecialOperations.getFiniteStrings(BasicAutomata.makeStringLight("foobar"), 1);
|
||||
Set<IntsRef> result = Operations.getFiniteStrings(Automata.makeString("foobar"), 1);
|
||||
assertEquals(1, result.size());
|
||||
IntsRef scratch = new IntsRef();
|
||||
Util.toUTF32("foobar".toCharArray(), 0, 6, scratch);
|
|
@ -155,14 +155,14 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
continue;
|
||||
}
|
||||
|
||||
LightAutomaton a = BasicAutomata.makeCharRangeLight(startCode, endCode);
|
||||
Automaton a = Automata.makeCharRange(startCode, endCode);
|
||||
testOne(r, new ByteRunAutomaton(a), startCode, endCode, ITERS_PER_DFA);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSpecialCase() {
|
||||
RegExp re = new RegExp(".?");
|
||||
LightAutomaton automaton = re.toLightAutomaton();
|
||||
Automaton automaton = re.toAutomaton();
|
||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
// make sure character dfa accepts empty string
|
||||
|
@ -178,7 +178,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
public void testSpecialCase2() throws Exception {
|
||||
RegExp re = new RegExp(".+\u0775");
|
||||
String input = "\ufadc\ufffd\ub80b\uda5a\udc68\uf234\u0056\uda5b\udcc1\ufffd\ufffd\u0775";
|
||||
LightAutomaton automaton = re.toLightAutomaton();
|
||||
Automaton automaton = re.toAutomaton();
|
||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
|
||||
|
@ -191,7 +191,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
public void testSpecialCase3() throws Exception {
|
||||
RegExp re = new RegExp("(\\鯺)*(.)*\\Ӕ");
|
||||
String input = "\u5cfd\ufffd\ub2f7\u0033\ue304\u51d7\u3692\udb50\udfb3\u0576\udae2\udc62\u0053\u0449\u04d4";
|
||||
LightAutomaton automaton = re.toLightAutomaton();
|
||||
Automaton automaton = re.toAutomaton();
|
||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
|
||||
|
@ -204,7 +204,7 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
public void testRandomRegexes() throws Exception {
|
||||
int num = atLeast(250);
|
||||
for (int i = 0; i < num; i++) {
|
||||
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toLightAutomaton());
|
||||
assertAutomaton(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -212,17 +212,17 @@ public class TestUTF32ToUTF8 extends LuceneTestCase {
|
|||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
String s = TestUtil.randomRealisticUnicodeString(random());
|
||||
LightAutomaton a = BasicAutomata.makeStringLight(s);
|
||||
LightAutomaton utf8 = new UTF32ToUTF8Light().convert(a);
|
||||
Automaton a = Automata.makeString(s);
|
||||
Automaton utf8 = new UTF32ToUTF8().convert(a);
|
||||
IntsRef ints = new IntsRef();
|
||||
Util.toIntsRef(new BytesRef(s), ints);
|
||||
Set<IntsRef> set = new HashSet<>();
|
||||
set.add(ints);
|
||||
assertEquals(set, SpecialOperations.getFiniteStrings(utf8, -1));
|
||||
assertEquals(set, Operations.getFiniteStrings(utf8, -1));
|
||||
}
|
||||
}
|
||||
|
||||
private void assertAutomaton(LightAutomaton automaton) throws Exception {
|
||||
private void assertAutomaton(Automaton automaton) throws Exception {
|
||||
CharacterRunAutomaton cra = new CharacterRunAutomaton(automaton);
|
||||
ByteRunAutomaton bra = new ByteRunAutomaton(automaton);
|
||||
final AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(automaton);
|
||||
|
|
|
@ -69,7 +69,7 @@ import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
|||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum.InputOutput;
|
||||
import org.apache.lucene.util.fst.FST.Arc;
|
||||
|
@ -346,7 +346,7 @@ public class TestFSTs extends LuceneTestCase {
|
|||
BytesRef term;
|
||||
int ord = 0;
|
||||
|
||||
LightAutomaton automaton = new RegExp(".*", RegExp.NONE).toLightAutomaton();
|
||||
Automaton automaton = new RegExp(".*", RegExp.NONE).toAutomaton();
|
||||
final TermsEnum termsEnum2 = terms.intersect(new CompiledAutomaton(automaton, false, false), null);
|
||||
|
||||
while((term = termsEnum.next()) != null) {
|
||||
|
|
|
@ -46,11 +46,11 @@ import org.apache.lucene.search.spans.SpanPositionCheckQuery;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/**
|
||||
* Support for highlighting multiterm queries in PostingsHighlighter.
|
||||
|
@ -95,7 +95,7 @@ class MultiTermHighlighting {
|
|||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (aq.getField().equals(field)) {
|
||||
list.add(new CharacterRunAutomaton(aq.getLightAutomaton()) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return aq.toString();
|
||||
|
@ -106,8 +106,8 @@ class MultiTermHighlighting {
|
|||
final PrefixQuery pq = (PrefixQuery) query;
|
||||
Term prefix = pq.getPrefix();
|
||||
if (prefix.field().equals(field)) {
|
||||
list.add(new CharacterRunAutomaton(BasicOperations.concatenateLight(BasicAutomata.makeStringLight(prefix.text()),
|
||||
BasicAutomata.makeAnyStringLight())) {
|
||||
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
|
||||
Automata.makeAnyString())) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return pq.toString();
|
||||
|
@ -127,7 +127,7 @@ class MultiTermHighlighting {
|
|||
String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength);
|
||||
LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions());
|
||||
String prefix = UnicodeUtil.newString(termText, 0, prefixLength);
|
||||
LightAutomaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
|
||||
Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
|
||||
list.add(new CharacterRunAutomaton(automaton) {
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -158,7 +158,7 @@ class MultiTermHighlighting {
|
|||
final Comparator<CharsRef> comparator = CharsRef.getUTF16SortedAsUTF8Comparator();
|
||||
|
||||
// this is *not* an automaton, but its very simple
|
||||
list.add(new CharacterRunAutomaton(BasicAutomata.makeEmptyLight()) {
|
||||
list.add(new CharacterRunAutomaton(Automata.makeEmpty()) {
|
||||
@Override
|
||||
public boolean run(char[] s, int offset, int length) {
|
||||
scratch.chars = s;
|
||||
|
|
|
@ -55,7 +55,7 @@ import org.apache.lucene.search.spans.*;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.w3c.dom.Element;
|
||||
|
@ -1340,7 +1340,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
@Override
|
||||
public void run() throws Exception {
|
||||
String goodWord = "goodtoken";
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeStringLight("stoppedtoken"));
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("stoppedtoken"));
|
||||
// we disable MockTokenizer checks because we will forcefully limit the
|
||||
// tokenstream and call end() before incrementToken() returns false.
|
||||
final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
|
||||
|
@ -1386,7 +1386,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
TestHighlightRunner helper = new TestHighlightRunner() {
|
||||
@Override
|
||||
public void run() throws Exception {
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toLightAutomaton());
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
|
||||
TermQuery query = new TermQuery(new Term("text", "searchterm"));
|
||||
|
||||
String text = "this is a text with searchterm in it";
|
||||
|
|
|
@ -602,8 +602,8 @@ public class FastVectorHighlighterTest extends LuceneTestCase {
|
|||
fieldAnalyzers.put( "field", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET ) );
|
||||
fieldAnalyzers.put( "field_exact", new MockAnalyzer( random() ) );
|
||||
fieldAnalyzers.put( "field_super_exact", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, false ) );
|
||||
fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toLightAutomaton() ), true ) );
|
||||
fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toLightAutomaton() ), true ) );
|
||||
fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toAutomaton() ), true ) );
|
||||
fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
|
||||
fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
|
||||
fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) ); // This is required even though we provide a token stream
|
||||
Analyzer analyzer = new AnalyzerWrapper() {
|
||||
|
|
|
@ -629,7 +629,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
}
|
||||
DocsEnum parents = MultiFields.getTermDocsEnum(joinR, null, "isParent", new BytesRef("x"));
|
||||
System.out.println("parent docIDs:");
|
||||
while (parents.nextDoc() != parents.NO_MORE_DOCS) {
|
||||
while (parents.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
System.out.println(" " + parents.docID());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.TermRangeQuery;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -557,7 +557,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBoost() throws Exception {
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeStringLight("on"));
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
|
||||
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
|
||||
|
||||
PrecedenceQueryParser qp = new PrecedenceQueryParser();
|
||||
|
|
|
@ -67,7 +67,7 @@ import org.apache.lucene.search.TermRangeQuery;
|
|||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -957,7 +957,7 @@ public class TestQPHelper extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testBoost() throws Exception {
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeStringLight("on"));
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
|
||||
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
|
||||
StandardQueryParser qp = new StandardQueryParser();
|
||||
qp.setAnalyzer(oneStopAnalyzer);
|
||||
|
@ -1190,7 +1190,7 @@ public class TestQPHelper extends LuceneTestCase {
|
|||
|
||||
public void testStopwords() throws Exception {
|
||||
StandardQueryParser qp = new StandardQueryParser();
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toLightAutomaton());
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
|
||||
qp.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
|
||||
|
||||
Query result = qp.parse("a:the OR a:foo", "a");
|
||||
|
|
|
@ -47,7 +47,7 @@ import org.apache.lucene.search.*;
|
|||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -868,7 +868,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
|
||||
public void testBoost()
|
||||
throws Exception {
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeStringLight("on"));
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
|
||||
Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
|
||||
CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
|
||||
Query q = getQuery("on^1.0",qp);
|
||||
|
@ -1023,7 +1023,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testStopwords() throws Exception {
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toLightAutomaton());
|
||||
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
|
||||
CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
|
||||
Query result = getQuery("field:the OR field:foo",qp);
|
||||
assertNotNull("result is null and it shouldn't be", result);
|
||||
|
@ -1251,7 +1251,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
|
||||
public void testPhraseQueryPositionIncrements() throws Exception {
|
||||
CharacterRunAutomaton stopStopList =
|
||||
new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
|
||||
|
||||
CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
|
||||
|
||||
|
|
|
@ -43,9 +43,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
|
@ -255,7 +254,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
return fst == null ? 0 : fst.ramBytesUsed();
|
||||
}
|
||||
|
||||
private int[] topoSortStates(LightAutomaton a) {
|
||||
private int[] topoSortStates(Automaton a) {
|
||||
int[] states = new int[a.getNumStates()];
|
||||
final Set<Integer> visited = new HashSet<>();
|
||||
final LinkedList<Integer> worklist = new LinkedList<>();
|
||||
|
@ -283,9 +282,9 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
// Replaces SEP with epsilon or remaps them if
|
||||
// we were asked to preserve them:
|
||||
private LightAutomaton replaceSep(LightAutomaton a) {
|
||||
private Automaton replaceSep(Automaton a) {
|
||||
|
||||
LightAutomaton result = new LightAutomaton();
|
||||
Automaton result = new Automaton();
|
||||
|
||||
// Copy all states over
|
||||
int numStates = a.getNumStates();
|
||||
|
@ -335,7 +334,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
/** Used by subclass to change the lookup automaton, if
|
||||
* necessary. */
|
||||
protected LightAutomaton convertAutomaton(LightAutomaton a) {
|
||||
protected Automaton convertAutomaton(Automaton a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
|
@ -694,7 +693,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
}
|
||||
final BytesRef utf8Key = new BytesRef(key);
|
||||
try {
|
||||
LightAutomaton lookupAutomaton = toLookupAutomaton(key);
|
||||
Automaton lookupAutomaton = toLookupAutomaton(key);
|
||||
|
||||
final CharsRef spare = new CharsRef();
|
||||
|
||||
|
@ -846,7 +845,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
/** Returns all prefix paths to initialize the search. */
|
||||
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
|
||||
LightAutomaton lookupAutomaton,
|
||||
Automaton lookupAutomaton,
|
||||
FST<Pair<Long,BytesRef>> fst)
|
||||
throws IOException {
|
||||
return prefixPaths;
|
||||
|
@ -854,7 +853,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
|
||||
// Analyze surface form:
|
||||
LightAutomaton automaton = null;
|
||||
Automaton automaton = null;
|
||||
try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
|
||||
|
||||
// Create corresponding automaton: labels are bytes
|
||||
|
@ -877,13 +876,13 @@ public class AnalyzingSuggester extends Lookup {
|
|||
// don't have to alloc [possibly biggish]
|
||||
// intermediate HashSet in RAM:
|
||||
|
||||
return SpecialOperations.getFiniteStrings(automaton, maxGraphExpansions);
|
||||
return Operations.getFiniteStrings(automaton, maxGraphExpansions);
|
||||
}
|
||||
|
||||
final LightAutomaton toLookupAutomaton(final CharSequence key) throws IOException {
|
||||
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
|
||||
// TODO: is there a Reader from a CharSequence?
|
||||
// Turn tokenstream into automaton:
|
||||
LightAutomaton automaton = null;
|
||||
Automaton automaton = null;
|
||||
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
|
||||
automaton = getTokenStreamToAutomaton().toAutomaton(ts);
|
||||
}
|
||||
|
@ -892,7 +891,7 @@ public class AnalyzingSuggester extends Lookup {
|
|||
|
||||
// TODO: we can optimize this somewhat by determinizing
|
||||
// while we convert
|
||||
automaton = BasicOperations.determinize(automaton);
|
||||
automaton = Operations.determinize(automaton);
|
||||
return automaton;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
@ -66,7 +66,7 @@ public class FSTUtil {
|
|||
* Enumerates all minimal prefix paths in the automaton that also intersect the FST,
|
||||
* accumulating the FST end node and output for each path.
|
||||
*/
|
||||
public static <T> List<Path<T>> intersectPrefixPaths(LightAutomaton a, FST<T> fst)
|
||||
public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst)
|
||||
throws IOException {
|
||||
assert a.isDeterministic();
|
||||
final List<Path<T>> queue = new ArrayList<>();
|
||||
|
|
|
@ -29,12 +29,11 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; // ja
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.UTF32ToUTF8Light;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.UTF32ToUTF8;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
|
||||
|
@ -178,7 +177,7 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
|
||||
@Override
|
||||
protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths,
|
||||
LightAutomaton lookupAutomaton,
|
||||
Automaton lookupAutomaton,
|
||||
FST<Pair<Long,BytesRef>> fst)
|
||||
throws IOException {
|
||||
|
||||
|
@ -192,7 +191,7 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
// "compete") ... in which case I think the wFST needs
|
||||
// to be log weights or something ...
|
||||
|
||||
LightAutomaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
|
||||
Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton));
|
||||
/*
|
||||
Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8);
|
||||
w.write(levA.toDot());
|
||||
|
@ -203,10 +202,10 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected LightAutomaton convertAutomaton(LightAutomaton a) {
|
||||
protected Automaton convertAutomaton(Automaton a) {
|
||||
if (unicodeAware) {
|
||||
LightAutomaton utf8automaton = new UTF32ToUTF8Light().convert(a);
|
||||
utf8automaton = BasicOperations.determinize(utf8automaton);
|
||||
Automaton utf8automaton = new UTF32ToUTF8().convert(a);
|
||||
utf8automaton = Operations.determinize(utf8automaton);
|
||||
return utf8automaton;
|
||||
} else {
|
||||
return a;
|
||||
|
@ -220,13 +219,13 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
return tsta;
|
||||
}
|
||||
|
||||
LightAutomaton toLevenshteinAutomata(LightAutomaton automaton) {
|
||||
final Set<IntsRef> ref = SpecialOperations.getFiniteStrings(automaton, -1);
|
||||
LightAutomaton subs[] = new LightAutomaton[ref.size()];
|
||||
Automaton toLevenshteinAutomata(Automaton automaton) {
|
||||
final Set<IntsRef> ref = Operations.getFiniteStrings(automaton, -1);
|
||||
Automaton subs[] = new Automaton[ref.size()];
|
||||
int upto = 0;
|
||||
for (IntsRef path : ref) {
|
||||
if (path.length <= nonFuzzyPrefix || path.length < minFuzzyLength) {
|
||||
subs[upto] = BasicAutomata.makeStringLight(path.ints, path.offset, path.length);
|
||||
subs[upto] = Automata.makeString(path.ints, path.offset, path.length);
|
||||
upto++;
|
||||
} else {
|
||||
int ints[] = new int[path.length-nonFuzzyPrefix];
|
||||
|
@ -244,17 +243,17 @@ public final class FuzzySuggester extends AnalyzingSuggester {
|
|||
|
||||
if (subs.length == 0) {
|
||||
// automaton is empty, there is no accepted paths through it
|
||||
return BasicAutomata.makeEmptyLight(); // matches nothing
|
||||
return Automata.makeEmpty(); // matches nothing
|
||||
} else if (subs.length == 1) {
|
||||
// no synonyms or anything: just a single path through the tokenstream
|
||||
return subs[0];
|
||||
} else {
|
||||
// multiple paths: this is really scary! is it slow?
|
||||
// maybe we should not do this and throw UOE?
|
||||
LightAutomaton a = BasicOperations.unionLight(Arrays.asList(subs));
|
||||
Automaton a = Operations.union(Arrays.asList(subs));
|
||||
// TODO: we could call toLevenshteinAutomata() before det?
|
||||
// this only happens if you have multiple paths anyway (e.g. synonyms)
|
||||
return BasicOperations.determinize(a);
|
||||
return Operations.determinize(a);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,8 +47,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
public class FuzzySuggesterTest extends LuceneTestCase {
|
||||
|
@ -752,7 +751,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
|
|||
// us the "answer key" (ie maybe we have a bug in
|
||||
// suggester.toLevA ...) ... but testRandom2() fixes
|
||||
// this:
|
||||
LightAutomaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
|
||||
Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
|
||||
assertTrue(automaton.isDeterministic());
|
||||
|
||||
// TODO: could be faster... but its slowCompletor for a reason
|
||||
|
|
|
@ -17,15 +17,15 @@ package org.apache.lucene.analysis;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.lucene.util.automaton.BasicAutomata.makeEmptyLight;
|
||||
import static org.apache.lucene.util.automaton.BasicAutomata.makeStringLight;
|
||||
import static org.apache.lucene.util.automaton.Automata.makeEmpty;
|
||||
import static org.apache.lucene.util.automaton.Automata.makeString;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
|
||||
/**
|
||||
|
@ -39,20 +39,20 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
|||
public final class MockTokenFilter extends TokenFilter {
|
||||
/** Empty set of stopwords */
|
||||
public static final CharacterRunAutomaton EMPTY_STOPSET =
|
||||
new CharacterRunAutomaton(makeEmptyLight());
|
||||
new CharacterRunAutomaton(makeEmpty());
|
||||
|
||||
/** Set of common english stopwords */
|
||||
public static final CharacterRunAutomaton ENGLISH_STOPSET =
|
||||
new CharacterRunAutomaton(BasicOperations.unionLight(Arrays.asList(
|
||||
makeStringLight("a"), makeStringLight("an"), makeStringLight("and"), makeStringLight("are"),
|
||||
makeStringLight("as"), makeStringLight("at"), makeStringLight("be"), makeStringLight("but"),
|
||||
makeStringLight("by"), makeStringLight("for"), makeStringLight("if"), makeStringLight("in"),
|
||||
makeStringLight("into"), makeStringLight("is"), makeStringLight("it"), makeStringLight("no"),
|
||||
makeStringLight("not"), makeStringLight("of"), makeStringLight("on"), makeStringLight("or"),
|
||||
makeStringLight("such"), makeStringLight("that"), makeStringLight("the"), makeStringLight("their"),
|
||||
makeStringLight("then"), makeStringLight("there"), makeStringLight("these"), makeStringLight("they"),
|
||||
makeStringLight("this"), makeStringLight("to"), makeStringLight("was"), makeStringLight("will"),
|
||||
makeStringLight("with"))));
|
||||
new CharacterRunAutomaton(Operations.union(Arrays.asList(
|
||||
makeString("a"), makeString("an"), makeString("and"), makeString("are"),
|
||||
makeString("as"), makeString("at"), makeString("be"), makeString("but"),
|
||||
makeString("by"), makeString("for"), makeString("if"), makeString("in"),
|
||||
makeString("into"), makeString("is"), makeString("it"), makeString("no"),
|
||||
makeString("not"), makeString("of"), makeString("on"), makeString("or"),
|
||||
makeString("such"), makeString("that"), makeString("the"), makeString("their"),
|
||||
makeString("then"), makeString("there"), makeString("these"), makeString("they"),
|
||||
makeString("this"), makeString("to"), makeString("was"), makeString("will"),
|
||||
makeString("with"))));
|
||||
|
||||
private final CharacterRunAutomaton filter;
|
||||
|
||||
|
|
|
@ -44,16 +44,16 @@ import com.carrotsearch.randomizedtesting.RandomizedContext;
|
|||
public class MockTokenizer extends Tokenizer {
|
||||
/** Acts similar to WhitespaceTokenizer */
|
||||
public static final CharacterRunAutomaton WHITESPACE =
|
||||
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
|
||||
/** Acts similar to KeywordTokenizer.
|
||||
* TODO: Keyword returns an "empty" token for an empty reader...
|
||||
*/
|
||||
public static final CharacterRunAutomaton KEYWORD =
|
||||
new CharacterRunAutomaton(new RegExp(".*").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp(".*").toAutomaton());
|
||||
/** Acts like LetterTokenizer. */
|
||||
// the ugly regex below is incomplete Unicode 5.2 [:Letter:]
|
||||
public static final CharacterRunAutomaton SIMPLE =
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ一-鿌]+").toLightAutomaton());
|
||||
new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁ一-鿌]+").toAutomaton());
|
||||
|
||||
private final CharacterRunAutomaton runAutomaton;
|
||||
private final boolean lowerCase;
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -57,7 +57,7 @@ public abstract class SearchEquivalenceTestBase extends LuceneTestCase {
|
|||
Random random = random();
|
||||
directory = newDirectory();
|
||||
stopword = "" + randomChar();
|
||||
CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.makeStringLight(stopword));
|
||||
CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
|
||||
analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
|
||||
Document doc = new Document();
|
||||
|
|
|
@ -1788,7 +1788,7 @@ public abstract class LuceneTestCase extends Assert {
|
|||
int numIntersections = atLeast(3);
|
||||
for (int i = 0; i < numIntersections; i++) {
|
||||
String re = AutomatonTestUtil.randomRegexp(random());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toLightAutomaton());
|
||||
CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
|
||||
if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
// TODO: test start term too
|
||||
TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
|
||||
|
|
|
@ -137,7 +137,7 @@ public class AutomatonTestUtil {
|
|||
public static class RandomAcceptedStrings {
|
||||
|
||||
private final Map<Transition,Boolean> leadsToAccept;
|
||||
private final LightAutomaton a;
|
||||
private final Automaton a;
|
||||
private final Transition[][] transitions;
|
||||
|
||||
private static class ArrivingTransition {
|
||||
|
@ -150,7 +150,7 @@ public class AutomatonTestUtil {
|
|||
}
|
||||
}
|
||||
|
||||
public RandomAcceptedStrings(LightAutomaton a) {
|
||||
public RandomAcceptedStrings(Automaton a) {
|
||||
this.a = a;
|
||||
if (a.getNumStates() == 0) {
|
||||
throw new IllegalArgumentException("this automaton accepts nothing");
|
||||
|
@ -252,24 +252,24 @@ public class AutomatonTestUtil {
|
|||
}
|
||||
|
||||
/** return a random NFA/DFA for testing */
|
||||
public static LightAutomaton randomAutomaton(Random random) {
|
||||
public static Automaton randomAutomaton(Random random) {
|
||||
// get two random Automata from regexps
|
||||
LightAutomaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toLightAutomaton();
|
||||
Automaton a1 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
|
||||
if (random.nextBoolean()) {
|
||||
a1 = BasicOperations.complementLight(a1);
|
||||
a1 = Operations.complement(a1);
|
||||
}
|
||||
|
||||
LightAutomaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toLightAutomaton();
|
||||
Automaton a2 = new RegExp(AutomatonTestUtil.randomRegexp(random), RegExp.NONE).toAutomaton();
|
||||
if (random.nextBoolean()) {
|
||||
a2 = BasicOperations.complementLight(a2);
|
||||
a2 = Operations.complement(a2);
|
||||
}
|
||||
|
||||
// combine them in random ways
|
||||
switch (random.nextInt(4)) {
|
||||
case 0: return BasicOperations.concatenateLight(a1, a2);
|
||||
case 1: return BasicOperations.unionLight(a1, a2);
|
||||
case 2: return BasicOperations.intersectionLight(a1, a2);
|
||||
default: return BasicOperations.minusLight(a1, a2);
|
||||
case 0: return Operations.concatenate(a1, a2);
|
||||
case 1: return Operations.union(a1, a2);
|
||||
case 2: return Operations.intersection(a1, a2);
|
||||
default: return Operations.minus(a1, a2);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -310,28 +310,28 @@ public class AutomatonTestUtil {
|
|||
/**
|
||||
* Simple, original brics implementation of Brzozowski minimize()
|
||||
*/
|
||||
public static LightAutomaton minimizeSimple(LightAutomaton a) {
|
||||
public static Automaton minimizeSimple(Automaton a) {
|
||||
Set<Integer> initialSet = new HashSet<Integer>();
|
||||
a = determinizeSimpleLight(SpecialOperations.reverse(a, initialSet), initialSet);
|
||||
a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
|
||||
initialSet.clear();
|
||||
a = determinizeSimpleLight(SpecialOperations.reverse(a, initialSet), initialSet);
|
||||
a = determinizeSimple(Operations.reverse(a, initialSet), initialSet);
|
||||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple, original brics implementation of determinize()
|
||||
*/
|
||||
public static LightAutomaton determinizeSimpleLight(LightAutomaton a) {
|
||||
public static Automaton determinizeSimple(Automaton a) {
|
||||
Set<Integer> initialset = new HashSet<>();
|
||||
initialset.add(0);
|
||||
return determinizeSimpleLight(a, initialset);
|
||||
return determinizeSimple(a, initialset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple, original brics implementation of determinize()
|
||||
* Determinizes the given automaton using the given set of initial states.
|
||||
*/
|
||||
public static LightAutomaton determinizeSimpleLight(LightAutomaton a, Set<Integer> initialset) {
|
||||
public static Automaton determinizeSimple(Automaton a, Set<Integer> initialset) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return a;
|
||||
}
|
||||
|
@ -342,7 +342,7 @@ public class AutomatonTestUtil {
|
|||
Map<Set<Integer>, Integer> newstate = new HashMap<>();
|
||||
sets.put(initialset, initialset);
|
||||
worklist.add(initialset);
|
||||
LightAutomaton.Builder result = new LightAutomaton.Builder();
|
||||
Automaton.Builder result = new Automaton.Builder();
|
||||
result.createState();
|
||||
newstate.put(initialset, 0);
|
||||
Transition t = new Transition();
|
||||
|
@ -384,7 +384,7 @@ public class AutomatonTestUtil {
|
|||
}
|
||||
}
|
||||
|
||||
return BasicOperations.removeDeadStates(result.finish());
|
||||
return Operations.removeDeadStates(result.finish());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -399,9 +399,9 @@ public class AutomatonTestUtil {
|
|||
* frame for each digit in the returned strings (ie, max
|
||||
* is the max length returned string).
|
||||
*/
|
||||
public static Set<IntsRef> getFiniteStringsRecursiveLight(LightAutomaton a, int limit) {
|
||||
public static Set<IntsRef> getFiniteStringsRecursive(Automaton a, int limit) {
|
||||
HashSet<IntsRef> strings = new HashSet<>();
|
||||
if (!getFiniteStringsLight(a, 0, new HashSet<Integer>(), strings, new IntsRef(), limit)) {
|
||||
if (!getFiniteStrings(a, 0, new HashSet<Integer>(), strings, new IntsRef(), limit)) {
|
||||
return strings;
|
||||
}
|
||||
return strings;
|
||||
|
@ -412,7 +412,7 @@ public class AutomatonTestUtil {
|
|||
* false if more than <code>limit</code> strings are found.
|
||||
* <code>limit</code><0 means "infinite".
|
||||
*/
|
||||
private static boolean getFiniteStringsLight(LightAutomaton a, int s, HashSet<Integer> pathstates,
|
||||
private static boolean getFiniteStrings(Automaton a, int s, HashSet<Integer> pathstates,
|
||||
HashSet<IntsRef> strings, IntsRef path, int limit) {
|
||||
pathstates.add(s);
|
||||
Transition t = new Transition();
|
||||
|
@ -432,7 +432,7 @@ public class AutomatonTestUtil {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
if (!getFiniteStringsLight(a, t.dest, pathstates, strings, path, limit)) {
|
||||
if (!getFiniteStrings(a, t.dest, pathstates, strings, path, limit)) {
|
||||
return false;
|
||||
}
|
||||
path.length--;
|
||||
|
@ -448,7 +448,7 @@ public class AutomatonTestUtil {
|
|||
* WARNING: this method is slow, it will blow up if the automaton is large.
|
||||
* this is only used to test the correctness of our faster implementation.
|
||||
*/
|
||||
public static boolean isFiniteSlow(LightAutomaton a) {
|
||||
public static boolean isFiniteSlow(Automaton a) {
|
||||
if (a.getNumStates() == 0) {
|
||||
return true;
|
||||
}
|
||||
|
@ -461,7 +461,7 @@ public class AutomatonTestUtil {
|
|||
*/
|
||||
// TODO: not great that this is recursive... in theory a
|
||||
// large automaton could exceed Java's stack
|
||||
private static boolean isFiniteSlow(LightAutomaton a, int s, HashSet<Integer> path) {
|
||||
private static boolean isFiniteSlow(Automaton a, int s, HashSet<Integer> path) {
|
||||
path.add(s);
|
||||
Transition t = new Transition();
|
||||
int count = a.initTransition(s, t);
|
||||
|
@ -479,14 +479,14 @@ public class AutomatonTestUtil {
|
|||
* Checks that an automaton has no detached states that are unreachable
|
||||
* from the initial state.
|
||||
*/
|
||||
public static void assertNoDetachedStates(LightAutomaton a) {
|
||||
LightAutomaton a2 = BasicOperations.removeDeadStates(a);
|
||||
public static void assertNoDetachedStates(Automaton a) {
|
||||
Automaton a2 = Operations.removeDeadStates(a);
|
||||
assert a.getNumStates() == a2.getNumStates() : "automaton has " + (a.getNumStates() - a2.getNumStates()) + " detached states";
|
||||
}
|
||||
|
||||
// nocommit where to assert this...
|
||||
/** Returns true if the automaton is deterministic. */
|
||||
public static boolean isDeterministicSlow(LightAutomaton a) {
|
||||
public static boolean isDeterministicSlow(Automaton a) {
|
||||
Transition t = new Transition();
|
||||
int numStates = a.getNumStates();
|
||||
for(int s=0;s<numStates;s++) {
|
||||
|
|
|
@ -40,10 +40,9 @@ import org.apache.lucene.search.WildcardQuery;
|
|||
import org.apache.lucene.util.QueryBuilder;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -777,19 +776,19 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
|||
if (factory != null) {
|
||||
Term term = new Term(field, termStr);
|
||||
// fsa representing the query
|
||||
LightAutomaton automaton = WildcardQuery.toAutomaton(term);
|
||||
Automaton automaton = WildcardQuery.toAutomaton(term);
|
||||
// TODO: we should likely use the automaton to calculate shouldReverse, too.
|
||||
if (factory.shouldReverse(termStr)) {
|
||||
automaton = BasicOperations.concatenateLight(automaton, BasicAutomata.makeCharLight(factory.getMarkerChar()));
|
||||
automaton = SpecialOperations.reverse(automaton);
|
||||
automaton = Operations.concatenate(automaton, Automata.makeChar(factory.getMarkerChar()));
|
||||
automaton = Operations.reverse(automaton);
|
||||
} else {
|
||||
// reverse wildcardfilter is active: remove false positives
|
||||
// fsa representing false positives (markerChar*)
|
||||
LightAutomaton falsePositives = BasicOperations.concatenateLight(
|
||||
BasicAutomata.makeCharLight(factory.getMarkerChar()),
|
||||
BasicAutomata.makeAnyStringLight());
|
||||
Automaton falsePositives = Operations.concatenate(
|
||||
Automata.makeChar(factory.getMarkerChar()),
|
||||
Automata.makeAnyString());
|
||||
// subtract these away
|
||||
automaton = BasicOperations.minusLight(automaton, falsePositives);
|
||||
automaton = Operations.minus(automaton, falsePositives);
|
||||
}
|
||||
return new AutomatonQuery(term, automaton) {
|
||||
// override toString so it's completely transparent
|
||||
|
|
|
@ -25,9 +25,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
|
|
|
@ -26,9 +26,8 @@ import org.apache.lucene.analysis.Analyzer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.LightAutomaton;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.util.automaton.Operations;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
|
@ -161,8 +160,8 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
|
|||
if (!(q instanceof AutomatonQuery)) {
|
||||
return false;
|
||||
}
|
||||
LightAutomaton automaton = ((AutomatonQuery) q).getLightAutomaton();
|
||||
String prefix = SpecialOperations.getCommonPrefix(BasicOperations.determinize(automaton));
|
||||
Automaton automaton = ((AutomatonQuery) q).getAutomaton();
|
||||
String prefix = Operations.getCommonPrefix(Operations.determinize(automaton));
|
||||
return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
|
||||
}
|
||||
|
||||
|
|
|
@ -48,7 +48,7 @@ public class MockTokenFilterFactory extends TokenFilterFactory {
|
|||
}
|
||||
} else if (null != stopregex) {
|
||||
RegExp regex = new RegExp(stopregex);
|
||||
filter = new CharacterRunAutomaton(regex.toLightAutomaton());
|
||||
filter = new CharacterRunAutomaton(regex.toAutomaton());
|
||||
} else {
|
||||
throw new IllegalArgumentException
|
||||
("Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
|
||||
|
|
Loading…
Reference in New Issue