mirror of https://github.com/apache/lucene.git
LUCENE-2907: fix automaton thread hazard: change query to compute immutable compiled form in ctor
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1067720 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
70cbc8acab
commit
32b914572d
|
@ -18,15 +18,15 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
|
||||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||||
import org.apache.lucene.util.automaton.BasicOperations;
|
import org.apache.lucene.util.automaton.BasicOperations;
|
||||||
import org.apache.lucene.util.automaton.MinimizationOperations;
|
import org.apache.lucene.util.automaton.MinimizationOperations;
|
||||||
|
@ -56,9 +56,16 @@ public class AutomatonQuery extends MultiTermQuery {
|
||||||
/** term containing the field, and possibly some pattern structure */
|
/** term containing the field, and possibly some pattern structure */
|
||||||
protected final Term term;
|
protected final Term term;
|
||||||
|
|
||||||
transient ByteRunAutomaton runAutomaton;
|
/**
|
||||||
transient boolean isFinite;
|
* Abstraction for returning a {@link TermsEnum}:
|
||||||
transient BytesRef commonSuffixRef;
|
* in the ctor the query computes one of these; the actual
|
||||||
|
* implementation depends upon the automaton's structure.
|
||||||
|
*/
|
||||||
|
private abstract class TermsEnumFactory implements Serializable {
|
||||||
|
protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final TermsEnumFactory factory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new AutomatonQuery from an {@link Automaton}.
|
* Create a new AutomatonQuery from an {@link Automaton}.
|
||||||
|
@ -68,60 +75,77 @@ public class AutomatonQuery extends MultiTermQuery {
|
||||||
* @param automaton Automaton to run, terms that are accepted are considered a
|
* @param automaton Automaton to run, terms that are accepted are considered a
|
||||||
* match.
|
* match.
|
||||||
*/
|
*/
|
||||||
public AutomatonQuery(Term term, Automaton automaton) {
|
public AutomatonQuery(final Term term, Automaton automaton) {
|
||||||
super(term.field());
|
super(term.field());
|
||||||
this.term = term;
|
this.term = term;
|
||||||
this.automaton = automaton;
|
this.automaton = automaton;
|
||||||
MinimizationOperations.minimize(automaton);
|
MinimizationOperations.minimize(automaton);
|
||||||
}
|
|
||||||
|
if (BasicOperations.isEmpty(automaton)) {
|
||||||
private synchronized void compileAutomaton() {
|
// matches nothing
|
||||||
// this method must be synchronized, as setting the three transient fields is not atomic:
|
factory = new TermsEnumFactory() {
|
||||||
if (runAutomaton == null) {
|
@Override
|
||||||
runAutomaton = new ByteRunAutomaton(automaton);
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
isFinite = SpecialOperations.isFinite(automaton);
|
return TermsEnum.EMPTY;
|
||||||
commonSuffixRef = isFinite ? null : SpecialOperations.getCommonSuffixBytesRef(runAutomaton.getAutomaton());
|
}
|
||||||
|
};
|
||||||
|
} else if (BasicOperations.isTotal(automaton)) {
|
||||||
|
// matches all possible strings
|
||||||
|
factory = new TermsEnumFactory() {
|
||||||
|
@Override
|
||||||
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
|
return terms.iterator();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
final String singleton;
|
||||||
|
final String commonPrefix;
|
||||||
|
|
||||||
|
if (automaton.getSingleton() == null) {
|
||||||
|
commonPrefix = SpecialOperations.getCommonPrefix(automaton);
|
||||||
|
if (commonPrefix.length() > 0 && BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
|
||||||
|
singleton = commonPrefix;
|
||||||
|
} else {
|
||||||
|
singleton = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
commonPrefix = null;
|
||||||
|
singleton = automaton.getSingleton();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (singleton != null) {
|
||||||
|
// matches a fixed string in singleton or expanded representation
|
||||||
|
factory = new TermsEnumFactory() {
|
||||||
|
@Override
|
||||||
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
|
return new SingleTermsEnum(terms.iterator(), term.createTerm(singleton));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else if (BasicOperations.sameLanguage(automaton, BasicOperations.concatenate(
|
||||||
|
BasicAutomata.makeString(commonPrefix), BasicAutomata.makeAnyString()))) {
|
||||||
|
// matches a constant prefix
|
||||||
|
factory = new TermsEnumFactory() {
|
||||||
|
@Override
|
||||||
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
|
return new PrefixTermsEnum(terms.iterator(), term.createTerm(commonPrefix));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
final AutomatonTermsEnum.CompiledAutomaton compiled =
|
||||||
|
new CompiledAutomaton(automaton, SpecialOperations.isFinite(automaton));
|
||||||
|
factory = new TermsEnumFactory() {
|
||||||
|
@Override
|
||||||
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
|
return new AutomatonTermsEnum(terms.iterator(), compiled);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||||
// matches nothing
|
return factory.getTermsEnum(terms, atts);
|
||||||
if (BasicOperations.isEmpty(automaton)) {
|
|
||||||
return TermsEnum.EMPTY;
|
|
||||||
}
|
|
||||||
|
|
||||||
TermsEnum tenum = terms.iterator();
|
|
||||||
|
|
||||||
// matches all possible strings
|
|
||||||
if (BasicOperations.isTotal(automaton)) {
|
|
||||||
return tenum;
|
|
||||||
}
|
|
||||||
|
|
||||||
// matches a fixed string in singleton representation
|
|
||||||
String singleton = automaton.getSingleton();
|
|
||||||
if (singleton != null)
|
|
||||||
return new SingleTermsEnum(tenum, term.createTerm(singleton));
|
|
||||||
|
|
||||||
// matches a fixed string in expanded representation
|
|
||||||
final String commonPrefix = SpecialOperations.getCommonPrefix(automaton);
|
|
||||||
|
|
||||||
if (commonPrefix.length() > 0) {
|
|
||||||
if (BasicOperations.sameLanguage(automaton, BasicAutomata.makeString(commonPrefix))) {
|
|
||||||
return new SingleTermsEnum(tenum, term.createTerm(commonPrefix));
|
|
||||||
}
|
|
||||||
|
|
||||||
// matches a constant prefix
|
|
||||||
Automaton prefixAutomaton = BasicOperations.concatenate(BasicAutomata
|
|
||||||
.makeString(commonPrefix), BasicAutomata.makeAnyString());
|
|
||||||
if (BasicOperations.sameLanguage(automaton, prefixAutomaton)) {
|
|
||||||
return new PrefixTermsEnum(tenum, term.createTerm(commonPrefix));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
compileAutomaton();
|
|
||||||
|
|
||||||
return new AutomatonTermsEnum(runAutomaton, tenum, isFinite, commonSuffixRef);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||||
import org.apache.lucene.util.automaton.Transition;
|
import org.apache.lucene.util.automaton.Transition;
|
||||||
|
import org.apache.lucene.util.automaton.UTF32ToUTF8;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A FilteredTermsEnum that enumerates terms based upon what is accepted by a
|
* A FilteredTermsEnum that enumerates terms based upon what is accepted by a
|
||||||
|
@ -46,8 +47,6 @@ import org.apache.lucene.util.automaton.Transition;
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class AutomatonTermsEnum extends FilteredTermsEnum {
|
public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
// the object-oriented form of the DFA
|
|
||||||
private final Automaton automaton;
|
|
||||||
// a tableized array-based form of the DFA
|
// a tableized array-based form of the DFA
|
||||||
private final ByteRunAutomaton runAutomaton;
|
private final ByteRunAutomaton runAutomaton;
|
||||||
// common suffix of the automaton
|
// common suffix of the automaton
|
||||||
|
@ -71,54 +70,26 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
private final Comparator<BytesRef> termComp;
|
private final Comparator<BytesRef> termComp;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expert ctor:
|
|
||||||
* Construct an enumerator based upon an automaton, enumerating the specified
|
* Construct an enumerator based upon an automaton, enumerating the specified
|
||||||
* field, working on a supplied TermsEnum
|
* field, working on a supplied TermsEnum
|
||||||
* <p>
|
* <p>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
* <p>
|
* <p>
|
||||||
* @param runAutomaton pre-compiled ByteRunAutomaton
|
* @param compiled CompiledAutomaton
|
||||||
* @param finite true if the automaton accepts a finite language
|
|
||||||
*/
|
*/
|
||||||
public AutomatonTermsEnum(ByteRunAutomaton runAutomaton,
|
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) throws IOException {
|
||||||
TermsEnum tenum,
|
|
||||||
boolean finite, BytesRef commonSuffixRef)
|
|
||||||
throws IOException {
|
|
||||||
super(tenum);
|
super(tenum);
|
||||||
this.automaton = runAutomaton.getAutomaton();
|
this.finite = compiled.finite;
|
||||||
this.finite = finite;
|
this.runAutomaton = compiled.runAutomaton;
|
||||||
|
this.commonSuffixRef = compiled.commonSuffixRef;
|
||||||
|
this.allTransitions = compiled.sortedTransitions;
|
||||||
|
|
||||||
this.runAutomaton = runAutomaton;
|
|
||||||
if (finite) {
|
|
||||||
// don't use suffix w/ finite DFAs
|
|
||||||
this.commonSuffixRef = null;
|
|
||||||
} else if (commonSuffixRef == null) {
|
|
||||||
// compute now
|
|
||||||
this.commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(automaton);
|
|
||||||
} else {
|
|
||||||
// precomputed
|
|
||||||
this.commonSuffixRef = commonSuffixRef;
|
|
||||||
}
|
|
||||||
|
|
||||||
// build a cache of sorted transitions for every state
|
|
||||||
allTransitions = this.automaton.getSortedTransitions();
|
|
||||||
// used for path tracking, where each bit is a numbered state.
|
// used for path tracking, where each bit is a numbered state.
|
||||||
visited = new long[runAutomaton.getSize()];
|
visited = new long[runAutomaton.getSize()];
|
||||||
|
|
||||||
termComp = getComparator();
|
termComp = getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Construct an enumerator based upon an automaton, enumerating the specified
|
|
||||||
* field, working on a supplied TermsEnum
|
|
||||||
* <p>
|
|
||||||
* It will automatically calculate whether or not the automaton is finite
|
|
||||||
*/
|
|
||||||
public AutomatonTermsEnum(Automaton automaton, TermsEnum tenum)
|
|
||||||
throws IOException {
|
|
||||||
this(new ByteRunAutomaton(automaton), tenum, SpecialOperations.isFinite(automaton), null);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if the term matches the automaton. Also stashes away the term
|
* Returns true if the term matches the automaton. Also stashes away the term
|
||||||
* to assist with smart enumeration.
|
* to assist with smart enumeration.
|
||||||
|
@ -350,4 +321,26 @@ public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||||
}
|
}
|
||||||
return -1; /* all solutions exhausted */
|
return -1; /* all solutions exhausted */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Immutable holder for the precomputed automaton data that {@link AutomatonTermsEnum} needs.
|
||||||
|
*/
|
||||||
|
public static class CompiledAutomaton {
|
||||||
|
public final ByteRunAutomaton runAutomaton;
|
||||||
|
public final Transition[][] sortedTransitions;
|
||||||
|
public final BytesRef commonSuffixRef;
|
||||||
|
public final boolean finite;
|
||||||
|
|
||||||
|
public CompiledAutomaton(Automaton automaton, boolean finite) {
|
||||||
|
Automaton utf8 = new UTF32ToUTF8().convert(automaton);
|
||||||
|
runAutomaton = new ByteRunAutomaton(utf8, true);
|
||||||
|
sortedTransitions = utf8.getSortedTransitions();
|
||||||
|
this.finite = finite;
|
||||||
|
if (finite) {
|
||||||
|
commonSuffixRef = null;
|
||||||
|
} else {
|
||||||
|
commonSuffixRef = SpecialOperations.getCommonSuffixBytesRef(utf8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
|
import org.apache.lucene.search.AutomatonTermsEnum.CompiledAutomaton;
|
||||||
import org.apache.lucene.util.Attribute;
|
import org.apache.lucene.util.Attribute;
|
||||||
import org.apache.lucene.util.AttributeImpl;
|
import org.apache.lucene.util.AttributeImpl;
|
||||||
import org.apache.lucene.util.AttributeSource;
|
import org.apache.lucene.util.AttributeSource;
|
||||||
|
@ -140,18 +141,18 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
*/
|
*/
|
||||||
private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
|
private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final List<ByteRunAutomaton> runAutomata = initAutomata(editDistance);
|
final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
|
||||||
if (editDistance < runAutomata.size()) {
|
if (editDistance < runAutomata.size()) {
|
||||||
return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1)
|
return new AutomatonFuzzyTermsEnum(runAutomata.subList(0, editDistance + 1)
|
||||||
.toArray(new ByteRunAutomaton[editDistance + 1]), lastTerm);
|
.toArray(new CompiledAutomaton[editDistance + 1]), lastTerm);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** initialize levenshtein DFAs up to maxDistance, if possible */
|
/** initialize levenshtein DFAs up to maxDistance, if possible */
|
||||||
private List<ByteRunAutomaton> initAutomata(int maxDistance) {
|
private List<CompiledAutomaton> initAutomata(int maxDistance) {
|
||||||
final List<ByteRunAutomaton> runAutomata = dfaAtt.automata();
|
final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
|
||||||
if (runAutomata.size() <= maxDistance &&
|
if (runAutomata.size() <= maxDistance &&
|
||||||
maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||||
LevenshteinAutomata builder =
|
LevenshteinAutomata builder =
|
||||||
|
@ -165,7 +166,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
UnicodeUtil.newString(termText, 0, realPrefixLength));
|
UnicodeUtil.newString(termText, 0, realPrefixLength));
|
||||||
a = BasicOperations.concatenate(prefix, a);
|
a = BasicOperations.concatenate(prefix, a);
|
||||||
}
|
}
|
||||||
runAutomata.add(new ByteRunAutomaton(a));
|
runAutomata.add(new CompiledAutomaton(a, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return runAutomata;
|
return runAutomata;
|
||||||
|
@ -312,10 +313,12 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
private final BoostAttribute boostAtt =
|
private final BoostAttribute boostAtt =
|
||||||
attributes().addAttribute(BoostAttribute.class);
|
attributes().addAttribute(BoostAttribute.class);
|
||||||
|
|
||||||
public AutomatonFuzzyTermsEnum(ByteRunAutomaton matchers[],
|
public AutomatonFuzzyTermsEnum(CompiledAutomaton compiled[],
|
||||||
BytesRef lastTerm) throws IOException {
|
BytesRef lastTerm) throws IOException {
|
||||||
super(matchers[matchers.length - 1], tenum, true, null);
|
super(tenum, compiled[compiled.length - 1]);
|
||||||
this.matchers = matchers;
|
this.matchers = new ByteRunAutomaton[compiled.length];
|
||||||
|
for (int i = 0; i < compiled.length; i++)
|
||||||
|
this.matchers[i] = compiled[i].runAutomaton;
|
||||||
this.lastTerm = lastTerm;
|
this.lastTerm = lastTerm;
|
||||||
termRef = new BytesRef(term.text());
|
termRef = new BytesRef(term.text());
|
||||||
}
|
}
|
||||||
|
@ -563,14 +566,14 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
public static interface LevenshteinAutomataAttribute extends Attribute {
|
public static interface LevenshteinAutomataAttribute extends Attribute {
|
||||||
public List<ByteRunAutomaton> automata();
|
public List<CompiledAutomaton> automata();
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
|
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
|
||||||
private final List<ByteRunAutomaton> automata = new ArrayList<ByteRunAutomaton>();
|
private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>();
|
||||||
|
|
||||||
public List<ByteRunAutomaton> automata() {
|
public List<CompiledAutomaton> automata() {
|
||||||
return automata;
|
return automata;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -595,7 +598,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void copyTo(AttributeImpl target) {
|
public void copyTo(AttributeImpl target) {
|
||||||
final List<ByteRunAutomaton> targetAutomata =
|
final List<CompiledAutomaton> targetAutomata =
|
||||||
((LevenshteinAutomataAttribute) target).automata();
|
((LevenshteinAutomataAttribute) target).automata();
|
||||||
targetAutomata.clear();
|
targetAutomata.clear();
|
||||||
targetAutomata.addAll(automata);
|
targetAutomata.addAll(automata);
|
||||||
|
|
|
@ -66,6 +66,13 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
* assumed by the built-in automata operations.
|
* assumed by the built-in automata operations.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
|
* <p>
|
||||||
|
* Note: This class has internal mutable state and is not thread safe. It is
|
||||||
|
* the caller's responsibility to ensure any necessary synchronization if you
|
||||||
|
* wish to use the same Automaton from multiple threads. In general it is instead
|
||||||
|
* recommended to use a {@link RunAutomaton} for multithreaded matching: it is immutable,
|
||||||
|
* thread safe, and much faster.
|
||||||
|
* </p>
|
||||||
* @lucene.experimental
|
* @lucene.experimental
|
||||||
*/
|
*/
|
||||||
public class Automaton implements Serializable, Cloneable {
|
public class Automaton implements Serializable, Cloneable {
|
||||||
|
|
|
@ -20,7 +20,12 @@ package org.apache.lucene.util.automaton;
|
||||||
public class ByteRunAutomaton extends RunAutomaton {
|
public class ByteRunAutomaton extends RunAutomaton {
|
||||||
|
|
||||||
public ByteRunAutomaton(Automaton a) {
|
public ByteRunAutomaton(Automaton a) {
|
||||||
super(new UTF32ToUTF8().convert(a), 256, true);
|
this(a, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Expert: if utf8 is true, the input automaton is already byte-based and is used as-is (no UTF32-to-UTF8 conversion is applied). */
|
||||||
|
public ByteRunAutomaton(Automaton a, boolean utf8) {
|
||||||
|
super(utf8 ? a : new UTF32ToUTF8().convert(a), 256, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -45,7 +45,6 @@ public abstract class RunAutomaton implements Serializable {
|
||||||
// getCharClass(c)]
|
// getCharClass(c)]
|
||||||
final int[] points; // char interval start points
|
final int[] points; // char interval start points
|
||||||
final int[] classmap; // map from char number to character class
|
final int[] classmap; // map from char number to character class
|
||||||
final Automaton automaton;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a string representation of this automaton.
|
* Returns a string representation of this automaton.
|
||||||
|
@ -113,13 +112,6 @@ public abstract class RunAutomaton implements Serializable {
|
||||||
final int getCharClass(int c) {
|
final int getCharClass(int c) {
|
||||||
return SpecialOperations.findIndex(c, points);
|
return SpecialOperations.findIndex(c, points);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the automaton
|
|
||||||
*/
|
|
||||||
public Automaton getAutomaton() {
|
|
||||||
return automaton;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new <code>RunAutomaton</code> from a deterministic
|
* Constructs a new <code>RunAutomaton</code> from a deterministic
|
||||||
|
@ -160,7 +152,6 @@ public abstract class RunAutomaton implements Serializable {
|
||||||
} else {
|
} else {
|
||||||
classmap = null;
|
classmap = null;
|
||||||
}
|
}
|
||||||
this.automaton = a;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -26,7 +26,8 @@ import java.util.ArrayList;
|
||||||
// TODO
|
// TODO
|
||||||
// - do we really need the .bits...? if not we can make util in UnicodeUtil to convert 1 char into a BytesRef
|
// - do we really need the .bits...? if not we can make util in UnicodeUtil to convert 1 char into a BytesRef
|
||||||
|
|
||||||
final class UTF32ToUTF8 {
|
/** @lucene.internal */
|
||||||
|
public final class UTF32ToUTF8 {
|
||||||
|
|
||||||
// Unicode boundaries for UTF8 bytes 1,2,3,4
|
// Unicode boundaries for UTF8 bytes 1,2,3,4
|
||||||
private static final int[] startCodes = new int[] {0, 128, 2048, 65536};
|
private static final int[] startCodes = new int[] {0, 128, 2048, 65536};
|
||||||
|
|
Loading…
Reference in New Issue