mirror of https://github.com/apache/lucene.git
LUCENE-3726: default Kuromoji to search mode
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1240710 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0e73d06216
commit
009608d9f2
|
@ -22,7 +22,11 @@ import java.util.ArrayList;
|
|||
import java.util.EnumMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.kuromoji.dict.*;
|
||||
import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
|
||||
import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
|
||||
import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
|
||||
import org.apache.lucene.analysis.kuromoji.dict.UnknownDictionary;
|
||||
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
|
||||
import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
|
||||
import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
|
||||
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;
|
||||
|
@ -37,6 +41,8 @@ public class Segmenter {
|
|||
NORMAL, SEARCH, EXTENDED
|
||||
}
|
||||
|
||||
public static final Mode DEFAULT_MODE = Mode.SEARCH;
|
||||
|
||||
private final Viterbi viterbi;
|
||||
|
||||
private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<Type, Dictionary>(Type.class);
|
||||
|
@ -44,31 +50,25 @@ public class Segmenter {
|
|||
private final boolean split;
|
||||
|
||||
public Segmenter() {
|
||||
this(null, Mode.NORMAL, false);
|
||||
}
|
||||
|
||||
public Segmenter(UserDictionary userDictionary, Mode mode) {
|
||||
this(userDictionary, mode, false);
|
||||
}
|
||||
|
||||
public Segmenter(UserDictionary userDictionary) {
|
||||
this(userDictionary, Mode.NORMAL, false);
|
||||
this(null, DEFAULT_MODE, false);
|
||||
}
|
||||
|
||||
public Segmenter(Mode mode) {
|
||||
this(null, mode, false);
|
||||
}
|
||||
|
||||
public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
|
||||
public Segmenter(UserDictionary userDictionary) {
|
||||
this(userDictionary, DEFAULT_MODE, false);
|
||||
}
|
||||
|
||||
public Segmenter(UserDictionary userDictionary, Mode mode) {
|
||||
this(userDictionary, mode, false);
|
||||
}
|
||||
|
||||
public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
|
||||
final TokenInfoDictionary dict = TokenInfoDictionary.getInstance();
|
||||
final UnknownDictionary unknownDict = UnknownDictionary.getInstance();
|
||||
this.viterbi = new Viterbi(dict,
|
||||
unknownDict,
|
||||
ConnectionCosts.getInstance(),
|
||||
userDictionary,
|
||||
mode);
|
||||
|
||||
this.viterbi = new Viterbi(dict, unknownDict, ConnectionCosts.getInstance(), userDictionary, mode);
|
||||
this.split = split;
|
||||
|
||||
dictionaryMap.put(Type.KNOWN, dict);
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.nio.charset.Charset;
|
|||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
|
||||
|
@ -62,7 +63,7 @@ public class KuromojiTokenizerFactory extends BaseTokenizerFactory implements Re
|
|||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) {
|
||||
Mode mode = args.get(MODE) != null ? Mode.valueOf(args.get(MODE).toUpperCase(Locale.ENGLISH)) : Mode.NORMAL;
|
||||
Mode mode = getMode(args);
|
||||
String userDictionaryPath = args.get(USER_DICT_PATH);
|
||||
try {
|
||||
if (userDictionaryPath != null) {
|
||||
|
@ -88,4 +89,13 @@ public class KuromojiTokenizerFactory extends BaseTokenizerFactory implements Re
|
|||
public Tokenizer create(Reader input) {
|
||||
return new KuromojiTokenizer(segmenter, input);
|
||||
}
|
||||
|
||||
private Mode getMode(Map<String, String> args) {
|
||||
String mode = args.get(MODE);
|
||||
if (mode != null) {
|
||||
return Mode.valueOf(mode.toUpperCase(Locale.ENGLISH));
|
||||
} else {
|
||||
return Segmenter.DEFAULT_MODE;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue