LUCENE-3726: default Kuromoji to search mode

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1240710 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-02-05 12:41:13 +00:00
parent 0e73d06216
commit 009608d9f2
2 changed files with 29 additions and 19 deletions

View File

@ -22,7 +22,11 @@ import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.analysis.kuromoji.dict.*;
import org.apache.lucene.analysis.kuromoji.dict.ConnectionCosts;
import org.apache.lucene.analysis.kuromoji.dict.Dictionary;
import org.apache.lucene.analysis.kuromoji.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.kuromoji.dict.UnknownDictionary;
import org.apache.lucene.analysis.kuromoji.dict.UserDictionary;
import org.apache.lucene.analysis.kuromoji.viterbi.GraphvizFormatter;
import org.apache.lucene.analysis.kuromoji.viterbi.Viterbi;
import org.apache.lucene.analysis.kuromoji.viterbi.ViterbiNode;
@ -37,6 +41,8 @@ public class Segmenter {
NORMAL, SEARCH, EXTENDED
}
/** Mode used when a caller does not supply one explicitly; currently {@link Mode#SEARCH}. */
public static final Mode DEFAULT_MODE = Mode.SEARCH;
private final Viterbi viterbi;
private final EnumMap<Type, Dictionary> dictionaryMap = new EnumMap<Type, Dictionary>(Type.class);
@ -44,31 +50,25 @@ public class Segmenter {
private final boolean split;
public Segmenter() {
this(null, Mode.NORMAL, false);
}
public Segmenter(UserDictionary userDictionary, Mode mode) {
this(userDictionary, mode, false);
}
public Segmenter(UserDictionary userDictionary) {
this(userDictionary, Mode.NORMAL, false);
this(null, DEFAULT_MODE, false);
}
/**
 * Creates a segmenter for the given mode.
 * Delegates to the three-argument constructor, passing {@code null} as the
 * user dictionary and {@code false} for {@code split}.
 *
 * @param mode segmentation mode handed through unchanged
 */
public Segmenter(Mode mode) {
this(null, mode, false);
}
/**
 * Creates a segmenter with the given user dictionary in {@code DEFAULT_MODE}.
 * Delegates to the three-argument constructor with {@code split} set to
 * {@code false}.
 *
 * @param userDictionary user dictionary handed through unchanged
 */
public Segmenter(UserDictionary userDictionary) {
this(userDictionary, DEFAULT_MODE, false);
}
/**
 * Creates a segmenter with the given user dictionary and mode.
 * Delegates to the three-argument constructor with {@code split} set to
 * {@code false}.
 *
 * @param userDictionary user dictionary handed through unchanged
 * @param mode segmentation mode handed through unchanged
 */
public Segmenter(UserDictionary userDictionary, Mode mode) {
this(userDictionary, mode, false);
}
public Segmenter(UserDictionary userDictionary, Mode mode, boolean split) {
final TokenInfoDictionary dict = TokenInfoDictionary.getInstance();
final UnknownDictionary unknownDict = UnknownDictionary.getInstance();
this.viterbi = new Viterbi(dict,
unknownDict,
ConnectionCosts.getInstance(),
userDictionary,
mode);
this.viterbi = new Viterbi(dict, unknownDict, ConnectionCosts.getInstance(), userDictionary, mode);
this.split = split;
dictionaryMap.put(Type.KNOWN, dict);

View File

@ -24,6 +24,7 @@ import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.kuromoji.KuromojiTokenizer;
@ -62,7 +63,7 @@ public class KuromojiTokenizerFactory extends BaseTokenizerFactory implements Re
@Override
public void inform(ResourceLoader loader) {
Mode mode = args.get(MODE) != null ? Mode.valueOf(args.get(MODE).toUpperCase(Locale.ENGLISH)) : Mode.NORMAL;
Mode mode = getMode(args);
String userDictionaryPath = args.get(USER_DICT_PATH);
try {
if (userDictionaryPath != null) {
@ -88,4 +89,13 @@ public class KuromojiTokenizerFactory extends BaseTokenizerFactory implements Re
// Returns a new KuromojiTokenizer that reads from the given input and uses
// the `segmenter` reference visible to this factory.
public Tokenizer create(Reader input) {
return new KuromojiTokenizer(segmenter, input);
}
}
/**
 * Resolves the segmentation mode from the factory arguments.
 *
 * <p>When the {@code MODE} entry is absent, {@link Segmenter#DEFAULT_MODE} is
 * returned. Otherwise the value is upper-cased with {@link Locale#ENGLISH}
 * and looked up via {@code Mode.valueOf}.
 *
 * @param args factory arguments to read the {@code MODE} entry from
 * @return the configured mode, or the segmenter default when none is set
 * @throws IllegalArgumentException if the configured value names no {@code Mode} constant
 */
private Mode getMode(Map<String, String> args) {
  final String configured = args.get(MODE);
  if (configured == null) {
    // Nothing configured: fall back to the segmenter-wide default.
    return Segmenter.DEFAULT_MODE;
  }
  final String constantName = configured.toUpperCase(Locale.ENGLISH);
  return Mode.valueOf(constantName);
}
}