mirror of https://github.com/apache/lucene.git
LUCENE-3700: optionally support naist-jdic for kuromoji
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1232268 13f79535-47bb-0310-9956-ffa450edef68
parent 48c01e5a2b, commit f562a8a0dc

build.xml
@@ -25,10 +25,18 @@
<property name="build.dir" location="../build/kuromoji" />
|
||||
<property name="dist.dir" location="../dist/kuromoji" />
|
||||
|
||||
<!-- default configuration: uses mecab-ipadic -->
|
||||
<property name="ipadic.version" value="mecab-ipadic-2.7.0-20070801" />
|
||||
<property name="dict.src.file" value="${ipadic.version}.tar.gz" />
|
||||
<!-- <property name="dict.url" value="http://atilika.com/releases/mecab-ipadic/${dict.src.file}" /> -->
|
||||
<property name="dict.url" value="http://mecab.googlecode.com/files/${dict.src.file}"/>
|
||||
|
||||
<!-- alternative configuration: uses mecab-naist-jdic
|
||||
<property name="ipadic.version" value="mecab-naist-jdic-0.6.3b-20111013" />
|
||||
<property name="dict.src.file" value="${ipadic.version}.tar.gz" />
|
||||
<property name="dict.url" value="http://sourceforge.jp/frs/redir.php?m=iij&f=/naist-jdic/53500/${dict.src.file}"/>
|
||||
-->
|
||||
|
||||
<property name="dict.src.dir" value="${build.dir}/${ipadic.version}" />
|
||||
<property name="dict.encoding" value="euc-jp"/>
|
||||
<property name="dict.format" value="ipadic"/>
|
||||
|
|
|
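
Note that only ipadic.version and dict.url really change between the default mecab-ipadic setup and the commented-out naist-jdic alternative; dict.src.file is the same expression in both blocks, and dict.encoding and dict.format stay euc-jp and ipadic for either dictionary, which is why those two properties sit outside the commented sections. The small self-contained Java sketch below is illustrative only, not part of this commit and not Lucene code, and the class and field names are made up; it just models which pieces of the configuration vary and which are shared.

// Illustrative only: models the two dictionary configurations declared above.
// Only the version and download URL vary; encoding and format are shared.
public class KuromojiDictConfigs {

  static final class DictConfig {
    final String version;
    final String srcFile;              // ${dict.src.file}
    final String url;                  // ${dict.url}
    final String encoding = "euc-jp";  // shared ${dict.encoding}
    final String format = "ipadic";    // shared ${dict.format}

    DictConfig(String version, String baseUrl) {
      this.version = version;
      this.srcFile = version + ".tar.gz";
      this.url = baseUrl + srcFile;
    }
  }

  static final DictConfig MECAB_IPADIC = new DictConfig(
      "mecab-ipadic-2.7.0-20070801",
      "http://mecab.googlecode.com/files/");

  static final DictConfig NAIST_JDIC = new DictConfig(
      "mecab-naist-jdic-0.6.3b-20111013",
      "http://sourceforge.jp/frs/redir.php?m=iij&f=/naist-jdic/53500/");

  public static void main(String[] args) {
    for (DictConfig c : new DictConfig[] { MECAB_IPADIC, NAIST_JDIC }) {
      System.out.println(c.version + " -> " + c.url + " (" + c.encoding + ", " + c.format + ")");
    }
  }
}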

TestKuromojiTokenizer.java
@@ -78,12 +78,17 @@ public class TestKuromojiTokenizer extends BaseTokenStreamTestCase {
    );
  }

  /* Note this is really a stupid test just to see if things arent horribly slow.
   * ideally the test would actually fail instead of hanging...
   */
  public void testDecomposition5() throws Exception {
    assertAnalyzesTo(analyzer, "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ",
      new String[] { "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ", "くよくよ" },
      new int[] { 0, 4, 8, 12, 16, 20, 24, 28, 32, 36},
      new int[] { 4, 8, 12, 16, 20, 24, 28, 32, 36, 40 }
    );
    TokenStream ts = analyzer.tokenStream("bogus", new StringReader("くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ"));
    ts.reset();
    while (ts.incrementToken()) {
    }
    ts.end();
    ts.close();
  }

  /** Tests that sentence offset is incorporated into the resulting offsets */
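
In testDecomposition5 above, the two int[] arguments to assertAnalyzesTo are the expected start and end character offsets of the ten くよくよ tokens (four characters each), and the trailing tokenStream loop just drains the analyzer output to make sure analysis terminates in reasonable time. As a standalone illustration, not part of the commit, the sketch below uses the same reset/incrementToken/end/close pattern but also reads each term and its offsets through the standard attribute API; the analyzer is taken as a parameter because the concrete kuromoji analyzer class name has changed across Lucene versions.

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Standalone sketch (not part of the commit): walk a TokenStream and print each
// term with its start/end offsets, the same values assertAnalyzesTo checks above.
public class PrintTokenOffsets {
  static void printTokens(Analyzer analyzer, String text) throws Exception {
    TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // For the くよくよ input above this prints 10 tokens at offsets 0-4, 4-8, ..., 36-40.
      System.out.println(term.toString() + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
    ts.end();
    ts.close();
  }
}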