merge up to lucene trunk rev 924781

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/branches/newtrunk@924935 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2010-03-18 18:22:18 +00:00
parent 097709a317
commit 0d10770063
3 changed files with 15 additions and 17 deletions

View File

@@ -54,7 +54,7 @@
   <patternset id="src.dist.patterns"
               includes="src/,build.xml,*build*.xml,docs/,*.txt,contrib/,*pom.xml*,lib/"
-              excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/,contrib/benchmark/temp/,contrib/benchmark/work/,contrib/analyzers/common/src/test/org/apache/lucene/analysis/snowball/data/"
+              excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/,contrib/benchmark/temp/,contrib/benchmark/work/"
   />
   <patternset id="binary.build.dist.patterns"
               includes="${final.name}.jar,${demo.war.name}.war,${demo.name}.jar,docs/,contrib/*/*.jar,contrib/*/*.war, contrib/*/*/*.jar"

View File

@@ -18,12 +18,11 @@ package org.apache.lucene.analysis.snowball;
  */
 
 import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.StringReader;
+import java.util.zip.ZipFile;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.KeywordTokenizer;
@@ -35,24 +34,25 @@ import org.apache.lucene.analysis.Tokenizer;
  */
 public class TestSnowballVocab extends BaseTokenStreamTestCase {
   private Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
-  File dataRoot = null;
+  ZipFile zipFile = null;
 
   @Override
   protected void setUp() throws Exception {
     super.setUp();
-    try {
-      dataRoot = getDataFile("data");
-    } catch (IOException ioe) {
-      dataRoot = null;
-      System.err.println("WARN: This test was disabled, as the snowball test files are not available!");
-    }
+    this.zipFile = new ZipFile(getDataFile("TestSnowballVocabData.zip"));
   }
 
+  @Override
+  protected void tearDown() throws Exception {
+    this.zipFile.close();
+    this.zipFile = null;
+    super.tearDown();
+  }
+
   /**
    * Run all languages against their snowball vocabulary tests.
    */
   public void testStemmers() throws IOException {
-    if (dataRoot == null) return;
     assertCorrectOutput("Danish", "danish");
     assertCorrectOutput("Dutch", "dutch");
     assertCorrectOutput("English", "english");
@@ -86,14 +86,12 @@ public class TestSnowballVocab extends BaseTokenStreamTestCase {
       throws IOException {
     if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
     TokenStream filter = new SnowballFilter(tokenizer, snowballLanguage);
-    InputStream vocFile = new FileInputStream(new File(dataRoot,
-        dataDirectory + "/voc.txt"));
-    InputStream outputFile = new FileInputStream(new File(dataRoot,
-        dataDirectory + "/output.txt"));
+    InputStream voc = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/voc.txt"));
+    InputStream out = zipFile.getInputStream(zipFile.getEntry(dataDirectory + "/output.txt"));
     BufferedReader vocReader = new BufferedReader(new InputStreamReader(
-        vocFile, "UTF-8"));
+        voc, "UTF-8"));
     BufferedReader outputReader = new BufferedReader(new InputStreamReader(
-        outputFile, "UTF-8"));
+        out, "UTF-8"));
     String inputWord = null;
     while ((inputWord = vocReader.readLine()) != null) {
       String expectedWord = outputReader.readLine();
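
The change replaces the optional on-disk data directory (and the silent skip when it was missing) with a zip file that is always present, so the vocabulary test always runs. A minimal standalone sketch of the zip-backed lookup follows, assuming the <language>/voc.txt and <language>/output.txt entry layout shown above; the literal zip path and the main method are placeholders, since the real test resolves the file with getDataFile and stems each word with SnowballFilter:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

public class ZipVocabSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder path: the test resolves TestSnowballVocabData.zip via getDataFile(...).
    try (ZipFile zip = new ZipFile("TestSnowballVocabData.zip")) {
      // Each language stores its data under "<dir>/voc.txt" and "<dir>/output.txt".
      ZipEntry vocEntry = zip.getEntry("english/voc.txt");
      ZipEntry outEntry = zip.getEntry("english/output.txt");
      try (BufferedReader vocReader = new BufferedReader(
               new InputStreamReader(zip.getInputStream(vocEntry), "UTF-8"));
           BufferedReader outReader = new BufferedReader(
               new InputStreamReader(zip.getInputStream(outEntry), "UTF-8"))) {
        String inputWord;
        while ((inputWord = vocReader.readLine()) != null) {
          // The real test stems inputWord with SnowballFilter and asserts that the
          // result equals the corresponding line of output.txt.
          String expectedWord = outReader.readLine();
          System.out.println(inputWord + " -> " + expectedWord);
        }
      }
    }
  }
}

Keeping the data in a single zip avoids unpacking many loose vocabulary files into the source tree and lets the test run unconditionally instead of printing a warning and returning early when the data directory is absent.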