snowball touchups

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150935 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2004-01-23 12:05:03 +00:00
parent b3c97c801e
commit 0d43020b12
3 changed files with 62 additions and 58 deletions

View File

@ -1,4 +1,3 @@
package net.sf.snowball;
import java.lang.reflect.Method;
@ -13,54 +12,62 @@ import java.io.OutputStream;
import java.io.FileOutputStream;
public class TestApp {
public static void main(String [] args) throws Throwable {
Class stemClass = Class.forName("net.sf.snowball.ext." +
args[0] + "Stemmer");
SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
Method stemMethod = stemClass.getMethod("stem", new Class[0]);
public static void main(String[] args) throws Throwable {
Reader reader;
reader = new InputStreamReader(new FileInputStream(args[1]));
reader = new BufferedReader(reader);
StringBuffer input = new StringBuffer();
OutputStream outstream;
if (args.length > 2 && args[2].equals("-o")) {
outstream = new FileOutputStream(args[3]);
} else if (args.length == 2) {
System.err.println("Usage: TestApp <input file> [-o <output file>]");
return;
} else {
outstream = System.out;
}
Writer output = new OutputStreamWriter(outstream);
output = new BufferedWriter(output);
int repeat = 1;
if (args.length > 4) {
repeat = Integer.parseInt(args[4]);
}
Object [] emptyArgs = new Object[0];
int character;
while ((character = reader.read()) != -1) {
char ch = (char) character;
if (Character.isWhitespace((char) ch)) {
if (input.length() > 0) {
stemmer.setCurrent(input.toString());
for (int i = repeat; i != 0; i--) {
stemMethod.invoke(stemmer, emptyArgs);
}
output.write(stemmer.getCurrent());
output.write('\n');
input.delete(0, input.length());
}
} else {
input.append(Character.toLowerCase(ch));
}
}
output.flush();
if (args.length < 2) {
exitWithUsage();
}
Class stemClass = Class.forName("net.sf.snowball.ext." +
args[0] + "Stemmer");
SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
Method stemMethod = stemClass.getMethod("stem", new Class[0]);
Reader reader;
reader = new InputStreamReader(new FileInputStream(args[1]));
reader = new BufferedReader(reader);
StringBuffer input = new StringBuffer();
OutputStream outstream = System.out;
if (args.length > 2 && args[2].equals("-o")) {
outstream = new FileOutputStream(args[3]);
} else if (args.length > 2) {
exitWithUsage();
}
Writer output = new OutputStreamWriter(outstream);
output = new BufferedWriter(output);
int repeat = 1;
if (args.length > 4) {
repeat = Integer.parseInt(args[4]);
}
Object[] emptyArgs = new Object[0];
int character;
while ((character = reader.read()) != -1) {
char ch = (char) character;
if (Character.isWhitespace(ch)) {
if (input.length() > 0) {
stemmer.setCurrent(input.toString());
for (int i = repeat; i != 0; i--) {
stemMethod.invoke(stemmer, emptyArgs);
}
output.write(stemmer.getCurrent());
output.write('\n');
input.delete(0, input.length());
}
} else {
input.append(Character.toLowerCase(ch));
}
}
output.flush();
}
private static void exitWithUsage() {
System.err.println("Usage: TestApp <stemmer name> <input file> [-o <output file>]");
System.exit(1);
}
}

View File

@ -3,7 +3,7 @@ package org.apache.lucene.analysis.snowball;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* Copyright (c) 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -89,6 +89,7 @@ public class SnowballFilter extends TokenFilter {
Class stemClass =
Class.forName("net.sf.snowball.ext." + name + "Stemmer");
stemmer = (SnowballProgram) stemClass.newInstance();
// why doesn't the SnowballProgram class have an (abstract?) stem method?
stemMethod = stemClass.getMethod("stem", new Class[0]);
} catch (Exception e) {
throw new RuntimeException(e.toString());

View File

@ -3,7 +3,7 @@ package org.apache.lucene.analysis.snowball;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* Copyright (c) 2004 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -61,12 +61,8 @@ import org.apache.lucene.analysis.*;
public class TestSnowball extends TestCase {
public TestSnowball(String name) {
super(name);
}
public void assertAnalyzesTo(Analyzer a,
String input,
public void assertAnalyzesTo(Analyzer a,
String input,
String[] output) throws Exception {
TokenStream ts = a.tokenStream("dummy", new StringReader(input));
for (int i=0; i<output.length; i++) {
@ -80,7 +76,7 @@ public class TestSnowball extends TestCase {
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents",
assertAnalyzesTo(a, "he abhorred accents",
new String[] { "he", "abhor", "accent" });
}
}