mirror of https://github.com/apache/lucene.git
snowball touchups
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150935 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b3c97c801e
commit
0d43020b12
|
@ -1,4 +1,3 @@
|
|||
|
||||
package net.sf.snowball;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
|
@ -13,54 +12,62 @@ import java.io.OutputStream;
|
|||
import java.io.FileOutputStream;
|
||||
|
||||
public class TestApp {
|
||||
public static void main(String [] args) throws Throwable {
|
||||
Class stemClass = Class.forName("net.sf.snowball.ext." +
|
||||
args[0] + "Stemmer");
|
||||
SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
|
||||
Method stemMethod = stemClass.getMethod("stem", new Class[0]);
|
||||
public static void main(String[] args) throws Throwable {
|
||||
|
||||
Reader reader;
|
||||
reader = new InputStreamReader(new FileInputStream(args[1]));
|
||||
reader = new BufferedReader(reader);
|
||||
|
||||
StringBuffer input = new StringBuffer();
|
||||
|
||||
OutputStream outstream;
|
||||
|
||||
if (args.length > 2 && args[2].equals("-o")) {
|
||||
outstream = new FileOutputStream(args[3]);
|
||||
} else if (args.length == 2) {
|
||||
System.err.println("Usage: TestApp <input file> [-o <output file>]");
|
||||
return;
|
||||
} else {
|
||||
outstream = System.out;
|
||||
}
|
||||
Writer output = new OutputStreamWriter(outstream);
|
||||
output = new BufferedWriter(output);
|
||||
|
||||
int repeat = 1;
|
||||
if (args.length > 4) {
|
||||
repeat = Integer.parseInt(args[4]);
|
||||
}
|
||||
|
||||
Object [] emptyArgs = new Object[0];
|
||||
int character;
|
||||
while ((character = reader.read()) != -1) {
|
||||
char ch = (char) character;
|
||||
if (Character.isWhitespace((char) ch)) {
|
||||
if (input.length() > 0) {
|
||||
stemmer.setCurrent(input.toString());
|
||||
for (int i = repeat; i != 0; i--) {
|
||||
stemMethod.invoke(stemmer, emptyArgs);
|
||||
}
|
||||
output.write(stemmer.getCurrent());
|
||||
output.write('\n');
|
||||
input.delete(0, input.length());
|
||||
}
|
||||
} else {
|
||||
input.append(Character.toLowerCase(ch));
|
||||
}
|
||||
}
|
||||
output.flush();
|
||||
if (args.length < 2) {
|
||||
exitWithUsage();
|
||||
}
|
||||
|
||||
Class stemClass = Class.forName("net.sf.snowball.ext." +
|
||||
args[0] + "Stemmer");
|
||||
SnowballProgram stemmer = (SnowballProgram) stemClass.newInstance();
|
||||
Method stemMethod = stemClass.getMethod("stem", new Class[0]);
|
||||
|
||||
Reader reader;
|
||||
reader = new InputStreamReader(new FileInputStream(args[1]));
|
||||
reader = new BufferedReader(reader);
|
||||
|
||||
StringBuffer input = new StringBuffer();
|
||||
|
||||
OutputStream outstream = System.out;
|
||||
|
||||
if (args.length > 2 && args[2].equals("-o")) {
|
||||
outstream = new FileOutputStream(args[3]);
|
||||
} else if (args.length > 2) {
|
||||
exitWithUsage();
|
||||
}
|
||||
|
||||
Writer output = new OutputStreamWriter(outstream);
|
||||
output = new BufferedWriter(output);
|
||||
|
||||
int repeat = 1;
|
||||
if (args.length > 4) {
|
||||
repeat = Integer.parseInt(args[4]);
|
||||
}
|
||||
|
||||
Object[] emptyArgs = new Object[0];
|
||||
int character;
|
||||
while ((character = reader.read()) != -1) {
|
||||
char ch = (char) character;
|
||||
if (Character.isWhitespace(ch)) {
|
||||
if (input.length() > 0) {
|
||||
stemmer.setCurrent(input.toString());
|
||||
for (int i = repeat; i != 0; i--) {
|
||||
stemMethod.invoke(stemmer, emptyArgs);
|
||||
}
|
||||
output.write(stemmer.getCurrent());
|
||||
output.write('\n');
|
||||
input.delete(0, input.length());
|
||||
}
|
||||
} else {
|
||||
input.append(Character.toLowerCase(ch));
|
||||
}
|
||||
}
|
||||
output.flush();
|
||||
}
|
||||
|
||||
private static void exitWithUsage() {
|
||||
System.err.println("Usage: TestApp <stemmer name> <input file> [-o <output file>]");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ package org.apache.lucene.analysis.snowball;
|
|||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* Copyright (c) 2004 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -89,6 +89,7 @@ public class SnowballFilter extends TokenFilter {
|
|||
Class stemClass =
|
||||
Class.forName("net.sf.snowball.ext." + name + "Stemmer");
|
||||
stemmer = (SnowballProgram) stemClass.newInstance();
|
||||
// why doesn't the SnowballProgram class have an (abstract?) stem method?
|
||||
stemMethod = stemClass.getMethod("stem", new Class[0]);
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e.toString());
|
||||
|
|
|
@ -3,7 +3,7 @@ package org.apache.lucene.analysis.snowball;
|
|||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2001 The Apache Software Foundation. All rights
|
||||
* Copyright (c) 2004 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
|
@ -61,12 +61,8 @@ import org.apache.lucene.analysis.*;
|
|||
|
||||
public class TestSnowball extends TestCase {
|
||||
|
||||
public TestSnowball(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public void assertAnalyzesTo(Analyzer a,
|
||||
String input,
|
||||
public void assertAnalyzesTo(Analyzer a,
|
||||
String input,
|
||||
String[] output) throws Exception {
|
||||
TokenStream ts = a.tokenStream("dummy", new StringReader(input));
|
||||
for (int i=0; i<output.length; i++) {
|
||||
|
@ -80,7 +76,7 @@ public class TestSnowball extends TestCase {
|
|||
|
||||
public void testEnglish() throws Exception {
|
||||
Analyzer a = new SnowballAnalyzer("English");
|
||||
assertAnalyzesTo(a, "he abhorred accents",
|
||||
assertAnalyzesTo(a, "he abhorred accents",
|
||||
new String[] { "he", "abhor", "accent" });
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue