diff --git a/CHANGES.txt b/CHANGES.txt index 81bed138c26..27327b9149e 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,10 @@ $Id$ methods to replace a PhraseQuery with a SpanNearQuery instead, keeping the proper slop factor. (Erik Hatcher) + 4. Changed the encoding of GermanAnalyzer.java and GermanStemmer.java to + UTF-8 and changed the build encoding to UTF-8, to make changed files + compile. (Otis Gospodnetic) + 1.4 RC3 diff --git a/build.xml b/build.xml index 0e0d61dd35b..a93a27ef2bb 100644 --- a/build.xml +++ b/build.xml @@ -23,7 +23,7 @@ - + diff --git a/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java index 386987d269b..1277430d165 100644 --- a/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java +++ b/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java @@ -47,14 +47,14 @@ public class GermanAnalyzer extends Analyzer { */ private String[] GERMAN_STOP_WORDS = { "einer", "eine", "eines", "einem", "einen", - "der", "die", "das", "dass", "daß", + "der", "die", "das", "dass", "daß", "du", "er", "sie", "es", "was", "wer", "wie", "wir", "und", "oder", "ohne", "mit", "am", "im", "in", "aus", "auf", "ist", "sein", "war", "wird", "ihr", "ihre", "ihres", - "als", "für", "von", "mit", + "als", "für", "von", "mit", "dich", "dir", "mich", "mir", "mein", "sein", "kein", "durch", "wegen", "wird" diff --git a/src/java/org/apache/lucene/analysis/de/GermanStemmer.java b/src/java/org/apache/lucene/analysis/de/GermanStemmer.java index c5a1cd04cdc..b8619816034 100644 --- a/src/java/org/apache/lucene/analysis/de/GermanStemmer.java +++ b/src/java/org/apache/lucene/analysis/de/GermanStemmer.java @@ -18,7 +18,7 @@ package org.apache.lucene.analysis.de; /** * A stemmer for German words. The algorithm is based on the report - * "A Fast and Simple Stemming Algorithm for German Words" by Jörg + * "A Fast and Simple Stemming Algorithm for German Words" by Jörg * Caumanns (joerg.caumanns@isst.fhg.de). * * @author Gerhard Schwarz @@ -153,12 +153,12 @@ public class GermanStemmer /** * Do some substitutions for the term to reduce overstemming: * - * - Substitute Umlauts with their corresponding vowel: äöü -> aou, - * "ß" is substituted by "ss" + * - Substitute Umlauts with their corresponding vowel: äöü -> aou, + * "ß" is substituted by "ss" * - Substitute a second char of a pair of equal characters with * an asterisk: ?? -> ?* * - Substitute some common character combinations with a token: - * sch/ch/ei/ie/ig/st -> $/§/%/&/#/! + * sch/ch/ei/ie/ig/st -> $/§/%/&/#/! */ private void substitute( StringBuffer buffer ) { @@ -169,18 +169,18 @@ public class GermanStemmer buffer.setCharAt( c, '*' ); } // Substitute Umlauts. - else if ( buffer.charAt( c ) == 'ä' ) { + else if ( buffer.charAt( c ) == 'ä' ) { buffer.setCharAt( c, 'a' ); } - else if ( buffer.charAt( c ) == 'ö' ) { + else if ( buffer.charAt( c ) == 'ö' ) { buffer.setCharAt( c, 'o' ); } - else if ( buffer.charAt( c ) == 'ü' ) { + else if ( buffer.charAt( c ) == 'ü' ) { buffer.setCharAt( c, 'u' ); } // Take care that at least one character is left left side from the current one if ( c < buffer.length() - 1 ) { - if ( buffer.charAt( c ) == 'ß' ) { + if ( buffer.charAt( c ) == 'ß' ) { buffer.setCharAt( c, 's' ); buffer.insert( c + 1, 's' ); substCount++; @@ -194,7 +194,7 @@ public class GermanStemmer substCount =+ 2; } else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) { - buffer.setCharAt( c, '§' ); + buffer.setCharAt( c, '§' ); buffer.deleteCharAt( c + 1 ); substCount++; } @@ -225,7 +225,7 @@ public class GermanStemmer /** * Undoes the changes made by substitute(). That are character pairs and * character combinations. Umlauts will remain as their corresponding vowel, - * as "ß" remains as "ss". + * as "ß" remains as "ss". */ private void resubstitute( StringBuffer buffer ) { @@ -238,7 +238,7 @@ public class GermanStemmer buffer.setCharAt( c, 's' ); buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 ); } - else if ( buffer.charAt( c ) == '§' ) { + else if ( buffer.charAt( c ) == '§' ) { buffer.setCharAt( c, 'c' ); buffer.insert( c + 1, 'h' ); } diff --git a/src/test/org/apache/lucene/queryParser/TestQueryParser.java b/src/test/org/apache/lucene/queryParser/TestQueryParser.java index 32d83d66f8f..c3519bb7ef7 100644 --- a/src/test/org/apache/lucene/queryParser/TestQueryParser.java +++ b/src/test/org/apache/lucene/queryParser/TestQueryParser.java @@ -159,8 +159,8 @@ public class TestQueryParser extends TestCase { public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); - assertQueryEquals("türm term term", null, "türm term term"); - assertQueryEquals("ümlaut", null, "ümlaut"); + assertQueryEquals("türm term term", null, "türm term term"); + assertQueryEquals("ümlaut", null, "ümlaut"); assertQueryEquals("a AND b", null, "+a +b"); assertQueryEquals("(a AND b)", null, "+a +b");