mirror of https://github.com/apache/lucene.git
convert to utf-8
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@151009 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e7ae41ce23
commit
5ae03e4587
|
@ -66,7 +66,7 @@ import java.util.Set;
|
|||
/**
|
||||
* Based on (copied) the GermanStemFilter
|
||||
*
|
||||
* @author João Kramer
|
||||
* @author João Kramer
|
||||
* <p/>
|
||||
* <p/>
|
||||
* A filter that stems German words. It supports a table of words that should
|
||||
|
|
|
@ -56,7 +56,7 @@ package org.apache.lucene.analysis.br;
|
|||
|
||||
/**
|
||||
* A stemmer for Brazilian words. The algorithm is based on the report
|
||||
* "A Fast and Simple Stemming Algorithm for German Words" by Jörg
|
||||
* "A Fast and Simple Stemming Algorithm for German Words" by Jörg
|
||||
* Caumanns (joerg.caumanns@isst.fhg.de).
|
||||
*
|
||||
* @author Gerhard Schwarz
|
||||
|
@ -282,8 +282,8 @@ public class BrazilianStemmer {
|
|||
/**
|
||||
* 1) Turn to lowercase
|
||||
* 2) Remove accents
|
||||
* 3) ã -> a ; õ -> o
|
||||
* 4) ç -> c
|
||||
* 3) ã -> a ; õ -> o
|
||||
* 4) ç -> c
|
||||
*
|
||||
* @return null or a string transformed
|
||||
*/
|
||||
|
@ -299,31 +299,31 @@ public class BrazilianStemmer {
|
|||
|
||||
value = value.toLowerCase() ;
|
||||
for (j=0 ; j < value.length() ; j++) {
|
||||
if ((value.charAt(j) == 'á') ||
|
||||
(value.charAt(j) == 'â') ||
|
||||
(value.charAt(j) == 'ã')) {
|
||||
if ((value.charAt(j) == 'á') ||
|
||||
(value.charAt(j) == 'â') ||
|
||||
(value.charAt(j) == 'ã')) {
|
||||
r= r + "a" ; continue ;
|
||||
}
|
||||
if ((value.charAt(j) == 'é') ||
|
||||
(value.charAt(j) == 'ê')) {
|
||||
if ((value.charAt(j) == 'é') ||
|
||||
(value.charAt(j) == 'ê')) {
|
||||
r= r + "e" ; continue ;
|
||||
}
|
||||
if (value.charAt(j) == 'í') {
|
||||
if (value.charAt(j) == 'í') {
|
||||
r= r + "i" ; continue ;
|
||||
}
|
||||
if ((value.charAt(j) == 'ó') ||
|
||||
(value.charAt(j) == 'ô') ||
|
||||
(value.charAt(j) == 'õ')) {
|
||||
if ((value.charAt(j) == 'ó') ||
|
||||
(value.charAt(j) == 'ô') ||
|
||||
(value.charAt(j) == 'õ')) {
|
||||
r= r + "o" ; continue ;
|
||||
}
|
||||
if ((value.charAt(j) == 'ú') ||
|
||||
(value.charAt(j) == 'ü')) {
|
||||
if ((value.charAt(j) == 'ú') ||
|
||||
(value.charAt(j) == 'ü')) {
|
||||
r= r + "u" ; continue ;
|
||||
}
|
||||
if (value.charAt(j) == 'ç') {
|
||||
if (value.charAt(j) == 'ç') {
|
||||
r= r + "c" ; continue ;
|
||||
}
|
||||
if (value.charAt(j) == 'ñ') {
|
||||
if (value.charAt(j) == 'ñ') {
|
||||
r= r + "n" ; continue ;
|
||||
}
|
||||
|
||||
|
@ -410,7 +410,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~'.
|
||||
* Creates CT (changed term), substituting 'ã' and 'õ' for 'a~' and 'o~'.
|
||||
*/
|
||||
private void createCT( String term ) {
|
||||
CT = changeTerm(term) ;
|
||||
|
@ -1008,7 +1008,7 @@ public class BrazilianStemmer {
|
|||
/**
|
||||
* Residual suffix
|
||||
*
|
||||
* If the word ends with one of the suffixes (os a i o á í ó)
|
||||
* If the word ends with one of the suffixes (os a i o á í ó)
|
||||
* in RV, delete it
|
||||
*
|
||||
*/
|
||||
|
@ -1031,11 +1031,11 @@ public class BrazilianStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* If the word ends with one of (e é ê) in RV, delete it,
|
||||
* If the word ends with one of (e é ê) in RV, delete it,
|
||||
* and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
|
||||
* delete the 'u' (or 'i')
|
||||
*
|
||||
* Or if the word ends with ç, remove the cedilla
|
||||
* Or if the word ends with ç, remove the cedilla
|
||||
*
|
||||
*/
|
||||
private void step5() {
|
||||
|
|
Loading…
Reference in New Issue