From c0319d592814dd0976862dcb4a6d941489b9637f Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 8 Feb 2012 16:27:47 +0000 Subject: [PATCH] SOLR-3056: document expectations in these files git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1241960 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/resources/org/apache/lucene/analysis/ar/stopwords.txt | 2 ++ .../src/resources/org/apache/lucene/analysis/el/stopwords.txt | 2 ++ .../src/resources/org/apache/lucene/analysis/fa/stopwords.txt | 2 ++ .../src/resources/org/apache/lucene/analysis/hi/stopwords.txt | 4 ++++ solr/example/solr/conf/lang/stopwords_ar.txt | 2 ++ solr/example/solr/conf/lang/stopwords_el.txt | 2 ++ solr/example/solr/conf/lang/stopwords_fa.txt | 2 ++ solr/example/solr/conf/lang/stopwords_hi.txt | 4 ++++ 8 files changed, 20 insertions(+) diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt index 21897849280..046829db6a2 100644 --- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt @@ -2,6 +2,8 @@ # See http://members.unine.ch/jacques.savoy/clef/index.html. 
# Also see http://www.opensource.org/licenses/bsd-license.html # Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا من ومن منها diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt index 1a08d318326..232681f5bd6 100644 --- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/el/stopwords.txt @@ -1,4 +1,6 @@ # Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' ο η το diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt index 36182817b40..723641c6da7 100644 --- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt @@ -1,6 +1,8 @@ # This file was created by Jacques Savoy and is distributed under the BSD license. # See http://members.unine.ch/jacques.savoy/clef/index.html. 
# Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the Arabic yeh 'ي' (U+064A) instead of the Farsi yeh 'ی' (U+06CC) انان نداشته سراسر diff --git a/modules/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt b/modules/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt index 53874db03d2..86286bb083b 100644 --- a/modules/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt +++ b/modules/analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt @@ -1,6 +1,10 @@ # Also see http://www.opensource.org/licenses/bsd-license.html # See http://members.unine.ch/jacques.savoy/clef/index.html. # This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. When adding additional entries to this list, +# please add the normalized form as well. अंदर अत अपना diff --git a/solr/example/solr/conf/lang/stopwords_ar.txt b/solr/example/solr/conf/lang/stopwords_ar.txt index 21897849280..046829db6a2 100644 --- a/solr/example/solr/conf/lang/stopwords_ar.txt +++ b/solr/example/solr/conf/lang/stopwords_ar.txt @@ -2,6 +2,8 @@ # See http://members.unine.ch/jacques.savoy/clef/index.html. 
# Also see http://www.opensource.org/licenses/bsd-license.html # Cleaned on October 11, 2009 (not normalized, so use before normalization) +# This means that when modifying this list, you might need to add some +# redundant entries, for example containing forms with both أ and ا من ومن منها diff --git a/solr/example/solr/conf/lang/stopwords_el.txt b/solr/example/solr/conf/lang/stopwords_el.txt index 1a08d318326..232681f5bd6 100644 --- a/solr/example/solr/conf/lang/stopwords_el.txt +++ b/solr/example/solr/conf/lang/stopwords_el.txt @@ -1,4 +1,6 @@ # Lucene Greek Stopwords list +# Note: by default this file is used after GreekLowerCaseFilter, +# so when modifying this file use 'σ' instead of 'ς' ο η το diff --git a/solr/example/solr/conf/lang/stopwords_fa.txt b/solr/example/solr/conf/lang/stopwords_fa.txt index 36182817b40..723641c6da7 100644 --- a/solr/example/solr/conf/lang/stopwords_fa.txt +++ b/solr/example/solr/conf/lang/stopwords_fa.txt @@ -1,6 +1,8 @@ # This file was created by Jacques Savoy and is distributed under the BSD license. # See http://members.unine.ch/jacques.savoy/clef/index.html. # Also see http://www.opensource.org/licenses/bsd-license.html +# Note: by default this file is used after normalization, so when adding entries +# to this file, use the Arabic yeh 'ي' (U+064A) instead of the Farsi yeh 'ی' (U+06CC) انان نداشته سراسر diff --git a/solr/example/solr/conf/lang/stopwords_hi.txt b/solr/example/solr/conf/lang/stopwords_hi.txt index 53874db03d2..86286bb083b 100644 --- a/solr/example/solr/conf/lang/stopwords_hi.txt +++ b/solr/example/solr/conf/lang/stopwords_hi.txt @@ -1,6 +1,10 @@ # Also see http://www.opensource.org/licenses/bsd-license.html # See http://members.unine.ch/jacques.savoy/clef/index.html. # This file was created by Jacques Savoy and is distributed under the BSD license. +# Note: by default this file also contains forms normalized by HindiNormalizer +# for spelling variation (see section below), such that it can be used whether or +# not you enable that feature. 
When adding additional entries to this list, +# please add the normalized form as well. अंदर अत अपना