From 6f614bdf2a2f5e5480c0ea525563612eebe7997b Mon Sep 17 00:00:00 2001 From: Koji Sekiguchi <koji@apache.org> Date: Thu, 24 Feb 2011 02:16:30 +0000 Subject: [PATCH 1/7] LUCENE-2894: add more contribution git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074017 13f79535-47bb-0310-9956-ffa450edef68 --- .../solr/analysis/PathHierarchyTokenizerFactory.java | 7 +++++++ .../analysis/PatternReplaceCharFilterFactory.java | 11 ++++++++++- .../solr/analysis/PatternReplaceFilterFactory.java | 9 +++++++++ .../solr/analysis/PatternTokenizerFactory.java | 8 ++++++++ .../solr/analysis/PersianCharFilterFactory.java | 10 +++++++++- .../analysis/PersianNormalizationFilterFactory.java | 12 +++++++++++- .../apache/solr/analysis/PhoneticFilterFactory.java | 10 ++++++++++ .../solr/analysis/PorterStemFilterFactory.java | 8 ++++++++ .../analysis/PortugueseLightStemFilterFactory.java | 12 +++++++++++- .../analysis/PortugueseMinimalStemFilterFactory.java | 12 +++++++++++- .../solr/analysis/PortugueseStemFilterFactory.java | 12 +++++++++++- .../apache/solr/analysis/PositionFilterFactory.java | 8 ++++++++ .../analysis/RemoveDuplicatesTokenFilterFactory.java | 8 ++++++++ .../solr/analysis/ReverseStringFilterFactory.java | 9 ++++++++- .../solr/analysis/ReversedWildcardFilterFactory.java | 12 ++++++++++++ .../solr/analysis/RussianLightStemFilterFactory.java | 12 +++++++++++- 16 files changed, 152 insertions(+), 8 deletions(-) diff --git a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java index 50380764923..c93db874ec7 100644 --- a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java @@ -24,6 +24,13 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer; /** + * Factory for {@link PathHierarchyTokenizer}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/> + * </analyzer> + * </fieldType></pre> * @version $Id$ */ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java index 48014cb1a04..aff0b9892cc 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java @@ -25,9 +25,18 @@ import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter; /** + * Factory for {@link PatternReplaceCharFilter}. + * <pre class="prettyprint" > + * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([^a-z])" replacement="" + * maxBlockChars="10000" blockDelimiters="|"/> + * <tokenizer class="solr.KeywordTokenizerFactory"/> + * </analyzer> + * </fieldType></pre> * * @version $Id$ - * @since Solr 1.5 + * @since Solr 3.1 */ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java index e364002b446..039408e208c 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java @@ -24,6 +24,15 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** + * Factory for {@link PatternReplaceFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.KeywordTokenizerFactory"/> + * <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement="" + * replace="all"/> + * </analyzer> + * </fieldType></pre> * @version $Id$ * @see PatternReplaceFilter */ diff --git a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java index d83c97ee3b2..2e748f5fe04 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java @@ -28,6 +28,7 @@ import org.apache.solr.common.SolrException; /** + * Factory for {@link PatternTokenizer}. * This tokenizer uses regex pattern matching to construct distinct tokens * for the input stream. It takes two arguments: "pattern" and "group". * <p/> @@ -52,6 +53,13 @@ import org.apache.solr.common.SolrException; * </p> * <p>NOTE: This Tokenizer does not output tokens that are of zero length.</p> * + * <pre class="prettyprint" > + * <fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/> + * </analyzer> + * </fieldType></pre> + * * @see PatternTokenizer * @since solr1.2 * @version $Id$ diff --git a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java index 14098634d9c..f165ce3b954 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java @@ -21,7 +21,15 @@ import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.fa.PersianCharFilter; /** - * Factory for {@link PersianCharFilter} + * Factory for {@link PersianCharFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <charFilter class="solr.PersianCharFilterFactory"/> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ */ public class PersianCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java index 7e095460215..b0b3bffdf05 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java @@ -22,7 +22,17 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.fa.PersianNormalizationFilter; import org.apache.lucene.analysis.TokenStream; -/** Factory for {@link PersianNormalizationFilter} */ +/** + * Factory for {@link PersianNormalizationFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PersianNormalizationFilterFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ + */ public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory { public PersianNormalizationFilter create(TokenStream input) { return new PersianNormalizationFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java index c7763302935..6653ee44b05 100644 --- a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java @@ -33,6 +33,8 @@ import org.apache.lucene.analysis.phonetic.PhoneticFilter; import org.apache.solr.common.SolrException; /** + * Factory for {@link PhoneticFilter}. + * * Create tokens based on phonetic encoders * * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html @@ -41,6 +43,14 @@ import org.apache.solr.common.SolrException; * "encoder" required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex" * * "inject" (default=true) add tokens to the stream with the offset=0 + * + * <pre class="prettyprint" > + * <fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PhoneticFilterFactory" encoder="DoubleMetaphone" inject="true"/> + * </analyzer> + * </fieldType></pre> * * @version $Id$ * @see PhoneticFilter diff --git a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java index 2c72a79b763..ff4e1d0a8b1 100644 --- 
a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java @@ -21,6 +21,14 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.PorterStemFilter; /** + * Factory for {@link PorterStemFilter}. + * <pre class="prettyprint" > + * <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PorterStemFilterFactory"/> + * </analyzer> + * </fieldType></pre> * @version $Id$ */ public class PorterStemFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java index 50ec45a58c2..d11e8fba894 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java @@ -20,7 +20,17 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseLightStemFilter; -/** Factory for {@link PortugueseLightStemFilter} */ +/** + * Factory for {@link PortugueseLightStemFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PortugueseLightStemFilterFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ + */ public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseLightStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java index 60039a7af40..5b70e764673 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java @@ -20,7 +20,17 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; -/** Factory for {@link PortugueseMinimalStemFilter} */ +/** + * Factory for {@link PortugueseMinimalStemFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PortugueseMinimalStemFilterFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ + */ public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseMinimalStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java index be397e33623..3d3e20c56ff 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java @@ -20,7 +20,17 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseStemFilter; -/** Factory for {@link PortugueseStemFilter} */ +/** + * Factory for {@link PortugueseStemFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PortugueseStemFilterFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ + */ public class PortugueseStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java index ab1fb48c740..7f6a01fe02e 100644 --- a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java @@ -23,8 +23,16 @@ import org.apache.lucene.analysis.position.PositionFilter; import java.util.Map; /** + * Factory for {@link PositionFilter}. * Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its * original positionIncrement value. The default positionIncrement value is zero. 
+ * <pre class="prettyprint" > + * <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.PositionFilterFactory" positionIncrement="0"/> + * </analyzer> + * </fieldType></pre> * * @version $Id$ * @see org.apache.lucene.analysis.position.PositionFilter diff --git a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java index 2ee613113d7..4b14f33b6fd 100644 --- a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java @@ -21,6 +21,14 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; /** + * Factory for {@link RemoveDuplicatesTokenFilter}. + * <pre class="prettyprint" > + * <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.RemoveDuplicatesTokenFilterFactory"/> + * </analyzer> + * </fieldType></pre> * @version $Id$ */ public class RemoveDuplicatesTokenFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java index 7ebba14bd56..103a694b506 100644 --- a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java @@ -21,7 +21,14 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.reverse.ReverseStringFilter; /** - * A FilterFactory which reverses the input. + * Factory for {@link ReverseStringFilter}. 
+ * <pre class="prettyprint" > + * <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.ReverseStringFilterFactory"/> + * </analyzer> + * </fieldType></pre> * * @version $Id$ * @since solr 1.4 diff --git a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java index 85a16dfc646..b4c1b7bb538 100644 --- a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java @@ -48,6 +48,18 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter; * </ul> * Note 1: This filter always reverses input tokens during indexing. * Note 2: Query tokens without wildcard characters will never be reversed. + * <pre class="prettyprint" > + * <fieldType name="text_rvswc" class="solr.TextField" positionIncrementGap="100"> + * <analyzer type="index"> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true" + * maxPosAsterisk="2" maxPosQuestion="1" minTrailing="2" maxFractionAsterisk="0"/> + * </analyzer> + * <analyzer type="query"> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ */ public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java index fc9e301b64d..b6677a92019 100644 --- a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java @@ -20,7 +20,17 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import 
org.apache.lucene.analysis.ru.RussianLightStemFilter; -/** Factory for {@link RussianLightStemFilter} */ +/** + * Factory for {@link RussianLightStemFilter}. + * <pre class="prettyprint" > + * <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <filter class="solr.RussianLightStemFilterFactory"/> + * </analyzer> + * </fieldType></pre> + * @version $Id$ + */ public class RussianLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new RussianLightStemFilter(input); From 12c4c4e987ba8150918304d500e6c260792e9154 Mon Sep 17 00:00:00 2001 From: Steven Rowe <sarowe@apache.org> Date: Thu, 24 Feb 2011 13:02:44 +0000 Subject: [PATCH 2/7] LUCENE-2611: switched the code style file from a manually configured global style to an already-setup per-project code style; adjusted code style continuation indents; added groovy code style support. 
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074130 13f79535-47bb-0310-9956-ffa450edef68 --- build.xml | 55 ++------------------ dev-tools/idea/.idea/projectCodeStyle.xml | 54 +++++++++++++++++++ dev-tools/idea/Intellij-Lucene-Codestyle.xml | 48 ----------------- 3 files changed, 57 insertions(+), 100 deletions(-) create mode 100644 dev-tools/idea/.idea/projectCodeStyle.xml delete mode 100644 dev-tools/idea/Intellij-Lucene-Codestyle.xml diff --git a/build.xml b/build.xml index 05d3df35af1..ca4a3105ff4 100644 --- a/build.xml +++ b/build.xml @@ -83,19 +83,9 @@ </echo> </target> - <condition property="isMac"><os family="mac"/></condition> - <condition property="isUnix"><os family="unix"/></condition> - <condition property="isWindows"><os family="windows"/></condition> - <property environment="env"/> - <target name="idea" depends="copy-idea-files,finish-idea-setup-windows, - finish-idea-setup-mac, - finish-idea-setup-unix" - description="Setup IntelliJ IDEA configuration files"/> - <target name="copy-idea-files"> + <target name="idea" description="Setup IntelliJ IDEA configuration"> <copy todir="."> - <fileset dir="dev-tools/idea"> - <exclude name="Intellij-Lucene-Codestyle.xml"/> - </fileset> + <fileset dir="dev-tools/idea"/> </copy> <echo>Running Lucene contrib db/bdb-je task 'get-je-jar' ...</echo> <subant target="get-je-jar"> @@ -105,57 +95,18 @@ <subant target="get-db-jar"> <fileset dir="lucene/contrib/db/bdb" includes="build.xml" /> </subant> - </target> - <target name="finish-idea-setup-windows" if="isWindows"> <echo> - To install the Lucene/Solr codestyle file, copy - dev-tools\idea\Intellij-Lucene-Codestyle.xml to - ${env.HOMEDRIVE}${env.HOMEPATH}\.IntelliJIdeaXX\config\codestyles\ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. 
- After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - To complete IntelliJ IDEA setup, you must manually configure Project Structure | Project | Project SDK. </echo> </target> - <target name="finish-idea-setup-mac" if="isMac"> - <echo> - To install the Lucene/Solr codestyle file, copy - dev-tools/idea/Intellij-Lucene-Codestyle.xml to - ~/Library/Preferences/IntelliJXX/codestyles/ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. - After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - - To complete IntelliJ IDEA setup, you must manually configure - Project Structure | Project | Project SDK. - </echo> - </target> - <target name="finish-idea-setup-unix" if="isUnix"> - <echo> - To install the Lucene/Solr codestyle file, copy - dev-tools/idea/Intellij-Lucene-Codestyle.xml to - ~/.IntelliJIdeaXX/config/codestyles/ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. - After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - - To complete IntelliJ IDEA setup, you must manually configure - Project Structure | Project | Project SDK. - </echo> - </target> - <target name="clean-idea" + <target name="clean-idea" description="Removes all IntelliJ IDEA configuration files"> <delete dir=".idea" failonerror="true"/> <delete failonerror="true"> <fileset dir="." 
includes="*.iml,*.ipr,*.iws"/> <fileset dir="solr" includes="**/*.iml"/> <fileset dir="lucene" includes="**/*.iml"/> - <fileset dir="modules" includes="**/*.iml"/> </delete> </target> diff --git a/dev-tools/idea/.idea/projectCodeStyle.xml b/dev-tools/idea/.idea/projectCodeStyle.xml new file mode 100644 index 00000000000..e6ba7b64af2 --- /dev/null +++ b/dev-tools/idea/.idea/projectCodeStyle.xml @@ -0,0 +1,54 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project version="4"> + <component name="CodeStyleSettingsManager"> + <option name="PER_PROJECT_SETTINGS"> + <value> + <option name="USE_SAME_INDENTS" value="true" /> + <option name="OTHER_INDENT_OPTIONS"> + <value> + <option name="INDENT_SIZE" value="2" /> + <option name="CONTINUATION_INDENT_SIZE" value="4" /> + <option name="TAB_SIZE" value="2" /> + <option name="USE_TAB_CHARACTER" value="false" /> + <option name="SMART_TABS" value="false" /> + <option name="LABEL_INDENT_SIZE" value="0" /> + <option name="LABEL_INDENT_ABSOLUTE" value="false" /> + <option name="USE_RELATIVE_INDENTS" value="false" /> + </value> + </option> + <ADDITIONAL_INDENT_OPTIONS fileType="groovy"> + <option name="INDENT_SIZE" value="2" /> + <option name="CONTINUATION_INDENT_SIZE" value="4" /> + <option name="TAB_SIZE" value="2" /> + <option name="USE_TAB_CHARACTER" value="false" /> + <option name="SMART_TABS" value="false" /> + <option name="LABEL_INDENT_SIZE" value="0" /> + <option name="LABEL_INDENT_ABSOLUTE" value="false" /> + <option name="USE_RELATIVE_INDENTS" value="false" /> + </ADDITIONAL_INDENT_OPTIONS> + <ADDITIONAL_INDENT_OPTIONS fileType="java"> + <option name="INDENT_SIZE" value="2" /> + <option name="CONTINUATION_INDENT_SIZE" value="4" /> + <option name="TAB_SIZE" value="2" /> + <option name="USE_TAB_CHARACTER" value="false" /> + <option name="SMART_TABS" value="false" /> + <option name="LABEL_INDENT_SIZE" value="0" /> + <option name="LABEL_INDENT_ABSOLUTE" value="false" /> + <option name="USE_RELATIVE_INDENTS" value="false" /> + 
</ADDITIONAL_INDENT_OPTIONS> + <ADDITIONAL_INDENT_OPTIONS fileType="xml"> + <option name="INDENT_SIZE" value="2" /> + <option name="CONTINUATION_INDENT_SIZE" value="4" /> + <option name="TAB_SIZE" value="2" /> + <option name="USE_TAB_CHARACTER" value="false" /> + <option name="SMART_TABS" value="false" /> + <option name="LABEL_INDENT_SIZE" value="0" /> + <option name="LABEL_INDENT_ABSOLUTE" value="false" /> + <option name="USE_RELATIVE_INDENTS" value="false" /> + </ADDITIONAL_INDENT_OPTIONS> + </value> + </option> + <option name="USE_PER_PROJECT_SETTINGS" value="true" /> + </component> +</project> + diff --git a/dev-tools/idea/Intellij-Lucene-Codestyle.xml b/dev-tools/idea/Intellij-Lucene-Codestyle.xml deleted file mode 100644 index 279bf5346e0..00000000000 --- a/dev-tools/idea/Intellij-Lucene-Codestyle.xml +++ /dev/null @@ -1,48 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> -<code_scheme name="Lucene" parent="Default"> - <option name="JAVA_INDENT_OPTIONS"> - <value> - <option name="INDENT_SIZE" value="2" /> - <option name="CONTINUATION_INDENT_SIZE" value="8" /> - <option name="TAB_SIZE" value="2" /> - <option name="USE_TAB_CHARACTER" value="false" /> - <option name="SMART_TABS" value="false" /> - <option name="LABEL_INDENT_SIZE" value="0" /> - <option name="LABEL_INDENT_ABSOLUTE" value="false" /> - </value> - </option> - <option name="JSP_INDENT_OPTIONS"> - <value> - <option name="INDENT_SIZE" value="2" /> - <option name="CONTINUATION_INDENT_SIZE" value="8" /> - <option name="TAB_SIZE" value="2" /> - <option name="USE_TAB_CHARACTER" value="false" /> - <option name="SMART_TABS" value="false" /> - <option name="LABEL_INDENT_SIZE" value="0" /> - <option name="LABEL_INDENT_ABSOLUTE" value="false" /> - </value> - </option> - <option name="XML_INDENT_OPTIONS"> - <value> - <option name="INDENT_SIZE" value="2" /> - <option name="CONTINUATION_INDENT_SIZE" value="8" /> - <option name="TAB_SIZE" value="2" /> - <option name="USE_TAB_CHARACTER" value="false" /> - <option 
name="SMART_TABS" value="false" /> - <option name="LABEL_INDENT_SIZE" value="0" /> - <option name="LABEL_INDENT_ABSOLUTE" value="false" /> - </value> - </option> - <option name="OTHER_INDENT_OPTIONS"> - <value> - <option name="INDENT_SIZE" value="2" /> - <option name="CONTINUATION_INDENT_SIZE" value="8" /> - <option name="TAB_SIZE" value="2" /> - <option name="USE_TAB_CHARACTER" value="false" /> - <option name="SMART_TABS" value="false" /> - <option name="LABEL_INDENT_SIZE" value="0" /> - <option name="LABEL_INDENT_ABSOLUTE" value="false" /> - </value> - </option> -</code_scheme> - From e315a4a3110f8b84a0d1204d46129f8c9cc6d7ee Mon Sep 17 00:00:00 2001 From: Robert Muir <rmuir@apache.org> Date: Thu, 24 Feb 2011 17:25:40 +0000 Subject: [PATCH 3/7] LUCENE-2894: additional minor improvements to factory examples git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074226 13f79535-47bb-0310-9956-ffa450edef68 --- .../solr/analysis/ArabicNormalizationFilterFactory.java | 2 +- .../org/apache/solr/analysis/ArabicStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/BrazilianStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/BulgarianStemFilterFactory.java | 3 ++- .../java/org/apache/solr/analysis/ClassicFilterFactory.java | 2 +- .../org/apache/solr/analysis/CollationKeyFilterFactory.java | 2 +- .../java/org/apache/solr/analysis/CzechStemFilterFactory.java | 3 ++- .../java/org/apache/solr/analysis/ElisionFilterFactory.java | 3 ++- .../apache/solr/analysis/EnglishMinimalStemFilterFactory.java | 3 ++- .../apache/solr/analysis/EnglishPossessiveFilterFactory.java | 3 ++- .../apache/solr/analysis/FinnishLightStemFilterFactory.java | 3 ++- .../apache/solr/analysis/FrenchLightStemFilterFactory.java | 4 +++- .../apache/solr/analysis/FrenchMinimalStemFilterFactory.java | 4 +++- .../org/apache/solr/analysis/GalicianStemFilterFactory.java | 3 ++- .../apache/solr/analysis/GermanLightStemFilterFactory.java | 3 ++- 
.../apache/solr/analysis/GermanMinimalStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/GermanStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/GreekLowerCaseFilterFactory.java | 2 +- .../java/org/apache/solr/analysis/GreekStemFilterFactory.java | 3 ++- .../apache/solr/analysis/HindiNormalizationFilterFactory.java | 2 +- .../java/org/apache/solr/analysis/HindiStemFilterFactory.java | 2 +- .../apache/solr/analysis/HungarianLightStemFilterFactory.java | 3 ++- .../apache/solr/analysis/IndicNormalizationFilterFactory.java | 2 +- .../org/apache/solr/analysis/IndonesianStemFilterFactory.java | 3 ++- .../apache/solr/analysis/ItalianLightStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/PersianCharFilterFactory.java | 2 +- .../solr/analysis/PersianNormalizationFilterFactory.java | 3 ++- .../org/apache/solr/analysis/PorterStemFilterFactory.java | 3 ++- .../solr/analysis/PortugueseLightStemFilterFactory.java | 3 ++- .../solr/analysis/PortugueseMinimalStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/PortugueseStemFilterFactory.java | 3 ++- .../apache/solr/analysis/RussianLightStemFilterFactory.java | 3 ++- .../org/apache/solr/analysis/SnowballPorterFilterFactory.java | 3 ++- .../apache/solr/analysis/SpanishLightStemFilterFactory.java | 3 ++- .../java/org/apache/solr/analysis/StandardFilterFactory.java | 2 +- .../apache/solr/analysis/SwedishLightStemFilterFactory.java | 3 ++- .../java/org/apache/solr/analysis/ThaiWordFilterFactory.java | 2 +- .../apache/solr/analysis/TurkishLowerCaseFilterFactory.java | 2 +- 38 files changed, 67 insertions(+), 38 deletions(-) diff --git a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java index 7a3e9830237..f777959aa21 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java @@ -25,7 
+25,7 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; * <pre class="prettyprint" > * <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.ArabicNormalizationFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java index b8773019a4c..0cbb097f31c 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter; * <pre class="prettyprint" > * <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.ArabicNormalizationFilterFactory"/> * <filter class="solr.ArabicStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java index 6d96441d312..a6af3dbf9ec 100644 --- a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java @@ -26,7 +26,8 @@ import org.apache.lucene.analysis.br.BrazilianStemFilter; * <pre class="prettyprint" > * <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.BrazilianStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff 
--git a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java index 44563df043b..13d323089fe 100644 --- a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.bg.BulgarianStemFilter; * <pre class="prettyprint" > * <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.BulgarianStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java index ff97f64753d..1d8f02c1d11 100644 --- a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.standard.ClassicFilter; * <pre class="prettyprint" > * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.ClassicTokenizerFactory"/> * <filter class="solr.ClassicFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java index 67b84126511..c342d36e0bf 100644 --- a/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java @@ -60,7 +60,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; * <pre class="prettyprint" > * <fieldType name="text_clltnky" 
class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.KeywordTokenizerFactory"/> * <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java index 18d3ea77b1b..6229d8ee769 100644 --- a/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.cz.CzechStemFilter; * <pre class="prettyprint" > * <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.CzechStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java index 894ec436e19..027766843bb 100644 --- a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java @@ -32,7 +32,8 @@ import org.apache.lucene.analysis.TokenStream; * <pre class="prettyprint" > * <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java 
b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java index f64ce24d862..5d2292e9c4a 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; * <pre class="prettyprint" > * <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.EnglishMinimalStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java index 488c822ff43..30b9ea5afae 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter; * <pre class="prettyprint" > * <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.EnglishPossessiveFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java index ebb077bda1b..6d4bf82133c 100644 --- a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import 
org.apache.lucene.analysis.fi.FinnishLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.FinnishLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java index 2e1c16fb6c6..e0325465e86 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.ElisionFilterFactory"/> * <filter class="solr.FrenchLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java index d2381da89bf..753984f122e 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter; * <pre class="prettyprint" > * <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter 
class="solr.LowerCaseFilterFactory"/> + * <filter class="solr.ElisionFilterFactory"/> * <filter class="solr.FrenchMinimalStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java index 844c3f25c76..bb32d502dbe 100644 --- a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.gl.GalicianStemFilter; * <pre class="prettyprint" > * <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.GalicianStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java index 08cb732e9cc..601d12e668c 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.GermanLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java index 5c2f65f69c1..ac145878778 100644 --- 
a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanMinimalStemFilter; * <pre class="prettyprint" > * <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.GermanMinimalStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java index 34d8aaf5651..c2f3d03dfb8 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java @@ -27,7 +27,8 @@ import org.apache.lucene.analysis.TokenStream; * <pre class="prettyprint" > * <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.GermanStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java index 2c3a043b781..ecd02e2f703 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException.ErrorCode; * <pre class="prettyprint" > * <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + 
* <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.GreekLowerCaseFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java index 2783a7e1995..0a12b04f8a1 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.el.GreekStemFilter; * <pre class="prettyprint" > * <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.GreekLowerCaseFilterFactory"/> * <filter class="solr.GreekStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java index 2770a547582..e4137b83302 100644 --- a/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiNormalizationFilter; * <pre class="prettyprint" > * <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.HindiNormalizationFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java index e54e8c0ce3c..7dd3544015c 100644 --- a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java +++ 
b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiStemFilter; * <pre class="prettyprint" > * <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.HindiStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java index 60a46fbb3ce..0b06fa99ed3 100644 --- a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.HungarianLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java index 7811a0190eb..588946cc15a 100644 --- a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter; * <pre class="prettyprint" > * <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter 
class="solr.IndicNormalizationFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java index 1e27f73e8ff..bf3497a01a3 100644 --- a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java @@ -27,7 +27,8 @@ import org.apache.lucene.analysis.id.IndonesianStemFilter; * <pre class="prettyprint" > * <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java index a93412fe05c..1ca00e3090a 100644 --- a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.it.ItalianLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.ItalianLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java index f165ce3b954..8afd32c3ebd 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java 
+++ b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.fa.PersianCharFilter; * <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100"> * <analyzer> * <charFilter class="solr.PersianCharFilterFactory"/> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * </analyzer> * </fieldType></pre> * @version $Id$ diff --git a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java index b0b3bffdf05..d6e9733ab75 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java @@ -27,7 +27,8 @@ import org.apache.lucene.analysis.TokenStream; * <pre class="prettyprint" > * <fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <charFilter class="solr.PersianCharFilterFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.PersianNormalizationFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java index ff4e1d0a8b1..17e9d6cbd55 100644 --- a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.PorterStemFilter; * <pre class="prettyprint" > * <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter 
class="solr.PorterStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java index d11e8fba894..1e91e8804c4 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.pt.PortugueseLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.PortugueseLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java index 5b70e764673..32d67fafb9d 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; * <pre class="prettyprint" > * <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.PortugueseMinimalStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java index 3d3e20c56ff..2c264a10bed 100644 --- 
a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.pt.PortugueseStemFilter; * <pre class="prettyprint" > * <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.PortugueseStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java index b6677a92019..e3307597a26 100644 --- a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.ru.RussianLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.RussianLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java index e3297e45d8c..2c1f8fb4d1f 100644 --- a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java @@ -35,7 +35,8 @@ import org.tartarus.snowball.SnowballProgram; * <pre class="prettyprint" > * <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer 
class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java index 9f3c7a1a657..107679f7622 100644 --- a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.es.SpanishLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.SpanishLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java index e4a7d93a7b2..91daca19f20 100644 --- a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; * <pre class="prettyprint" > * <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.StandardFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java 
index 6e0478d161b..429f7ac1ed6 100644 --- a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.sv.SwedishLightStemFilter; * <pre class="prettyprint" > * <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.LowerCaseFilterFactory"/> * <filter class="solr.SwedishLightStemFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java index d35385ae3c4..8ed23ff9973 100644 --- a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.TokenStream; * <pre class="prettyprint" > * <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.NGramTokenizerFactory"/> + * <tokenizer class="solr.StandardTokenizerFactory"/> * <filter class="solr.ThaiWordFilterFactory"/> * </analyzer> * </fieldType></pre> diff --git a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java index 9efa18e3b19..660a278fdf4 100644 --- a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; * <pre class="prettyprint" > * <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"> * <analyzer> - * <tokenizer class="solr.WhitespaceTokenizerFactory"/> + * <tokenizer 
class="solr.StandardTokenizerFactory"/> * <filter class="solr.TurkishLowerCaseFilterFactory"/> * </analyzer> * </fieldType></pre> From b43011aef5b00f80dc3646831aa4d7d26cf45462 Mon Sep 17 00:00:00 2001 From: Yonik Seeley <yonik@apache.org> Date: Thu, 24 Feb 2011 22:29:35 +0000 Subject: [PATCH 4/7] SOLR-2380 fix facet.limit for distrib faceting and facet.sort=index git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074326 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 ++++ .../org/apache/solr/handler/component/FacetComponent.java | 5 ++--- solr/src/test/org/apache/solr/TestDistributedSearch.java | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5bb242de949..eaf2a2e1418 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -693,6 +693,10 @@ Bug Fixes useful error reporting when no match found (previously failed with a NullPointerException in log and no clear user feedback). (gthb via yonik) +* SOLR-2380: Distributed faceting could miss values when facet.sort=index + and when facet.offset was greater than 0. 
(yonik) + + Other Changes ---------------------- diff --git a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java index 0bbeaff45f2..f2f48b0b37c 100644 --- a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -222,12 +222,11 @@ public class FacetComponent extends SearchComponent sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT); sreq.params.remove(paramStart + FacetParams.FACET_OFFSET); + dff.initialLimit = dff.limit <= 0 ? dff.limit : dff.offset + dff.limit; // non-positive limits are passed through as-is; only positive limits are widened by the offset + if(dff.sort.equals(FacetParams.FACET_SORT_COUNT) && dff.limit > 0) { // set the initial limit higher to increase accuracy - dff.initialLimit = dff.offset + dff.limit; dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10; - } else { - dff.initialLimit = dff.limit; } // Currently this is for testing only and allows overriding of the diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index 5151564fedd..746229e05d4 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -137,6 +137,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2); query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index"); query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",10, "facet.limit",1, "facet.sort","index"); query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1); query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", 
"facet.query","*:*"); query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1); From 6d1353bc590bf31f4b5c5942935c316c87cdc19e Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" <hossman@apache.org> Date: Fri, 25 Feb 2011 00:15:42 +0000 Subject: [PATCH 5/7] LUCENE-2936: PhraseQuery score explanations were not correctly identifying matches vs non-matches git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074357 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../org/apache/lucene/search/PhraseQuery.java | 7 +-- .../org/apache/lucene/search/CheckHits.java | 13 +++-- .../lucene/search/TestExplanations.java | 8 ++- .../lucene/search/TestSimpleExplanations.java | 58 +++++++++++++++++++ 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2835d89937c..67e9246b88c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -355,6 +355,9 @@ Bug fixes with more document deletions is requested before a reader with fewer deletions, provided they share some segments. (yonik) +* LUCENE-2936: PhraseQuery score explanations were not correctly + identifying matches vs non-matches. 
(hossman) + ======================= Lucene 3.x (not yet released) ======================= Changes in backwards compatibility policy diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index 8c71ad78bd5..2c8d977fa82 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -224,7 +224,7 @@ public class PhraseQuery extends Query { public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Explanation result = new Explanation(); + ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); StringBuilder docFreqs = new StringBuilder(); @@ -303,10 +303,7 @@ public class PhraseQuery extends Query { // combine them result.setValue(queryExpl.getValue() * fieldExpl.getValue()); - - if (queryExpl.getValue() == 1.0f) - return fieldExpl; - + result.setMatch(tfExplanation.isMatch()); return result; } } diff --git a/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java index 6846e59deca..fd6a8f8afd0 100644 --- a/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java +++ b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java @@ -39,8 +39,8 @@ public class CheckHits { /** * Tests that all documents up to maxDoc which are *not* in the - * expected result set, have an explanation which indicates no match - * (ie: Explanation value of 0.0f) + * expected result set, have an explanation which indicates that + * the document does not match */ public static void checkNoMatchExplanations(Query q, String defaultFieldName, IndexSearcher searcher, int[] results) @@ -59,9 +59,9 @@ public class CheckHits { Explanation exp = searcher.explain(q, doc); Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); - 
Assert.assertEquals("Explanation of [["+d+"]] for #"+doc+ - " doesn't indicate non-match: " + exp.toString(), - 0.0f, exp.getValue(), 0.0f); + Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+ + " doesn't indicate non-match: " + exp.toString(), + exp.isMatch()); } } @@ -484,6 +484,9 @@ public class CheckHits { Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); verifyExplanation(d,doc,scorer.score(),deep,exp); + Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc + + " does not indicate match: " + exp.toString(), + exp.isMatch()); } @Override public void setNextReader(AtomicReaderContext context) { diff --git a/lucene/src/test/org/apache/lucene/search/TestExplanations.java b/lucene/src/test/org/apache/lucene/search/TestExplanations.java index 3f2712af511..467c9477484 100644 --- a/lucene/src/test/org/apache/lucene/search/TestExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestExplanations.java @@ -52,7 +52,10 @@ public class TestExplanations extends LuceneTestCase { protected Directory directory; public static final String KEY = "KEY"; + // boost on this field is the same as the iterator for the doc public static final String FIELD = "field"; + // same contents, but no field boost + public static final String ALTFIELD = "alt"; public static final QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer()); @@ -72,7 +75,10 @@ public class TestExplanations extends LuceneTestCase { for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + Field f = newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED); + f.setBoost(i); + doc.add(f); + doc.add(newField(ALTFIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git 
a/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java b/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java index 116b10a6e20..de21d8aaf2a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java @@ -289,4 +289,62 @@ public class TestSimpleExplanations extends TestExplanations { qtest(q, new int[] { 0,3 }); } + + /* BQ of TQ: using alt so some fields have zero boost and some don't */ + + public void testMultiFieldBQ1() throws Exception { + qtest("+w1 +alt:w2", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ2() throws Exception { + qtest("+yy +alt:w3", new int[] { 2,3 }); + } + public void testMultiFieldBQ3() throws Exception { + qtest("yy +alt:w3", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ4() throws Exception { + qtest("w1 (-xx alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ5() throws Exception { + qtest("w1 (+alt:qq alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ6() throws Exception { + qtest("w1 -(-alt:qq alt:w5)", new int[] { 1,2,3 }); + } + public void testMultiFieldBQ7() throws Exception { + qtest("+w1 +(alt:qq (alt:xx -alt:w2) (+alt:w3 +alt:w4))", new int[] { 0 }); + } + public void testMultiFieldBQ8() throws Exception { + qtest("+alt:w1 (qq (alt:xx -w2) (+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ9() throws Exception { + qtest("+w1 (alt:qq (-xx w2) -(+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ10() throws Exception { + qtest("+w1 +(alt:qq (-xx alt:w2) -(+alt:w3 +w4))", new int[] { 1 }); + } + + /* BQ of PQ: using alt so some fields have zero boost and some don't */ + + public void testMultiFieldBQofPQ1() throws Exception { + qtest("\"w1 w2\" alt:\"w1 w2\"", new int[] { 0 }); + } + public void testMultiFieldBQofPQ2() throws Exception { + qtest("\"w1 w3\" alt:\"w1 w3\"", new int[] { 1,3 }); + } + 
public void testMultiFieldBQofPQ3() throws Exception { + qtest("\"w1 w2\"~1 alt:\"w1 w2\"~1", new int[] { 0,1,2 }); + } + public void testMultiFieldBQofPQ4() throws Exception { + qtest("\"w2 w3\"~1 alt:\"w2 w3\"~1", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQofPQ5() throws Exception { + qtest("\"w3 w2\"~1 alt:\"w3 w2\"~1", new int[] { 1,3 }); + } + public void testMultiFieldBQofPQ6() throws Exception { + qtest("\"w3 w2\"~2 alt:\"w3 w2\"~2", new int[] { 0,1,3 }); + } + public void testMultiFieldBQofPQ7() throws Exception { + qtest("\"w3 w2\"~3 alt:\"w3 w2\"~3", new int[] { 0,1,2,3 }); + } + } From eeeb300d5482b8860667df6cf87b8e5e1a6fcacf Mon Sep 17 00:00:00 2001 From: Robert Muir <rmuir@apache.org> Date: Fri, 25 Feb 2011 01:55:13 +0000 Subject: [PATCH 6/7] correct writeVLong typo git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074376 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/store/DataOutput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/src/java/org/apache/lucene/store/DataOutput.java b/lucene/src/java/org/apache/lucene/store/DataOutput.java index 1db4d905402..af125bd04e7 100644 --- a/lucene/src/java/org/apache/lucene/store/DataOutput.java +++ b/lucene/src/java/org/apache/lucene/store/DataOutput.java @@ -82,7 +82,7 @@ public abstract class DataOutput { writeInt((int) i); } - /** Writes an long in a variable-length format. Writes between one and five + /** Writes an long in a variable-length format. Writes between one and nine * bytes. Smaller values take fewer bytes. Negative numbers are not * supported. 
* @see DataInput#readVLong() From c66172cf3bc14cac9160d29d70b5f57af3fa1082 Mon Sep 17 00:00:00 2001 From: Robert Muir <rmuir@apache.org> Date: Fri, 25 Feb 2011 03:44:36 +0000 Subject: [PATCH 7/7] fix javadocs warnings git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1074392 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/src/java/org/apache/lucene/search/IndexSearcher.java | 4 ++-- lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java index e3e0a1b6602..f7619fe3bd1 100644 --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java @@ -429,7 +429,7 @@ public class IndexSearcher { * <p>NOTE: this does not compute scores by default. If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, * Collector)}.</p> */ protected TopFieldDocs search(Weight weight, Filter filter, int nDocs, @@ -475,7 +475,7 @@ public class IndexSearcher { * <p>NOTE: this does not compute scores by default. 
If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, * Collector)}.</p> */ protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs, diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java index 60dc55c137c..4af5add0149 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java @@ -483,7 +483,7 @@ public class FST<T> { * this changes the provided <code>arc</code> (2nd arg) in-place and returns * it. * - * @returns Returns the second argument (<code>arc</code>). + * @return Returns the second argument (<code>arc</code>). */ public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException { //int pos = address;