diff --git a/build.xml b/build.xml index 9f52b014c2d..0733633a618 100644 --- a/build.xml +++ b/build.xml @@ -83,19 +83,9 @@ - - - - - - + - - - + Running Lucene contrib db/bdb-je task 'get-je-jar' ... @@ -105,57 +95,18 @@ - - - To install the Lucene/Solr codestyle file, copy - dev-tools\idea\Intellij-Lucene-Codestyle.xml to - ${env.HOMEDRIVE}${env.HOMEPATH}\.IntelliJIdeaXX\config\codestyles\ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. - After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - To complete IntelliJ IDEA setup, you must manually configure Project Structure | Project | Project SDK. - - - To install the Lucene/Solr codestyle file, copy - dev-tools/idea/Intellij-Lucene-Codestyle.xml to - ~/Library/Preferences/IntelliJXX/codestyles/ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. - After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - - To complete IntelliJ IDEA setup, you must manually configure - Project Structure | Project | Project SDK. - - - - - To install the Lucene/Solr codestyle file, copy - dev-tools/idea/Intellij-Lucene-Codestyle.xml to - ~/.IntelliJIdeaXX/config/codestyles/ - where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc. - After restarting IntelliJ, select "Lucene" - from the dropdown list at: - Settings | Code Style | Use global settings | Scheme name - - To complete IntelliJ IDEA setup, you must manually configure - Project Structure | Project | Project SDK. 
- - - - diff --git a/dev-tools/idea/.idea/projectCodeStyle.xml b/dev-tools/idea/.idea/projectCodeStyle.xml new file mode 100644 index 00000000000..e6ba7b64af2 --- /dev/null +++ b/dev-tools/idea/.idea/projectCodeStyle.xml @@ -0,0 +1,54 @@ + + + + + + + diff --git a/dev-tools/idea/Intellij-Lucene-Codestyle.xml b/dev-tools/idea/Intellij-Lucene-Codestyle.xml deleted file mode 100644 index 279bf5346e0..00000000000 --- a/dev-tools/idea/Intellij-Lucene-Codestyle.xml +++ /dev/null @@ -1,48 +0,0 @@ - - - - - - - - diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 2835d89937c..67e9246b88c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -355,6 +355,9 @@ Bug fixes with more document deletions is requested before a reader with fewer deletions, provided they share some segments. (yonik) +* LUCENE-2936: PhraseQuery score explanations were not correctly + identifying matches vs non-matches. (hossman) + ======================= Lucene 3.x (not yet released) ======================= Changes in backwards compatibility policy diff --git a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java index e3e0a1b6602..f7619fe3bd1 100644 --- a/lucene/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/lucene/src/java/org/apache/lucene/search/IndexSearcher.java @@ -429,7 +429,7 @@ public class IndexSearcher { *

NOTE: this does not compute scores by default. If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, * Collector)}.

*/ protected TopFieldDocs search(Weight weight, Filter filter, int nDocs, @@ -475,7 +475,7 @@ public class IndexSearcher { *

NOTE: this does not compute scores by default. If you * need scores, create a {@link TopFieldCollector} * instance by calling {@link TopFieldCollector#create} and - * then pass that to {@link #search(Weight, Filter, + * then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter, * Collector)}.

*/ protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs, diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index 8c71ad78bd5..2c8d977fa82 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -224,7 +224,7 @@ public class PhraseQuery extends Query { public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - Explanation result = new Explanation(); + ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); StringBuilder docFreqs = new StringBuilder(); @@ -303,10 +303,7 @@ public class PhraseQuery extends Query { // combine them result.setValue(queryExpl.getValue() * fieldExpl.getValue()); - - if (queryExpl.getValue() == 1.0f) - return fieldExpl; - + result.setMatch(tfExplanation.isMatch()); return result; } } diff --git a/lucene/src/java/org/apache/lucene/store/DataOutput.java b/lucene/src/java/org/apache/lucene/store/DataOutput.java index 1db4d905402..af125bd04e7 100644 --- a/lucene/src/java/org/apache/lucene/store/DataOutput.java +++ b/lucene/src/java/org/apache/lucene/store/DataOutput.java @@ -82,7 +82,7 @@ public abstract class DataOutput { writeInt((int) i); } - /** Writes an long in a variable-length format. Writes between one and five + /** Writes a long in a variable-length format. Writes between one and nine * bytes. Smaller values take fewer bytes. Negative numbers are not * supported. 
* @see DataInput#readVLong() diff --git a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java index 60dc55c137c..4af5add0149 100644 --- a/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java +++ b/lucene/src/java/org/apache/lucene/util/automaton/fst/FST.java @@ -483,7 +483,7 @@ public class FST { * this changes the provided arc (2nd arg) in-place and returns * it. * - * @returns Returns the second argument (arc). + * @return Returns the second argument (arc). */ public Arc readFirstTargetArc(Arc follow, Arc arc) throws IOException { //int pos = address; diff --git a/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java index 6846e59deca..fd6a8f8afd0 100644 --- a/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java +++ b/lucene/src/test-framework/org/apache/lucene/search/CheckHits.java @@ -39,8 +39,8 @@ public class CheckHits { /** * Tests that all documents up to maxDoc which are *not* in the - * expected result set, have an explanation which indicates no match - * (ie: Explanation value of 0.0f) + * expected result set, have an explanation which indicates that + * the document does not match */ public static void checkNoMatchExplanations(Query q, String defaultFieldName, IndexSearcher searcher, int[] results) @@ -59,9 +59,9 @@ public class CheckHits { Explanation exp = searcher.explain(q, doc); Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); - Assert.assertEquals("Explanation of [["+d+"]] for #"+doc+ - " doesn't indicate non-match: " + exp.toString(), - 0.0f, exp.getValue(), 0.0f); + Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+ + " doesn't indicate non-match: " + exp.toString(), + exp.isMatch()); } } @@ -484,6 +484,9 @@ public class CheckHits { Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp); 
verifyExplanation(d,doc,scorer.score(),deep,exp); + Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc + + " does not indicate match: " + exp.toString(), + exp.isMatch()); } @Override public void setNextReader(AtomicReaderContext context) { diff --git a/lucene/src/test/org/apache/lucene/search/TestExplanations.java b/lucene/src/test/org/apache/lucene/search/TestExplanations.java index 3f2712af511..467c9477484 100644 --- a/lucene/src/test/org/apache/lucene/search/TestExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestExplanations.java @@ -52,7 +52,10 @@ public class TestExplanations extends LuceneTestCase { protected Directory directory; public static final String KEY = "KEY"; + // boost on this field is the same as the iterator for the doc public static final String FIELD = "field"; + // same contents, but no field boost + public static final String ALTFIELD = "alt"; public static final QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer()); @@ -72,7 +75,10 @@ public class TestExplanations extends LuceneTestCase { for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + Field f = newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED); + f.setBoost(i); + doc.add(f); + doc.add(newField(ALTFIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java b/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java index 116b10a6e20..de21d8aaf2a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestSimpleExplanations.java @@ -289,4 +289,62 @@ public class TestSimpleExplanations extends TestExplanations { qtest(q, new 
int[] { 0,3 }); } + + /* BQ of TQ: using alt so some fields have zero boost and some don't */ + + public void testMultiFieldBQ1() throws Exception { + qtest("+w1 +alt:w2", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ2() throws Exception { + qtest("+yy +alt:w3", new int[] { 2,3 }); + } + public void testMultiFieldBQ3() throws Exception { + qtest("yy +alt:w3", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ4() throws Exception { + qtest("w1 (-xx alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ5() throws Exception { + qtest("w1 (+alt:qq alt:w2)", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ6() throws Exception { + qtest("w1 -(-alt:qq alt:w5)", new int[] { 1,2,3 }); + } + public void testMultiFieldBQ7() throws Exception { + qtest("+w1 +(alt:qq (alt:xx -alt:w2) (+alt:w3 +alt:w4))", new int[] { 0 }); + } + public void testMultiFieldBQ8() throws Exception { + qtest("+alt:w1 (qq (alt:xx -w2) (+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ9() throws Exception { + qtest("+w1 (alt:qq (-xx w2) -(+alt:w3 +w4))", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQ10() throws Exception { + qtest("+w1 +(alt:qq (-xx alt:w2) -(+alt:w3 +w4))", new int[] { 1 }); + } + + /* BQ of PQ: using alt so some fields have zero boost and some don't */ + + public void testMultiFieldBQofPQ1() throws Exception { + qtest("\"w1 w2\" alt:\"w1 w2\"", new int[] { 0 }); + } + public void testMultiFieldBQofPQ2() throws Exception { + qtest("\"w1 w3\" alt:\"w1 w3\"", new int[] { 1,3 }); + } + public void testMultiFieldBQofPQ3() throws Exception { + qtest("\"w1 w2\"~1 alt:\"w1 w2\"~1", new int[] { 0,1,2 }); + } + public void testMultiFieldBQofPQ4() throws Exception { + qtest("\"w2 w3\"~1 alt:\"w2 w3\"~1", new int[] { 0,1,2,3 }); + } + public void testMultiFieldBQofPQ5() throws Exception { + qtest("\"w3 w2\"~1 alt:\"w3 w2\"~1", new int[] { 1,3 }); + } + public void testMultiFieldBQofPQ6() throws Exception { + 
qtest("\"w3 w2\"~2 alt:\"w3 w2\"~2", new int[] { 0,1,3 }); + } + public void testMultiFieldBQofPQ7() throws Exception { + qtest("\"w3 w2\"~3 alt:\"w3 w2\"~3", new int[] { 0,1,2,3 }); + } + } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 5bb242de949..eaf2a2e1418 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -693,6 +693,10 @@ Bug Fixes useful error reporting when no match found (previously failed with a NullPointerException in log and no clear user feedback). (gthb via yonik) +* SOLR-2380: Distributed faceting could miss values when facet.sort=index + and when facet.offset was greater than 0. (yonik) + + Other Changes ---------------------- diff --git a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java index 7a3e9830237..f777959aa21 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ArabicNormalizationFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; *
  * <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.ArabicNormalizationFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java index b8773019a4c..0cbb097f31c 100644 --- a/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ArabicStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter; *
  * <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.ArabicNormalizationFilterFactory"/>
  *     <filter class="solr.ArabicStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java index 6d96441d312..a6af3dbf9ec 100644 --- a/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java @@ -26,7 +26,8 @@ import org.apache.lucene.analysis.br.BrazilianStemFilter; *
  * <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.BrazilianStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java index 44563df043b..13d323089fe 100644 --- a/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/BulgarianStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.bg.BulgarianStemFilter; *
  * <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.BulgarianStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java index ff97f64753d..1d8f02c1d11 100644 --- a/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ClassicFilterFactory.java @@ -26,7 +26,7 @@ import org.apache.lucene.analysis.standard.ClassicFilter; *
  * <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.ClassicTokenizerFactory"/>
  *     <filter class="solr.ClassicFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java index 67b84126511..c342d36e0bf 100644 --- a/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CollationKeyFilterFactory.java @@ -60,7 +60,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware; *
  * <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.KeywordTokenizerFactory"/>
  *     <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java index 18d3ea77b1b..6229d8ee769 100644 --- a/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CzechStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.cz.CzechStemFilter; *
  * <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.CzechStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java index 894ec436e19..027766843bb 100644 --- a/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ElisionFilterFactory.java @@ -32,7 +32,8 @@ import org.apache.lucene.analysis.TokenStream; *
  * <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java index f64ce24d862..5d2292e9c4a 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishMinimalStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; *
  * <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.EnglishMinimalStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java index 488c822ff43..30b9ea5afae 100644 --- a/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter; *
  * <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.EnglishPossessiveFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java index ebb077bda1b..6d4bf82133c 100644 --- a/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FinnishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.fi.FinnishLightStemFilter; *
  * <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.FinnishLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java index 2e1c16fb6c6..e0325465e86 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchLightStemFilterFactory.java @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchLightStemFilter; *
  * <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.ElisionFilterFactory"/>
  *     <filter class="solr.FrenchLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java index d2381da89bf..753984f122e 100644 --- a/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/FrenchMinimalStemFilterFactory.java @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter; *
  * <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.ElisionFilterFactory"/>
  *     <filter class="solr.FrenchMinimalStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java index 844c3f25c76..bb32d502dbe 100644 --- a/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.gl.GalicianStemFilter; *
  * <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.GalicianStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java index 08cb732e9cc..601d12e668c 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanLightStemFilter; *
  * <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.GermanLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java index 5c2f65f69c1..ac145878778 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanMinimalStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanMinimalStemFilter; *
  * <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.GermanMinimalStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java index 34d8aaf5651..c2f3d03dfb8 100644 --- a/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java @@ -27,7 +27,8 @@ import org.apache.lucene.analysis.TokenStream; *
  * <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.GermanStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java index 2c3a043b781..ecd02e2f703 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java @@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException.ErrorCode; *
  * <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.GreekLowerCaseFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java index 2783a7e1995..0a12b04f8a1 100644 --- a/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/GreekStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.el.GreekStemFilter; *
  * <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.GreekLowerCaseFilterFactory"/>
  *     <filter class="solr.GreekStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java index 2770a547582..e4137b83302 100644 --- a/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HindiNormalizationFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiNormalizationFilter; *
  * <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.HindiNormalizationFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java index e54e8c0ce3c..7dd3544015c 100644 --- a/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HindiStemFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiStemFilter; *
  * <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.HindiStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java index 60a46fbb3ce..0b06fa99ed3 100644 --- a/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/HungarianLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter; *
  * <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.HungarianLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java index 7811a0190eb..588946cc15a 100644 --- a/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndicNormalizationFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter; *
  * <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.IndicNormalizationFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java index 1e27f73e8ff..bf3497a01a3 100644 --- a/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/IndonesianStemFilterFactory.java @@ -27,7 +27,8 @@ import org.apache.lucene.analysis.id.IndonesianStemFilter; *
  * <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java index a93412fe05c..1ca00e3090a 100644 --- a/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ItalianLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.it.ItalianLightStemFilter; *
  * <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.ItalianLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java index 50380764923..c93db874ec7 100644 --- a/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PathHierarchyTokenizerFactory.java @@ -24,6 +24,13 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer; /** + * Factory for {@link PathHierarchyTokenizer}. + *
+ * <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/>
+ *   </analyzer>
+ * </fieldType>
* @version $Id$ */ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java index 48014cb1a04..aff0b9892cc 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceCharFilterFactory.java @@ -25,9 +25,18 @@ import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter; /** + * Factory for {@link PatternReplaceCharFilter}. + *
+ * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([^a-z])" replacement=""
+ *                 maxBlockChars="10000" blockDelimiters="|"/>
+ *     <tokenizer class="solr.KeywordTokenizerFactory"/>
+ *   </analyzer>
+ * </fieldType>
* * @version $Id$ - * @since Solr 1.5 + * @since Solr 3.1 */ public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java index e364002b446..039408e208c 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilterFactory.java @@ -24,6 +24,15 @@ import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; /** + * Factory for {@link PatternReplaceFilter}. + *
+ * <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.KeywordTokenizerFactory"/>
+ *     <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement=""
+ *             replace="all"/>
+ *   </analyzer>
+ * </fieldType>
* @version $Id$ * @see PatternReplaceFilter */ diff --git a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java index d83c97ee3b2..2e748f5fe04 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java @@ -28,6 +28,7 @@ import org.apache.solr.common.SolrException; /** + * Factory for {@link PatternTokenizer}. * This tokenizer uses regex pattern matching to construct distinct tokens * for the input stream. It takes two arguments: "pattern" and "group". *

@@ -52,6 +53,13 @@ import org.apache.solr.common.SolrException; *

*

NOTE: This Tokenizer does not output tokens that are of zero length.

* + *
+ * <fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/>
+ *   </analyzer>
+ * </fieldType>
+ * * @see PatternTokenizer * @since solr1.2 * @version $Id$ diff --git a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java index 14098634d9c..8afd32c3ebd 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianCharFilterFactory.java @@ -21,7 +21,15 @@ import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.fa.PersianCharFilter; /** - * Factory for {@link PersianCharFilter} + * Factory for {@link PersianCharFilter}. + *
+ * <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <charFilter class="solr.PersianCharFilterFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ */ public class PersianCharFilterFactory extends BaseCharFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java index 7e095460215..d6e9733ab75 100644 --- a/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PersianNormalizationFilterFactory.java @@ -22,7 +22,18 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.fa.PersianNormalizationFilter; import org.apache.lucene.analysis.TokenStream; -/** Factory for {@link PersianNormalizationFilter} */ +/** + * Factory for {@link PersianNormalizationFilter}. + *
+ * <fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <charFilter class="solr.PersianCharFilterFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.PersianNormalizationFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ + */ public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory { public PersianNormalizationFilter create(TokenStream input) { return new PersianNormalizationFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java index c7763302935..6653ee44b05 100644 --- a/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilterFactory.java @@ -33,6 +33,8 @@ import org.apache.lucene.analysis.phonetic.PhoneticFilter; import org.apache.solr.common.SolrException; /** + * Factory for {@link PhoneticFilter}. + * * Create tokens based on phonetic encoders * * http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html @@ -41,6 +43,14 @@ import org.apache.solr.common.SolrException; * "encoder" required, one of "DoubleMetaphone", "Metaphone", "Soundex", "RefinedSoundex" * * "inject" (default=true) add tokens to the stream with the offset=0 + * + *
+ * <fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.PhoneticFilterFactory" encoder="DoubleMetaphone" inject="true"/>
+ *   </analyzer>
+ * </fieldType>
* * @version $Id$ * @see PhoneticFilter diff --git a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java index 2c72a79b763..17e9d6cbd55 100644 --- a/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PorterStemFilterFactory.java @@ -21,6 +21,15 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.PorterStemFilter; /** + * Factory for {@link PorterStemFilter}. + *
+ * <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.PorterStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
* @version $Id$ */ public class PorterStemFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java index 50ec45a58c2..1e91e8804c4 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseLightStemFilterFactory.java @@ -20,7 +20,18 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseLightStemFilter; -/** Factory for {@link PortugueseLightStemFilter} */ +/** + * Factory for {@link PortugueseLightStemFilter}. + *
+ * <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.PortugueseLightStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ + */ public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseLightStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java index 60039a7af40..32d67fafb9d 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseMinimalStemFilterFactory.java @@ -20,7 +20,18 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter; -/** Factory for {@link PortugueseMinimalStemFilter} */ +/** + * Factory for {@link PortugueseMinimalStemFilter}. + *
+ * <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.PortugueseMinimalStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ + */ public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseMinimalStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java index be397e33623..2c264a10bed 100644 --- a/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java @@ -20,7 +20,18 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.pt.PortugueseStemFilter; -/** Factory for {@link PortugueseStemFilter} */ +/** + * Factory for {@link PortugueseStemFilter}. + *
+ * <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.PortugueseStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ + */ public class PortugueseStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new PortugueseStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java index ab1fb48c740..7f6a01fe02e 100644 --- a/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/PositionFilterFactory.java @@ -23,8 +23,16 @@ import org.apache.lucene.analysis.position.PositionFilter; import java.util.Map; /** + * Factory for {@link PositionFilter}. * Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its * original positionIncrement value. The default positionIncrement value is zero. + *
+ * <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.PositionFilterFactory" positionIncrement="0"/>
+ *   </analyzer>
+ * </fieldType>
* * @version $Id$ * @see org.apache.lucene.analysis.position.PositionFilter diff --git a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java index 2ee613113d7..4b14f33b6fd 100644 --- a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java @@ -21,6 +21,14 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter; /** + * Factory for {@link RemoveDuplicatesTokenFilter}. + *
+ * <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
* @version $Id$ */ public class RemoveDuplicatesTokenFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java index 7ebba14bd56..103a694b506 100644 --- a/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReverseStringFilterFactory.java @@ -21,7 +21,14 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.reverse.ReverseStringFilter; /** - * A FilterFactory which reverses the input. + * Factory for {@link ReverseStringFilter}. + *
+ * <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.ReverseStringFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
* * @version $Id$ * @since solr 1.4 diff --git a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java index 85a16dfc646..b4c1b7bb538 100644 --- a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilterFactory.java @@ -48,6 +48,18 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter; * * Note 1: This filter always reverses input tokens during indexing. * Note 2: Query tokens without wildcard characters will never be reversed. + *
+ * <fieldType name="text_rvswc" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer type="index">
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
+ *             maxPosAsterisk="2" maxPosQuestion="1" minTrailing="2" maxFractionAsterisk="0"/>
+ *   </analyzer>
+ *   <analyzer type="query">
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ */ public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory { diff --git a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java index fc9e301b64d..e3307597a26 100644 --- a/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/RussianLightStemFilterFactory.java @@ -20,7 +20,18 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ru.RussianLightStemFilter; -/** Factory for {@link RussianLightStemFilter} */ +/** + * Factory for {@link RussianLightStemFilter}. + *
+ * <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="solr.RussianLightStemFilterFactory"/>
+ *   </analyzer>
+ * </fieldType>
+ * @version $Id$ + */ public class RussianLightStemFilterFactory extends BaseTokenFilterFactory { public TokenStream create(TokenStream input) { return new RussianLightStemFilter(input); diff --git a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java index e3297e45d8c..2c1f8fb4d1f 100644 --- a/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SnowballPorterFilterFactory.java @@ -35,7 +35,8 @@ import org.tartarus.snowball.SnowballProgram; *
  * <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java index 9f3c7a1a657..107679f7622 100644 --- a/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SpanishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.es.SpanishLightStemFilter; *
  * <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.SpanishLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java index e4a7d93a7b2..91daca19f20 100644 --- a/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/StandardFilterFactory.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.standard.StandardFilter; *
  * <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.StandardFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java index 6e0478d161b..429f7ac1ed6 100644 --- a/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SwedishLightStemFilterFactory.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.sv.SwedishLightStemFilter; *
  * <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
  *     <filter class="solr.SwedishLightStemFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java index d35385ae3c4..8ed23ff9973 100644 --- a/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.TokenStream; *
  * <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.NGramTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.ThaiWordFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java index 9efa18e3b19..660a278fdf4 100644 --- a/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TurkishLowerCaseFilterFactory.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; *
  * <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
  *     <filter class="solr.TurkishLowerCaseFilterFactory"/>
  *   </analyzer>
  * </fieldType>
diff --git a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java index 0bbeaff45f2..f2f48b0b37c 100644 --- a/solr/src/java/org/apache/solr/handler/component/FacetComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/FacetComponent.java @@ -222,12 +222,11 @@ public class FacetComponent extends SearchComponent sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT); sreq.params.remove(paramStart + FacetParams.FACET_OFFSET); + dff.initialLimit = dff.offset + dff.limit; + if(dff.sort.equals(FacetParams.FACET_SORT_COUNT) && dff.limit > 0) { // set the initial limit higher to increase accuracy - dff.initialLimit = dff.offset + dff.limit; dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10; - } else { - dff.initialLimit = dff.limit; } // Currently this is for testing only and allows overriding of the diff --git a/solr/src/test/org/apache/solr/TestDistributedSearch.java b/solr/src/test/org/apache/solr/TestDistributedSearch.java index 5151564fedd..746229e05d4 100755 --- a/solr/src/test/org/apache/solr/TestDistributedSearch.java +++ b/solr/src/test/org/apache/solr/TestDistributedSearch.java @@ -137,6 +137,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase { query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2); query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index"); query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2); + query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",10, "facet.limit",1, "facet.sort","index"); query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1); query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*"); query("q","*:*", "rows",100, 
"facet","true", "facet.field",t1, "facet.offset",1);