mirror of https://github.com/apache/lucene.git
Merging r1074015 through r1074414 into realtime branch
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/realtime_search@1074415 13f79535-47bb-0310-9956-ffa450edef68
commit a11da24e91

@@ -83,19 +83,9 @@
</echo>
</target>

<condition property="isMac"><os family="mac"/></condition>
<condition property="isUnix"><os family="unix"/></condition>
<condition property="isWindows"><os family="windows"/></condition>
<property environment="env"/>
<target name="idea" depends="copy-idea-files,finish-idea-setup-windows,
finish-idea-setup-mac,
finish-idea-setup-unix"
description="Setup IntelliJ IDEA configuration files"/>
<target name="copy-idea-files">
<target name="idea" description="Setup IntelliJ IDEA configuration">
<copy todir=".">
<fileset dir="dev-tools/idea">
<exclude name="Intellij-Lucene-Codestyle.xml"/>
</fileset>
<fileset dir="dev-tools/idea"/>
</copy>
<echo>Running Lucene contrib db/bdb-je task 'get-je-jar' ...</echo>
<subant target="get-je-jar">
@@ -105,45 +95,7 @@
<subant target="get-db-jar">
<fileset dir="lucene/contrib/db/bdb" includes="build.xml" />
</subant>
</target>
<target name="finish-idea-setup-windows" if="isWindows">
<echo>
To install the Lucene/Solr codestyle file, copy
dev-tools\idea\Intellij-Lucene-Codestyle.xml to
${env.HOMEDRIVE}${env.HOMEPATH}\.IntelliJIdeaXX\config\codestyles\
where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc.
After restarting IntelliJ, select "Lucene"
from the dropdown list at:
Settings | Code Style | Use global settings | Scheme name

To complete IntelliJ IDEA setup, you must manually configure
Project Structure | Project | Project SDK.
</echo>
</target>
<target name="finish-idea-setup-mac" if="isMac">
<echo>
To install the Lucene/Solr codestyle file, copy
dev-tools/idea/Intellij-Lucene-Codestyle.xml to
~/Library/Preferences/IntelliJXX/codestyles/
where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc.
After restarting IntelliJ, select "Lucene"
from the dropdown list at:
Settings | Code Style | Use global settings | Scheme name

To complete IntelliJ IDEA setup, you must manually configure
Project Structure | Project | Project SDK.
</echo>
</target>
<target name="finish-idea-setup-unix" if="isUnix">
<echo>
To install the Lucene/Solr codestyle file, copy
dev-tools/idea/Intellij-Lucene-Codestyle.xml to
~/.IntelliJIdeaXX/config/codestyles/
where "XX" is "90" for IntelliJ 9.0.X, "10" for 10.0.X, etc.
After restarting IntelliJ, select "Lucene"
from the dropdown list at:
Settings | Code Style | Use global settings | Scheme name

To complete IntelliJ IDEA setup, you must manually configure
Project Structure | Project | Project SDK.
</echo>
@@ -155,7 +107,6 @@
<fileset dir="." includes="*.iml,*.ipr,*.iws"/>
<fileset dir="solr" includes="**/*.iml"/>
<fileset dir="lucene" includes="**/*.iml"/>
<fileset dir="modules" includes="**/*.iml"/>
</delete>
</target>

@@ -0,0 +1,54 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="CodeStyleSettingsManager">
    <option name="PER_PROJECT_SETTINGS">
      <value>
        <option name="USE_SAME_INDENTS" value="true" />
        <option name="OTHER_INDENT_OPTIONS">
          <value>
            <option name="INDENT_SIZE" value="2" />
            <option name="CONTINUATION_INDENT_SIZE" value="4" />
            <option name="TAB_SIZE" value="2" />
            <option name="USE_TAB_CHARACTER" value="false" />
            <option name="SMART_TABS" value="false" />
            <option name="LABEL_INDENT_SIZE" value="0" />
            <option name="LABEL_INDENT_ABSOLUTE" value="false" />
            <option name="USE_RELATIVE_INDENTS" value="false" />
          </value>
        </option>
        <ADDITIONAL_INDENT_OPTIONS fileType="groovy">
          <option name="INDENT_SIZE" value="2" />
          <option name="CONTINUATION_INDENT_SIZE" value="4" />
          <option name="TAB_SIZE" value="2" />
          <option name="USE_TAB_CHARACTER" value="false" />
          <option name="SMART_TABS" value="false" />
          <option name="LABEL_INDENT_SIZE" value="0" />
          <option name="LABEL_INDENT_ABSOLUTE" value="false" />
          <option name="USE_RELATIVE_INDENTS" value="false" />
        </ADDITIONAL_INDENT_OPTIONS>
        <ADDITIONAL_INDENT_OPTIONS fileType="java">
          <option name="INDENT_SIZE" value="2" />
          <option name="CONTINUATION_INDENT_SIZE" value="4" />
          <option name="TAB_SIZE" value="2" />
          <option name="USE_TAB_CHARACTER" value="false" />
          <option name="SMART_TABS" value="false" />
          <option name="LABEL_INDENT_SIZE" value="0" />
          <option name="LABEL_INDENT_ABSOLUTE" value="false" />
          <option name="USE_RELATIVE_INDENTS" value="false" />
        </ADDITIONAL_INDENT_OPTIONS>
        <ADDITIONAL_INDENT_OPTIONS fileType="xml">
          <option name="INDENT_SIZE" value="2" />
          <option name="CONTINUATION_INDENT_SIZE" value="4" />
          <option name="TAB_SIZE" value="2" />
          <option name="USE_TAB_CHARACTER" value="false" />
          <option name="SMART_TABS" value="false" />
          <option name="LABEL_INDENT_SIZE" value="0" />
          <option name="LABEL_INDENT_ABSOLUTE" value="false" />
          <option name="USE_RELATIVE_INDENTS" value="false" />
        </ADDITIONAL_INDENT_OPTIONS>
      </value>
    </option>
    <option name="USE_PER_PROJECT_SETTINGS" value="true" />
  </component>
</project>

@@ -1,48 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<code_scheme name="Lucene" parent="Default">
  <option name="JAVA_INDENT_OPTIONS">
    <value>
      <option name="INDENT_SIZE" value="2" />
      <option name="CONTINUATION_INDENT_SIZE" value="8" />
      <option name="TAB_SIZE" value="2" />
      <option name="USE_TAB_CHARACTER" value="false" />
      <option name="SMART_TABS" value="false" />
      <option name="LABEL_INDENT_SIZE" value="0" />
      <option name="LABEL_INDENT_ABSOLUTE" value="false" />
    </value>
  </option>
  <option name="JSP_INDENT_OPTIONS">
    <value>
      <option name="INDENT_SIZE" value="2" />
      <option name="CONTINUATION_INDENT_SIZE" value="8" />
      <option name="TAB_SIZE" value="2" />
      <option name="USE_TAB_CHARACTER" value="false" />
      <option name="SMART_TABS" value="false" />
      <option name="LABEL_INDENT_SIZE" value="0" />
      <option name="LABEL_INDENT_ABSOLUTE" value="false" />
    </value>
  </option>
  <option name="XML_INDENT_OPTIONS">
    <value>
      <option name="INDENT_SIZE" value="2" />
      <option name="CONTINUATION_INDENT_SIZE" value="8" />
      <option name="TAB_SIZE" value="2" />
      <option name="USE_TAB_CHARACTER" value="false" />
      <option name="SMART_TABS" value="false" />
      <option name="LABEL_INDENT_SIZE" value="0" />
      <option name="LABEL_INDENT_ABSOLUTE" value="false" />
    </value>
  </option>
  <option name="OTHER_INDENT_OPTIONS">
    <value>
      <option name="INDENT_SIZE" value="2" />
      <option name="CONTINUATION_INDENT_SIZE" value="8" />
      <option name="TAB_SIZE" value="2" />
      <option name="USE_TAB_CHARACTER" value="false" />
      <option name="SMART_TABS" value="false" />
      <option name="LABEL_INDENT_SIZE" value="0" />
      <option name="LABEL_INDENT_ABSOLUTE" value="false" />
    </value>
  </option>
</code_scheme>

@@ -355,6 +355,9 @@ Bug fixes
with more document deletions is requested before a reader with fewer
deletions, provided they share some segments. (yonik)

* LUCENE-2936: PhraseQuery score explanations were not correctly
identifying matches vs non-matches. (hossman)

======================= Lucene 3.x (not yet released) =======================

Changes in backwards compatibility policy

@@ -429,7 +429,7 @@ public class IndexSearcher {
* <p>NOTE: this does not compute scores by default. If you
* need scores, create a {@link TopFieldCollector}
* instance by calling {@link TopFieldCollector#create} and
* then pass that to {@link #search(Weight, Filter,
* then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter,
* Collector)}.</p>
*/
protected TopFieldDocs search(Weight weight, Filter filter, int nDocs,
@@ -475,7 +475,7 @@ public class IndexSearcher {
* <p>NOTE: this does not compute scores by default. If you
* need scores, create a {@link TopFieldCollector}
* instance by calling {@link TopFieldCollector#create} and
* then pass that to {@link #search(Weight, Filter,
* then pass that to {@link #search(IndexReader.AtomicReaderContext[], Weight, Filter,
* Collector)}.</p>
*/
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, Filter filter, int nDocs,
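
The NOTE above points callers at TopFieldCollector when scores are needed from a sorted search; a minimal sketch of that pattern, assuming the 2.9+/3.x-era TopFieldCollector.create(Sort, int, boolean, boolean, boolean, boolean) signature (the helper class and parameter choices below are illustrative, not part of this patch):

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;

// Illustrative helper (not part of this patch): run a sorted search but still
// get a score for each hit by building the TopFieldCollector explicitly.
class SortedSearchWithScores {
  static TopFieldDocs searchSortedWithScores(IndexSearcher searcher, Query query,
                                             Sort sort, int n) throws IOException {
    TopFieldCollector collector = TopFieldCollector.create(
        sort,    // sort order for the hits
        n,       // number of hits to keep
        true,    // fillFields: populate FieldDoc.fields
        true,    // trackDocScores: compute a score per hit
        false,   // trackMaxScore: skip max-score bookkeeping
        true);   // docsScoredInOrder
    searcher.search(query, collector);
    return (TopFieldDocs) collector.topDocs();
  }
}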

@@ -224,7 +224,7 @@ public class PhraseQuery extends Query {
public Explanation explain(AtomicReaderContext context, int doc)
throws IOException {

Explanation result = new Explanation();
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

StringBuilder docFreqs = new StringBuilder();
@@ -303,10 +303,7 @@ public class PhraseQuery extends Query {

// combine them
result.setValue(queryExpl.getValue() * fieldExpl.getValue());

if (queryExpl.getValue() == 1.0f)
return fieldExpl;

result.setMatch(tfExplanation.isMatch());
return result;
}
}
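
Switching the explanation object from Explanation to ComplexExplanation is what lets explain() report match versus non-match explicitly instead of relying on a 0.0f value; a minimal sketch of checking that flag through IndexSearcher.explain (the helper class below is illustrative, not part of this patch):

import java.io.IOException;

import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

// Illustrative helper (not part of this patch): with ComplexExplanation the
// match flag is carried explicitly, so callers can ask isMatch() instead of
// testing whether the explanation value happens to be 0.0f.
class ExplainMatchCheck {
  static boolean matches(IndexSearcher searcher, Query phraseQuery, int docId) throws IOException {
    Explanation exp = searcher.explain(phraseQuery, docId);
    return exp.isMatch();
  }
}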

@@ -82,7 +82,7 @@ public abstract class DataOutput {
writeInt((int) i);
}

/** Writes an long in a variable-length format. Writes between one and five
/** Writes an long in a variable-length format. Writes between one and nine
* bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
* @see DataInput#readVLong()
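
The corrected bound follows from the encoding arithmetic: a non-negative long has at most 63 significant bits, and with 7 payload bits per byte that is ceil(63 / 7) = 9 bytes, not five. A self-contained sketch of the same 7-bits-per-byte scheme (an illustration, not the DataOutput implementation itself):

import java.io.ByteArrayOutputStream;

// Standalone illustration of the variable-length encoding: 7 payload bits per
// byte, high bit set on every byte except the last, so a non-negative long
// (at most 63 significant bits) needs at most ceil(63 / 7) = 9 bytes.
class VLongSketch {
  static byte[] encode(long i) {
    assert i >= 0 : "negative values are not supported";
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    while ((i & ~0x7FL) != 0L) {
      out.write((int) ((i & 0x7FL) | 0x80L)); // more bytes follow
      i >>>= 7;
    }
    out.write((int) i);                       // final byte, high bit clear
    return out.toByteArray();
  }
}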

@@ -483,7 +483,7 @@ public class FST<T> {
* this changes the provided <code>arc</code> (2nd arg) in-place and returns
* it.
*
* @returns Returns the second argument (<code>arc</code>).
* @return Returns the second argument (<code>arc</code>).
*/
public Arc<T> readFirstTargetArc(Arc<T> follow, Arc<T> arc) throws IOException {
//int pos = address;

@@ -39,8 +39,8 @@ public class CheckHits {

/**
* Tests that all documents up to maxDoc which are *not* in the
* expected result set, have an explanation which indicates no match
* (ie: Explanation value of 0.0f)
* expected result set, have an explanation which indicates that
* the document does not match
*/
public static void checkNoMatchExplanations(Query q, String defaultFieldName,
IndexSearcher searcher, int[] results)
@@ -59,9 +59,9 @@
Explanation exp = searcher.explain(q, doc);
Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
exp);
Assert.assertEquals("Explanation of [["+d+"]] for #"+doc+
Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
" doesn't indicate non-match: " + exp.toString(),
0.0f, exp.getValue(), 0.0f);
exp.isMatch());
}

}
@@ -484,6 +484,9 @@ public class CheckHits {

Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null", exp);
verifyExplanation(d,doc,scorer.score(),deep,exp);
Assert.assertTrue("Explanation of [["+d+"]] for #"+ doc +
" does not indicate match: " + exp.toString(),
exp.isMatch());
}
@Override
public void setNextReader(AtomicReaderContext context) {

@@ -52,7 +52,10 @@ public class TestExplanations extends LuceneTestCase {
protected Directory directory;

public static final String KEY = "KEY";
// boost on this field is the same as the iterator for the doc
public static final String FIELD = "field";
// same contents, but no field boost
public static final String ALTFIELD = "alt";
public static final QueryParser qp =
new QueryParser(TEST_VERSION_CURRENT, FIELD, new MockAnalyzer());

@@ -72,7 +75,10 @@ public class TestExplanations extends LuceneTestCase {
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
Field f = newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED);
f.setBoost(i);
doc.add(f);
doc.add(newField(ALTFIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
reader = writer.getReader();

@@ -289,4 +289,62 @@ public class TestSimpleExplanations extends TestExplanations {
qtest(q, new int[] { 0,3 });

}

/* BQ of TQ: using alt so some fields have zero boost and some don't */

public void testMultiFieldBQ1() throws Exception {
qtest("+w1 +alt:w2", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ2() throws Exception {
qtest("+yy +alt:w3", new int[] { 2,3 });
}
public void testMultiFieldBQ3() throws Exception {
qtest("yy +alt:w3", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ4() throws Exception {
qtest("w1 (-xx alt:w2)", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ5() throws Exception {
qtest("w1 (+alt:qq alt:w2)", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ6() throws Exception {
qtest("w1 -(-alt:qq alt:w5)", new int[] { 1,2,3 });
}
public void testMultiFieldBQ7() throws Exception {
qtest("+w1 +(alt:qq (alt:xx -alt:w2) (+alt:w3 +alt:w4))", new int[] { 0 });
}
public void testMultiFieldBQ8() throws Exception {
qtest("+alt:w1 (qq (alt:xx -w2) (+alt:w3 +w4))", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ9() throws Exception {
qtest("+w1 (alt:qq (-xx w2) -(+alt:w3 +w4))", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQ10() throws Exception {
qtest("+w1 +(alt:qq (-xx alt:w2) -(+alt:w3 +w4))", new int[] { 1 });
}

/* BQ of PQ: using alt so some fields have zero boost and some don't */

public void testMultiFieldBQofPQ1() throws Exception {
qtest("\"w1 w2\" alt:\"w1 w2\"", new int[] { 0 });
}
public void testMultiFieldBQofPQ2() throws Exception {
qtest("\"w1 w3\" alt:\"w1 w3\"", new int[] { 1,3 });
}
public void testMultiFieldBQofPQ3() throws Exception {
qtest("\"w1 w2\"~1 alt:\"w1 w2\"~1", new int[] { 0,1,2 });
}
public void testMultiFieldBQofPQ4() throws Exception {
qtest("\"w2 w3\"~1 alt:\"w2 w3\"~1", new int[] { 0,1,2,3 });
}
public void testMultiFieldBQofPQ5() throws Exception {
qtest("\"w3 w2\"~1 alt:\"w3 w2\"~1", new int[] { 1,3 });
}
public void testMultiFieldBQofPQ6() throws Exception {
qtest("\"w3 w2\"~2 alt:\"w3 w2\"~2", new int[] { 0,1,3 });
}
public void testMultiFieldBQofPQ7() throws Exception {
qtest("\"w3 w2\"~3 alt:\"w3 w2\"~3", new int[] { 0,1,2,3 });
}

}

@@ -693,6 +693,10 @@ Bug Fixes
useful error reporting when no match found (previously failed with a
NullPointerException in log and no clear user feedback). (gthb via yonik)

* SOLR-2380: Distributed faceting could miss values when facet.sort=index
and when facet.offset was greater than 0. (yonik)



Other Changes
----------------------
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_arnormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.ar.ArabicStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_arstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ArabicNormalizationFilterFactory"/>
|
||||
* <filter class="solr.ArabicStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -26,7 +26,8 @@ import org.apache.lucene.analysis.br.BrazilianStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_brstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.BrazilianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.bg.BulgarianStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_bgstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.BulgarianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.apache.lucene.analysis.standard.ClassicFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_clssc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.ClassicTokenizerFactory"/>
|
||||
* <filter class="solr.ClassicFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -60,7 +60,7 @@ import org.apache.solr.util.plugin.ResourceLoaderAware;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_clltnky" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
* <filter class="solr.CollationKeyFilterFactory" language="ja" country="JP"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.cz.CzechStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_czstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.CzechStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -32,7 +32,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_elsn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.ElisionFilterFactory" articles="stopwordarticles.txt"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_enminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.EnglishMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_enpossessive" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.EnglishPossessiveFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.fi.FinnishLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_filgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.FinnishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_frlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.ElisionFilterFactory"/>
|
||||
* <filter class="solr.FrenchLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,9 @@ import org.apache.lucene.analysis.fr.FrenchMinimalStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_frminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.ElisionFilterFactory"/>
|
||||
* <filter class="solr.FrenchMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.gl.GalicianStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_glstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GalicianStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_delgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.de.GermanMinimalStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_deminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -27,7 +27,8 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_destem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GermanStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_glc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.el.GreekStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_gstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.GreekLowerCaseFilterFactory"/>
|
||||
* <filter class="solr.GreekStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hinormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.HindiNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.hi.HindiStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.HindiStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.hu.HungarianLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_hulgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.HungarianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.in.IndicNormalizationFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_innormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.IndicNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -27,7 +27,8 @@ import org.apache.lucene.analysis.id.IndonesianStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_idstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.it.ItalianLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_itlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.ItalianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -24,6 +24,13 @@ import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link PathHierarchyTokenizer}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_path" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="\" replace="/"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
|
|
@ -25,9 +25,18 @@ import org.apache.lucene.analysis.CharStream;
|
|||
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link PatternReplaceCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([^a-z])" replacement=""
|
||||
* maxBlockChars="10000" blockDelimiters="|"/>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since Solr 1.5
|
||||
* @since Solr 3.1
|
||||
*/
|
||||
public class PatternReplaceCharFilterFactory extends BaseCharFilterFactory {
|
||||
|
||||
|
|
|
@ -24,6 +24,15 @@ import java.util.regex.Pattern;
|
|||
import java.util.regex.PatternSyntaxException;
|
||||
|
||||
/**
|
||||
* Factory for {@link PatternReplaceFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptnreplace" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
* <filter class="solr.PatternReplaceFilterFactory" pattern="([^a-z])" replacement=""
|
||||
* replace="all"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
* @see PatternReplaceFilter
|
||||
*/
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.solr.common.SolrException;
|
|||
|
||||
|
||||
/**
|
||||
* Factory for {@link PatternTokenizer}.
|
||||
* This tokenizer uses regex pattern matching to construct distinct tokens
|
||||
* for the input stream. It takes two arguments: "pattern" and "group".
|
||||
* <p/>
|
||||
|
@ -52,6 +53,13 @@ import org.apache.solr.common.SolrException;
|
|||
* </p>
|
||||
* <p>NOTE: This Tokenizer does not output tokens that are of zero length.</p>
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.PatternTokenizerFactory" pattern="\'([^\']+)\'" group="1"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @see PatternTokenizer
|
||||
* @since solr1.2
|
||||
* @version $Id$
|
||||
|
|
|
@ -21,7 +21,15 @@ import org.apache.lucene.analysis.CharStream;
|
|||
import org.apache.lucene.analysis.fa.PersianCharFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link PersianCharFilter}
|
||||
* Factory for {@link PersianCharFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PersianCharFilterFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PersianCharFilterFactory extends BaseCharFilterFactory {
|
||||
|
||||
|
|
|
@ -22,7 +22,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/** Factory for {@link PersianNormalizationFilter} */
|
||||
/**
|
||||
* Factory for {@link PersianNormalizationFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_fanormal" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <charFilter class="solr.PersianCharFilterFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.PersianNormalizationFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PersianNormalizationFilterFactory extends BaseTokenFilterFactory {
|
||||
public PersianNormalizationFilter create(TokenStream input) {
|
||||
return new PersianNormalizationFilter(input);
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.lucene.analysis.phonetic.PhoneticFilter;
|
|||
import org.apache.solr.common.SolrException;
|
||||
|
||||
/**
|
||||
* Factory for {@link PhoneticFilter}.
|
||||
*
|
||||
* Create tokens based on phonetic encoders
|
||||
*
|
||||
* http://jakarta.apache.org/commons/codec/api-release/org/apache/commons/codec/language/package-summary.html
|
||||
|
@ -42,6 +44,14 @@ import org.apache.solr.common.SolrException;
|
|||
*
|
||||
* "inject" (default=true) add tokens to the stream with the offset=0
|
||||
*
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_phonetic" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.PhoneticFilterFactory" encoder="DoubleMetaphone" inject="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see PhoneticFilter
|
||||
*/
|
||||
|
|
|
@ -21,6 +21,15 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.en.PorterStemFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link PorterStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_porterstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.PorterStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PorterStemFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -20,7 +20,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseLightStemFilter;
|
||||
|
||||
/** Factory for {@link PortugueseLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link PortugueseLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.PortugueseLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PortugueseLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseLightStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilter;
|
||||
|
||||
/** Factory for {@link PortugueseMinimalStemFilter} */
|
||||
/**
|
||||
* Factory for {@link PortugueseMinimalStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptminstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.PortugueseMinimalStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PortugueseMinimalStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseMinimalStemFilter(input);
|
||||
|
|
|
@ -20,7 +20,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.pt.PortugueseStemFilter;
|
||||
|
||||
/** Factory for {@link PortugueseStemFilter} */
|
||||
/**
|
||||
* Factory for {@link PortugueseStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_ptstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.PortugueseStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class PortugueseStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new PortugueseStemFilter(input);
|
||||
|
|
|
@ -23,8 +23,16 @@ import org.apache.lucene.analysis.position.PositionFilter;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link PositionFilter}.
|
||||
* Set the positionIncrement of all tokens to the "positionIncrement", except the first return token which retains its
|
||||
* original positionIncrement value. The default positionIncrement value is zero.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_position" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.PositionFilterFactory" positionIncrement="0"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @see org.apache.lucene.analysis.position.PositionFilter
|
||||
|
|
|
@ -21,6 +21,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
|
||||
|
||||
/**
|
||||
* Factory for {@link RemoveDuplicatesTokenFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_rmdup" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class RemoveDuplicatesTokenFilterFactory extends BaseTokenFilterFactory {
|
||||
|
|
|
@ -21,7 +21,14 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
|
||||
/**
|
||||
* A FilterFactory which reverses the input.
|
||||
* Factory for {@link ReverseStringFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_rvsstr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ReverseStringFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.4
|
||||
|
|
|
@ -48,6 +48,18 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
|||
* </ul>
|
||||
* Note 1: This filter always reverses input tokens during indexing.
|
||||
* Note 2: Query tokens without wildcard characters will never be reversed.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_rvswc" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer type="index">
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||
* maxPosAsterisk="2" maxPosQuestion="1" minTrailing="2" maxFractionAsterisk="0"/>
|
||||
* </analyzer>
|
||||
* <analyzer type="query">
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
|
|
|
@ -20,7 +20,18 @@ package org.apache.solr.analysis;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ru.RussianLightStemFilter;
|
||||
|
||||
/** Factory for {@link RussianLightStemFilter} */
|
||||
/**
|
||||
* Factory for {@link RussianLightStemFilter}.
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_rulgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.RussianLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
* @version $Id$
|
||||
*/
|
||||
public class RussianLightStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new RussianLightStemFilter(input);
|
||||
|
|
|
@ -35,7 +35,8 @@ import org.tartarus.snowball.SnowballProgram;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_snowballstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.SnowballPorterFilterFactory" protected="protectedkeyword.txt" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.es.SpanishLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_eslgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.SpanishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.StandardFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,8 @@ import org.apache.lucene.analysis.sv.SwedishLightStemFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_svlgtstem" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.SwedishLightStemFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_thai" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.NGramTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.ThaiWordFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
|
|||
* <pre class="prettyprint" >
|
||||
* <fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.TurkishLowerCaseFilterFactory"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
|
|

@@ -222,12 +222,11 @@ public class FacetComponent extends SearchComponent
sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);

dff.initialLimit = dff.offset + dff.limit;

if(dff.sort.equals(FacetParams.FACET_SORT_COUNT) && dff.limit > 0) {
// set the initial limit higher to increase accuracy
dff.initialLimit = dff.offset + dff.limit;
dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
} else {
dff.initialLimit = dff.limit;
}

// Currently this is for testing only and allows overriding of the
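
The fix for SOLR-2380 folds facet.offset into the per-shard limit for every sort order, not just facet.sort=count; a small illustration of the resulting limit arithmetic (hypothetical helper, not the Solr source):

// Hypothetical helper (not the Solr source): the per-shard facet limit after
// this change. The offset is always folded in; only count-sorted facets get
// the extra over-request used to improve accuracy.
class ShardFacetLimitSketch {
  static int initialLimit(int offset, int limit, boolean sortByCount) {
    int initialLimit = offset + limit;
    if (sortByCount && limit > 0) {
      initialLimit = (int) (initialLimit * 1.5) + 10;
    }
    return initialLimit;
  }
}

For example, facet.offset=10, facet.limit=1, facet.sort=index now requests 11 terms from each shard rather than 1, which is what the new TestDistributedSearch query below exercises.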

@@ -137,6 +137,7 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2);
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index");
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2);
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",10, "facet.limit",1, "facet.sort","index");
query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1);
query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*");
query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1);