mirror of https://github.com/apache/lucene.git
SOLR-2703: Added surround query parser support
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1166185 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1d56ff5bc3
commit
80359e0812
|
@ -168,7 +168,8 @@ New Features
|
|||
* SOLR-2656: realtime-get, efficiently retrieves the latest stored fields for specified
|
||||
documents, even if they are not yet searchable (i.e. without reopening a searcher)
|
||||
(yonik)
|
||||
|
||||
|
||||
* SOLR-2703: Added support for Lucene's "surround" query parser. (Simon Rosenthal, ehatcher)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
|
|
@ -41,6 +41,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin {
|
|||
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
|
||||
SpatialBoxQParserPlugin.NAME, SpatialBoxQParserPlugin.class,
|
||||
JoinQParserPlugin.NAME, JoinQParserPlugin.class,
|
||||
SurroundQParserPlugin.NAME, SurroundQParserPlugin.class,
|
||||
};
|
||||
|
||||
/** return a {@link QParser} */
|
||||
|
|
|
@ -0,0 +1,116 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.SnapPuller;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.lucene.queryparser.surround.parser.*;
|
||||
import org.apache.lucene.queryparser.surround.query.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
/**
|
||||
* Plugin for lucene/contrib Surround query parser, bringing SpanQuery support
|
||||
* to Solr
|
||||
*
|
||||
* <queryParser name="surround"
|
||||
* class="org.apache.solr.search.SurroundQParserPlugin" />
|
||||
*
|
||||
* Examples of query syntax can be found in modules/queryparser/docs/surround
|
||||
*
|
||||
* Note that the query string is not analyzed in any way
|
||||
*
|
||||
* @since 4.0
|
||||
*/
|
||||
|
||||
public class SurroundQParserPlugin extends QParserPlugin {
|
||||
public static String NAME = "surround";
|
||||
|
||||
@Override
|
||||
public void init(NamedList args) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public QParser createParser(String qstr, SolrParams localParams,
|
||||
SolrParams params, SolrQueryRequest req) {
|
||||
return new SurroundQParser(qstr, localParams, params, req);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class SurroundQParser extends QParser {
|
||||
protected static final Logger LOG = LoggerFactory .getLogger(SurroundQParser.class);
|
||||
static final int DEFMAXBASICQUERIES = 1000;
|
||||
static final String MBQParam = "maxBasicQueries";
|
||||
|
||||
String sortStr;
|
||||
SolrQueryParser lparser;
|
||||
int maxBasicQueries;
|
||||
|
||||
public SurroundQParser(String qstr, SolrParams localParams,
|
||||
SolrParams params, SolrQueryRequest req) {
|
||||
super(qstr, localParams, params, req);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query parse()
|
||||
throws org.apache.lucene.queryparser.classic.ParseException {
|
||||
SrndQuery sq;
|
||||
String qstr = getString();
|
||||
if (qstr == null)
|
||||
return null;
|
||||
String mbqparam = getParam(MBQParam);
|
||||
if (mbqparam == null) {
|
||||
this.maxBasicQueries = DEFMAXBASICQUERIES;
|
||||
} else {
|
||||
try {
|
||||
this.maxBasicQueries = Integer.parseInt(mbqparam);
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Couldn't parse maxBasicQueries value " + mbqparam +", using default of 1000");
|
||||
this.maxBasicQueries = DEFMAXBASICQUERIES;
|
||||
}
|
||||
}
|
||||
// ugh .. colliding ParseExceptions
|
||||
try {
|
||||
sq = org.apache.lucene.queryparser.surround.parser.QueryParser
|
||||
.parse(qstr);
|
||||
} catch (org.apache.lucene.queryparser.surround.parser.ParseException pe) {
|
||||
throw new org.apache.lucene.queryparser.classic.ParseException(
|
||||
pe.getMessage());
|
||||
}
|
||||
|
||||
// so what do we do with the SrndQuery ??
|
||||
// processing based on example in LIA Ch 9
|
||||
|
||||
String defaultField = getParam(CommonParams.DF);
|
||||
if (defaultField == null) {
|
||||
defaultField = getReq().getSchema().getDefaultSearchFieldName();
|
||||
}
|
||||
|
||||
BasicQueryFactory bqFactory = new BasicQueryFactory(this.maxBasicQueries);
|
||||
Query lquery = sq.makeLuceneQueryField(defaultField, bqFactory);
|
||||
return lquery;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,613 @@
|
|||
<?xml version="1.0" ?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<!-- The Solr schema file. This file should be named "schema.xml" and
|
||||
should be located where the classloader for the Solr webapp can find it.
|
||||
|
||||
This schema is used for testing, and as such has everything and the
|
||||
kitchen sink thrown in. See example/solr/conf/schema.xml for a
|
||||
more concise example.
|
||||
|
||||
$Id: schema12.xml 1149050 2011-07-21 07:09:27Z koji $
|
||||
$Source: /cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
|
||||
$Name: $
|
||||
-->
|
||||
|
||||
<schema name="test" version="1.4">
|
||||
<types>
|
||||
|
||||
<!-- field type definitions... note that the "name" attribute is
|
||||
just a label to be used by field definitions. The "class"
|
||||
attribute and any other attributes determine the real type and
|
||||
behavior of the fieldtype.
|
||||
-->
|
||||
|
||||
<!-- numeric field types that store and index the text
|
||||
value verbatim (and hence don't sort correctly or support range queries.)
|
||||
These are provided more for backward compatibility, allowing one
|
||||
to create a schema that matches an existing lucene index.
|
||||
-->
|
||||
<fieldType name="pint" class="solr.IntField"/>
|
||||
<fieldType name="plong" class="solr.LongField"/>
|
||||
<fieldtype name="pfloat" class="solr.FloatField"/>
|
||||
<fieldType name="pdouble" class="solr.DoubleField"/>
|
||||
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
<fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
<!-- numeric field types that manipulate the value into
|
||||
a string value that isn't human readable in its internal form,
|
||||
but sorts correctly and supports range queries.
|
||||
|
||||
If sortMissingLast="true" then a sort on this field will cause documents
|
||||
without the field to come after documents with the field,
|
||||
regardless of the requested sort order.
|
||||
If sortMissingFirst="true" then a sort on this field will cause documents
|
||||
without the field to come before documents with the field,
|
||||
regardless of the requested sort order.
|
||||
If sortMissingLast="false" and sortMissingFirst="false" (the default),
|
||||
then default lucene sorting will be used which places docs without the field
|
||||
first in an ascending sort and last in a descending sort.
|
||||
-->
|
||||
<fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true" />
|
||||
<fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
|
||||
<fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
|
||||
<fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
|
||||
|
||||
<!-- Field type demonstrating an Analyzer failure -->
|
||||
<fieldtype name="failtype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- Demonstrating ignoreCaseChange -->
|
||||
<fieldtype name="wdf_nocase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="wdf_preserve" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
|
||||
<!-- HighlitText optimizes storage for (long) columns which will be highlit -->
|
||||
<fieldtype name="highlittext" class="solr.TextField"/>
|
||||
|
||||
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
||||
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
|
||||
|
||||
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
|
||||
seconds part (.999) is optional.
|
||||
-->
|
||||
<fieldtype name="date" class="solr.TrieDateField" sortMissingLast="true"/>
|
||||
<fieldtype name="tdate" class="solr.TrieDateField" sortMissingLast="true" precisionStep="6"/>
|
||||
<fieldtype name="pdate" class="solr.DateField" sortMissingLast="true"/>
|
||||
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true" >
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<!-- field type that doesn't generate phrases from unquoted multiple tokens per analysis unit -->
|
||||
<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldtype name="nametext" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="teststop" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||
<filter class="solr.StandardFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
|
||||
<fieldtype name="lowertok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="keywordtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lettertok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="whitetok" class="solr.TextField">
|
||||
<analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="HTMLstandardtok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="HTMLwhitetok" class="solr.TextField">
|
||||
<analyzer>
|
||||
<charFilter class="solr.HTMLStripCharFilterFactory"/>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardtokfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.StandardFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="standardfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StandardFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lowerfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="patternreplacefilt" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
<filter class="solr.PatternReplaceFilterFactory"
|
||||
pattern="([^a-zA-Z])" replacement="_" replace="all"
|
||||
/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.KeywordTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="porterfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="engporterfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="custengporterfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="stopfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="custstopfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
<fieldtype name="lengthfilt" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="numericsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.StopFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="protectedsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
|
||||
<!-- more flexible in matching skus, but more chance of a false match -->
|
||||
<fieldtype name="skutype1" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- less flexible in matching skus, but less chance of a false match -->
|
||||
<fieldtype name="skutype2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- whitespace-tokenized field type that applies only the synonym filter -->
|
||||
<fieldtype name="syn" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<!-- a text field with the stop filter only on the query analyzer
|
||||
-->
|
||||
<fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||
-->
|
||||
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- A general minimally stemmed textfield -->
|
||||
<fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory" />
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.EnglishMinimalStemFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
|
||||
synonyms "better"
|
||||
-->
|
||||
<fieldtype name="dedup" class="solr.TextField">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory"
|
||||
synonyms="synonyms.txt" expand="true" />
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldtype name="unstored" class="solr.StrField" indexed="true" stored="false"/>
|
||||
|
||||
|
||||
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldtype>
|
||||
|
||||
<fieldType name="uuid" class="solr.UUIDField" />
|
||||
<fieldType name="ignored" class="solr.StrField" indexed="false" stored="false" />
|
||||
|
||||
|
||||
<fieldType name="random" class="solr.RandomSortField" indexed="true" />
|
||||
|
||||
<!-- Poly field -->
|
||||
<fieldType name="xy" class="solr.PointType" dimension="2" subFieldType="double"/>
|
||||
<fieldType name="xyd" class="solr.PointType" dimension="2" subFieldSuffix="*_d"/>
|
||||
<fieldtype name="geohash" class="solr.GeoHashField"/>
|
||||
|
||||
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
|
||||
<!-- A specialized field for geospatial search. If indexed, this fieldType must not be multivalued. -->
|
||||
<fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
|
||||
|
||||
</types>
|
||||
|
||||
|
||||
<fields>
|
||||
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
|
||||
<field name="signatureField" type="string" indexed="true" stored="false"/>
|
||||
<field name="uuid" type="uuid" stored="true" />
|
||||
<field name="name" type="nametext" indexed="true" stored="true"/>
|
||||
<field name="text" type="textgen" indexed="true" stored="false"/>
|
||||
<field name="subject" type="text" indexed="true" stored="true"/>
|
||||
<field name="title" type="nametext" indexed="true" stored="true"/>
|
||||
<field name="weight" type="float" indexed="true" stored="true"/>
|
||||
<field name="bday" type="date" indexed="true" stored="true"/>
|
||||
|
||||
<field name="text_np" type="text_np" indexed="true" stored="false"/>
|
||||
|
||||
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
|
||||
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
|
||||
|
||||
<field name="syn" type="syn" indexed="true" stored="true"/>
|
||||
|
||||
<!-- to test property inheritance and overriding -->
|
||||
<field name="shouldbeunstored" type="unstored" />
|
||||
<field name="shouldbestored" type="unstored" stored="true"/>
|
||||
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
|
||||
|
||||
|
||||
<!-- test different combinations of indexed and stored -->
|
||||
<field name="bind" type="boolean" indexed="true" stored="false"/>
|
||||
<field name="bsto" type="boolean" indexed="false" stored="true"/>
|
||||
<field name="bindsto" type="boolean" indexed="true" stored="true"/>
|
||||
<field name="isto" type="int" indexed="false" stored="true"/>
|
||||
<field name="iind" type="int" indexed="true" stored="false"/>
|
||||
<field name="ssto" type="string" indexed="false" stored="true"/>
|
||||
<field name="sind" type="string" indexed="true" stored="false"/>
|
||||
<field name="sindsto" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<!-- test combinations of term vector settings -->
|
||||
<field name="test_basictv" type="text" termVectors="true"/>
|
||||
<field name="test_notv" type="text" termVectors="false"/>
|
||||
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
|
||||
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
|
||||
<field name="test_posofftv" type="text" termVectors="true"
|
||||
termPositions="true" termOffsets="true"/>
|
||||
|
||||
<!-- test highlit field settings -->
|
||||
<field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
|
||||
<field name="test_hlt_off" type="highlittext" indexed="true" compressed="false"/>
|
||||
|
||||
<!-- fields to test individual tokenizers and tokenfilters -->
|
||||
<field name="teststop" type="teststop" indexed="true" stored="true"/>
|
||||
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
|
||||
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
|
||||
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
|
||||
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
|
||||
<field name="lettertok" type="lettertok" indexed="true" stored="true"/>
|
||||
<field name="whitetok" type="whitetok" indexed="true" stored="true"/>
|
||||
<field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
|
||||
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
|
||||
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
|
||||
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
|
||||
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
|
||||
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
|
||||
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
|
||||
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
|
||||
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
|
||||
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
|
||||
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
|
||||
<field name="dedup" type="dedup" indexed="true" stored="true"/>
|
||||
<field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
|
||||
<field name="wdf_preserve" type="wdf_preserve" indexed="true" stored="true"/>
|
||||
|
||||
<field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
|
||||
|
||||
<field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
|
||||
|
||||
<field name="subword" type="subword" indexed="true" stored="true"/>
|
||||
<field name="numericsubword" type="numericsubword" indexed="true" stored="true"/>
|
||||
<field name="protectedsubword" type="protectedsubword" indexed="true" stored="true"/>
|
||||
|
||||
<field name="sku1" type="skutype1" indexed="true" stored="true"/>
|
||||
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
|
||||
|
||||
<field name="textgap" type="textgap" indexed="true" stored="true"/>
|
||||
|
||||
<!--
|
||||
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW"/>
|
||||
-->
|
||||
<field name="timestamp" type="date" indexed="true" stored="true"/>
|
||||
|
||||
<!-- Test a point field for distances -->
|
||||
<field name="point" type="xy" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="pointD" type="xyd" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="point_hash" type="geohash" indexed="true" stored="true" multiValued="false"/>
|
||||
<field name="store" type="location" indexed="true" stored="true"/>
|
||||
|
||||
<!-- to test uniq fields -->
|
||||
<field name="uniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="uniq2" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="uniq3" type="string" indexed="true" stored="true"/>
|
||||
<field name="nouniq" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
|
||||
|
||||
<dynamicField name="*_si" type="sint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_sl" type="slong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_sf" type="sfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_sd" type="sdouble" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
<dynamicField name="tv_mv_*" type="text" indexed="true" stored="true" multiValued="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
|
||||
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
|
||||
|
||||
<dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_is" type="int" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
|
||||
<!-- :TODO: why are these identical?!?!?! -->
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
|
||||
|
||||
<!-- some trie-coded dynamic fields for faster range queries -->
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_pi" type="pint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pf" type="pfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pl" type="plong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pd" type="pdouble" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_pdt" type="pdate" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
||||
<dynamicField name="attr_*" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<dynamicField name="random_*" type="random" />
|
||||
|
||||
</fields>
|
||||
|
||||
<defaultSearchField>text</defaultSearchField>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
<copyField source="title" dest="title_stemmed"/>
|
||||
<copyField source="title" dest="title_lettertok"/>
|
||||
|
||||
<copyField source="title" dest="text"/>
|
||||
<copyField source="subject" dest="text"/>
|
||||
|
||||
</schema>
|
|
@ -0,0 +1,88 @@
|
|||
|
||||
package org.apache.solr.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
|
||||
public class TestSurroundQueryParser extends AbstractSolrTestCase {
|
||||
|
||||
@Override
|
||||
public String getSchemaFile() { return "schemasurround.xml"; }
|
||||
@Override
|
||||
public String getSolrConfigFile() { return "solrconfig.xml"; }
|
||||
// public String getCoreName() { return "collection1"; }
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
// if you override setUp or tearDown, you better call
|
||||
// the super classes version
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
// if you override setUp or tearDown, you better call
|
||||
// the super classes version
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
public void testQueryParser() {
|
||||
String v = "a b c d e a b c f g h i j k l m l k j z z z";
|
||||
assertU(adoc("id","1", "text",v, "text_np",v));
|
||||
|
||||
v="abc abxy cde efg ef e ";
|
||||
assertU(adoc("id","2", "text",v, "text_np",v));
|
||||
|
||||
v="1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 1001 1002 1003 1004 1005 1006 1007 1008 1009";
|
||||
assertU(adoc("id","3", "text",v, "text_np",v));
|
||||
assertU(commit());
|
||||
|
||||
|
||||
// run through a series of syntax tests, not exhaustive yet
|
||||
String localP = "{!surround df=text}";
|
||||
String t1;
|
||||
|
||||
t1 = localP+"1 N 2";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='1']");
|
||||
// but ordered search should fail
|
||||
t1 = localP +"2 W 1";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='0']");
|
||||
|
||||
// alternate syntax
|
||||
t1 = localP + "3n(a,e)";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='1']");
|
||||
|
||||
// wildcards
|
||||
t1 =localP + "100* w 20";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='0']");
|
||||
t1 =localP + "100* n 20";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='1']");
|
||||
|
||||
// nested
|
||||
t1 = localP + "(1003 2n 1001) 3N 1006";
|
||||
assertQ(req("q", t1, "indent","true")
|
||||
,"//*[@numFound='1']");
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue