mirror of https://github.com/apache/lucene.git
SOLR-2015: add boolean attribute autoGeneratePhraseQueries to TextField
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@979049 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a5430edf83
commit
da7655c72a
|
@ -203,6 +203,15 @@ New Features
|
|||
http://wiki.apache.org/solr/SpatialSearch and the example. Refactored some items in Lucene spatial.
|
||||
Removed SpatialTileField as the underlying CartesianTier is broken beyond repair and is going to be moved. (gsingers)
|
||||
|
||||
* SOLR-2015: Add a boolean attribute autoGeneratePhraseQueries to TextField.
|
||||
autoGeneratePhraseQueries="true" (the default) causes the query parser to
|
||||
generate phrase queries if multiple tokens are generated from a single
|
||||
non-quoted analysis string. For example, WordDelimiterFilter splitting text:pdp-11
|
||||
will cause the parser to generate text:"pdp 11" rather than (text:pdp OR text:11).
|
||||
Note that autoGeneratePhraseQueries="true" tends not to work well for non-whitespace
|
||||
delimited languages. (yonik)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -213,8 +213,12 @@
|
|||
words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
|
||||
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
|
||||
Synonyms and stopwords are customized by external files, and stemming is enabled.
|
||||
The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
|
||||
form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
|
||||
to generate text:"pdp 11" rather than (text:pdp OR text:11).
|
||||
NOTE: autoGeneratePhraseQueries="true" tends not to work well for non-whitespace-delimited languages.
|
||||
-->
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
|
||||
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- in this example, we will only use synonyms at query time
|
||||
|
|
|
@ -46,13 +46,21 @@ import java.io.StringReader;
|
|||
* @version $Id$
|
||||
*/
|
||||
public class TextField extends FieldType {
|
||||
protected boolean autoGeneratePhraseQueries = true;
|
||||
|
||||
protected void init(IndexSchema schema, Map<String,String> args) {
|
||||
properties |= TOKENIZED;
|
||||
if (schema.getVersion()> 1.1f) properties &= ~OMIT_TF_POSITIONS;
|
||||
|
||||
String autoGeneratePhraseQueriesStr = args.remove("autoGeneratePhraseQueries");
|
||||
if (autoGeneratePhraseQueriesStr != null)
|
||||
autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr);
|
||||
super.init(schema, args);
|
||||
}
|
||||
|
||||
public boolean getAutoGeneratePhraseQueries() {
|
||||
return autoGeneratePhraseQueries;
|
||||
}
|
||||
|
||||
public SortField getSortField(SchemaField field, boolean reverse) {
|
||||
return getStringSort(field, reverse);
|
||||
}
|
||||
|
|
|
@ -142,11 +142,15 @@ public class SolrQueryParser extends QueryParser {
|
|||
return parser.subQuery(queryText, null).getQuery();
|
||||
}
|
||||
}
|
||||
//Intercept poly fields, as they get expanded by default to an OR clause of
|
||||
SchemaField sf = schema.getField(field);
|
||||
//TODO: is there any way to avoid this instanceof check?
|
||||
if (sf != null&& !(sf.getType() instanceof TextField)){//we have a poly field, deal with it specially by delegating to the FieldType
|
||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
if (sf != null) {
|
||||
FieldType ft = sf.getType();
|
||||
// delegate to type for everything except TextField
|
||||
if (ft instanceof TextField) {
|
||||
return super.getFieldQuery(field, queryText, quoted || ((TextField)ft).getAutoGeneratePhraseQueries());
|
||||
} else {
|
||||
return sf.getType().getFieldQuery(parser, sf, queryText);
|
||||
}
|
||||
}
|
||||
|
||||
// default to a normal field query
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.After;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig.xml", "schema12.xml");
|
||||
createIndex();
|
||||
}
|
||||
|
||||
public static void createIndex() {
|
||||
String v;
|
||||
v="how now brown cow";
|
||||
assertU(adoc("id","1", "text",v, "text_np",v));
|
||||
v="now cow";
|
||||
assertU(adoc("id","2", "text",v, "text_np",v));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPhrase() {
|
||||
// should generate a phrase of "now cow" and match only one doc
|
||||
assertQ(req("q","text:now-cow", "indent","true")
|
||||
,"//*[@numFound='1']"
|
||||
);
|
||||
// should generate a query of (now OR cow) and match both docs
|
||||
assertQ(req("q","text_np:now-cow", "indent","true")
|
||||
,"//*[@numFound='2']"
|
||||
);
|
||||
}
|
||||
|
||||
}
|
|
@ -145,6 +145,35 @@
|
|||
</fieldType>
|
||||
|
||||
|
||||
<!-- field type that doesn't generate phrases from unquoted multiple tokens per analysis unit -->
|
||||
<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false" >
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<fieldtype name="nametext" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldtype>
|
||||
|
@ -403,6 +432,8 @@
|
|||
<field name="weight" type="float" indexed="true" stored="true"/>
|
||||
<field name="bday" type="date" indexed="true" stored="true"/>
|
||||
|
||||
<field name="text_np" type="text_np" indexed="true" stored="false"/>
|
||||
|
||||
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
|
||||
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
|
||||
|
||||
|
|
Loading…
Reference in New Issue