mirror of https://github.com/apache/lucene.git
SOLR-2438, allow an analysis chain to be created for multiterm query terms or synthesize one if not defined explicitly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206229 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6870592252
commit
098371446a
|
@ -290,7 +290,6 @@ public abstract class QueryParserBase {
|
||||||
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
|
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @see #setLowercaseExpandedTerms(boolean)
|
* @see #setLowercaseExpandedTerms(boolean)
|
||||||
*/
|
*/
|
||||||
|
@ -778,14 +777,21 @@ public abstract class QueryParserBase {
|
||||||
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
|
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
private BytesRef analyzeRangePart(String field, String part) {
|
// TODO: Should this be protected instead?
|
||||||
|
private BytesRef analyzeMultitermTerm(String field, String part) {
|
||||||
|
return analyzeMultitermTerm(field, part, analyzer);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
|
||||||
TokenStream source;
|
TokenStream source;
|
||||||
|
|
||||||
|
if (analyzerIn == null) analyzerIn = analyzer;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
source = analyzer.tokenStream(field, new StringReader(part));
|
source = analyzerIn.tokenStream(field, new StringReader(part));
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
|
throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
@ -793,10 +799,10 @@ public abstract class QueryParserBase {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (source.incrementToken())
|
if (source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned too many terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("error analyzing range part: " + part, e);
|
throw new RuntimeException("error analyzing range part: " + part, e);
|
||||||
}
|
}
|
||||||
|
@ -805,7 +811,7 @@ public abstract class QueryParserBase {
|
||||||
source.end();
|
source.end();
|
||||||
source.close();
|
source.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e);
|
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
return BytesRef.deepCopyOf(bytes);
|
||||||
|
@ -827,13 +833,13 @@ public abstract class QueryParserBase {
|
||||||
if (part1 == null) {
|
if (part1 == null) {
|
||||||
start = null;
|
start = null;
|
||||||
} else {
|
} else {
|
||||||
start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1);
|
start = analyzeRangeTerms ? analyzeMultitermTerm(field, part1) : new BytesRef(part1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (part2 == null) {
|
if (part2 == null) {
|
||||||
end = null;
|
end = null;
|
||||||
} else {
|
} else {
|
||||||
end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2);
|
end = analyzeRangeTerms ? analyzeMultitermTerm(field, part2) : new BytesRef(part2);
|
||||||
}
|
}
|
||||||
|
|
||||||
final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
|
final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive);
|
||||||
|
|
|
@ -188,6 +188,11 @@ New Features
|
||||||
|
|
||||||
* SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
|
* SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types
|
||||||
(Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
|
(Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson)
|
||||||
|
|
||||||
|
* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
|
||||||
|
a complete analysis chain for multiterm queries.
|
||||||
|
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
|
||||||
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -383,6 +388,11 @@ New Features
|
||||||
* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
|
* SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update
|
||||||
format (shalin)
|
format (shalin)
|
||||||
|
|
||||||
|
* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify
|
||||||
|
a complete analysis chain for multiterm queries.
|
||||||
|
(Pete Sturge Erick Erickson, Mentoring from Seeley and Muir)
|
||||||
|
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)
|
* SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin)
|
||||||
|
|
|
@ -48,13 +48,15 @@ public abstract class FieldProperties {
|
||||||
|
|
||||||
protected final static int REQUIRED = 0x00001000;
|
protected final static int REQUIRED = 0x00001000;
|
||||||
protected final static int OMIT_POSITIONS = 0x00002000;
|
protected final static int OMIT_POSITIONS = 0x00002000;
|
||||||
|
protected final static int LEGACY_MULTITERM = 0x00004000;
|
||||||
|
|
||||||
static final String[] propertyNames = {
|
static final String[] propertyNames = {
|
||||||
"indexed", "tokenized", "stored",
|
"indexed", "tokenized", "stored",
|
||||||
"binary", "omitNorms", "omitTermFreqAndPositions",
|
"binary", "omitNorms", "omitTermFreqAndPositions",
|
||||||
"termVectors", "termPositions", "termOffsets",
|
"termVectors", "termPositions", "termOffsets",
|
||||||
"multiValued",
|
"multiValued",
|
||||||
"sortMissingFirst","sortMissingLast","required", "omitPositions"
|
"sortMissingFirst","sortMissingLast","required", "omitPositions" ,
|
||||||
|
"legacyMultiTerm"
|
||||||
};
|
};
|
||||||
|
|
||||||
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
|
||||||
|
|
|
@ -428,6 +428,21 @@ public abstract class FieldType extends FieldProperties {
|
||||||
*/
|
*/
|
||||||
protected Analyzer queryAnalyzer=analyzer;
|
protected Analyzer queryAnalyzer=analyzer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Analyzer set by schema for text types to use when searching fields
|
||||||
|
* of this type, subclasses can set analyzer themselves or override
|
||||||
|
* getAnalyzer()
|
||||||
|
* This analyzer is used to process wildcard, prefix, regex and other multiterm queries. It
|
||||||
|
* assembles a list of tokenizer +filters that "make sense" for this, primarily accent folding and
|
||||||
|
* lowercasing filters, and charfilters.
|
||||||
|
*
|
||||||
|
* If users require old-style behavior, they can specify 'legacyMultiterm="true" ' in the schema file
|
||||||
|
* @see #getMultiTermAnalyzer
|
||||||
|
* @see #setMultiTermAnalyzer
|
||||||
|
*/
|
||||||
|
protected Analyzer multiTermAnalyzer=null;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Analyzer to be used when indexing fields of this type.
|
* Returns the Analyzer to be used when indexing fields of this type.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -450,6 +465,17 @@ public abstract class FieldType extends FieldProperties {
|
||||||
return queryAnalyzer;
|
return queryAnalyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the Analyzer to be used when searching fields of this type when mult-term queries are specified.
|
||||||
|
* <p>
|
||||||
|
* This method may be called many times, at any time.
|
||||||
|
* </p>
|
||||||
|
* @see #getAnalyzer
|
||||||
|
*/
|
||||||
|
public Analyzer getMultiTermAnalyzer() {
|
||||||
|
return multiTermAnalyzer;
|
||||||
|
}
|
||||||
|
|
||||||
private final String analyzerError =
|
private final String analyzerError =
|
||||||
"FieldType: " + this.getClass().getSimpleName() +
|
"FieldType: " + this.getClass().getSimpleName() +
|
||||||
" (" + typeName + ") does not support specifying an analyzer";
|
" (" + typeName + ") does not support specifying an analyzer";
|
||||||
|
@ -498,6 +524,28 @@ public abstract class FieldType extends FieldProperties {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the Analyzer to be used when querying fields of this type.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
*
|
||||||
|
* Subclasses that override this method need to ensure the behavior
|
||||||
|
* of the analyzer is consistent with the implementation of toInternal.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* @see #toInternal
|
||||||
|
* @see #setAnalyzer
|
||||||
|
* @see #getQueryAnalyzer
|
||||||
|
*/
|
||||||
|
public void setMultiTermAnalyzer(Analyzer analyzer) {
|
||||||
|
SolrException e = new SolrException
|
||||||
|
(ErrorCode.SERVER_ERROR,
|
||||||
|
"FieldType: " + this.getClass().getSimpleName() +
|
||||||
|
" (" + typeName + ") does not support specifying an analyzer");
|
||||||
|
SolrException.logOnce(log,null,e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
|
||||||
/** @lucene.internal */
|
/** @lucene.internal */
|
||||||
protected Similarity similarity;
|
protected Similarity similarity;
|
||||||
|
|
||||||
|
|
|
@ -18,19 +18,15 @@
|
||||||
package org.apache.solr.schema;
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.apache.solr.analysis.*;
|
||||||
import org.apache.solr.common.ResourceLoader;
|
import org.apache.solr.common.ResourceLoader;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.SolrParams;
|
|
||||||
import org.apache.solr.common.util.DOMUtil;
|
import org.apache.solr.common.util.DOMUtil;
|
||||||
import org.apache.solr.common.util.NamedList;
|
|
||||||
import org.apache.solr.core.Config;
|
import org.apache.solr.core.Config;
|
||||||
import org.apache.solr.core.SolrResourceLoader;
|
import org.apache.solr.core.SolrResourceLoader;
|
||||||
import org.apache.solr.analysis.CharFilterFactory;
|
|
||||||
import org.apache.solr.analysis.TokenFilterFactory;
|
|
||||||
import org.apache.solr.analysis.TokenizerChain;
|
|
||||||
import org.apache.solr.analysis.TokenizerFactory;
|
|
||||||
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
||||||
import org.w3c.dom.*;
|
import org.w3c.dom.*;
|
||||||
|
|
||||||
|
@ -88,12 +84,16 @@ public final class FieldTypePluginLoader
|
||||||
String expression = "./analyzer[@type='query']";
|
String expression = "./analyzer[@type='query']";
|
||||||
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
Analyzer queryAnalyzer = readAnalyzer(anode);
|
Analyzer queryAnalyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
|
expression = "./analyzer[@type='multiterm']";
|
||||||
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
|
Analyzer multiAnalyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
// An analyzer without a type specified, or with type="index"
|
// An analyzer without a type specified, or with type="index"
|
||||||
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
|
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
|
||||||
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
Analyzer analyzer = readAnalyzer(anode);
|
Analyzer analyzer = readAnalyzer(anode);
|
||||||
|
|
||||||
// a custom similarity[Factory]
|
// a custom similarity[Factory]
|
||||||
expression = "./similarity";
|
expression = "./similarity";
|
||||||
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
|
||||||
|
@ -101,9 +101,16 @@ public final class FieldTypePluginLoader
|
||||||
|
|
||||||
if (queryAnalyzer==null) queryAnalyzer=analyzer;
|
if (queryAnalyzer==null) queryAnalyzer=analyzer;
|
||||||
if (analyzer==null) analyzer=queryAnalyzer;
|
if (analyzer==null) analyzer=queryAnalyzer;
|
||||||
|
if (multiAnalyzer == null) {
|
||||||
|
Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36);
|
||||||
|
legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch :
|
||||||
|
Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null));
|
||||||
|
multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch);
|
||||||
|
}
|
||||||
if (analyzer!=null) {
|
if (analyzer!=null) {
|
||||||
ft.setAnalyzer(analyzer);
|
ft.setAnalyzer(analyzer);
|
||||||
ft.setQueryAnalyzer(queryAnalyzer);
|
ft.setQueryAnalyzer(queryAnalyzer);
|
||||||
|
ft.setMultiTermAnalyzer(multiAnalyzer);
|
||||||
}
|
}
|
||||||
if (similarity!=null) {
|
if (similarity!=null) {
|
||||||
ft.setSimilarity(similarity);
|
ft.setSimilarity(similarity);
|
||||||
|
@ -130,6 +137,42 @@ public final class FieldTypePluginLoader
|
||||||
return fieldTypes.put( name, plugin );
|
return fieldTypes.put( name, plugin );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The point here is that, if no multitermanalyzer was specified in the schema file, do one of several things:
|
||||||
|
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
|
||||||
|
// lowercase filters and asciifoldingfilter.
|
||||||
|
// 2> If letacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
|
||||||
|
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
|
||||||
|
|
||||||
|
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) {
|
||||||
|
if (queryAnalyzer == null) return null;
|
||||||
|
|
||||||
|
if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) {
|
||||||
|
return new KeywordAnalyzer();
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenizerChain tc = (TokenizerChain) queryAnalyzer;
|
||||||
|
|
||||||
|
// we know it'll never be longer than this unless the code below is explicitly changed
|
||||||
|
TokenFilterFactory[] filters = new TokenFilterFactory[2];
|
||||||
|
int idx = 0;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
if (factory instanceof LowerCaseFilterFactory) {
|
||||||
|
filters[idx] = new LowerCaseFilterFactory();
|
||||||
|
filters[idx++].init(factory.getArgs());
|
||||||
|
}
|
||||||
|
if (factory instanceof ASCIIFoldingFilterFactory) {
|
||||||
|
filters[idx] = new ASCIIFoldingFilterFactory();
|
||||||
|
filters[idx++].init(factory.getArgs());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory();
|
||||||
|
white.init(tc.getTokenizerFactory().getArgs());
|
||||||
|
|
||||||
|
return new TokenizerChain(tc.getCharFilterFactories(),
|
||||||
|
white,
|
||||||
|
Arrays.copyOfRange(filters, 0, idx));
|
||||||
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
|
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
|
||||||
//
|
//
|
||||||
|
|
|
@ -97,6 +97,9 @@ public final class SchemaField extends FieldProperties {
|
||||||
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
|
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
|
||||||
boolean isBinary() { return (properties & BINARY)!=0; }
|
boolean isBinary() { return (properties & BINARY)!=0; }
|
||||||
|
|
||||||
|
boolean legacyMultiTerm() {
|
||||||
|
return (properties & LEGACY_MULTITERM) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
public IndexableField createField(Object val, float boost) {
|
public IndexableField createField(Object val, float boost) {
|
||||||
return type.createField(this,val,boost);
|
return type.createField(this,val,boost);
|
||||||
|
|
|
@ -98,6 +98,11 @@ public class TextField extends FieldType {
|
||||||
this.queryAnalyzer = analyzer;
|
this.queryAnalyzer = analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setMultiTermAnalyzer(Analyzer analyzer) {
|
||||||
|
this.multiTermAnalyzer = analyzer;
|
||||||
|
}
|
||||||
|
|
||||||
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
|
static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) {
|
||||||
int phraseSlop = 0;
|
int phraseSlop = 0;
|
||||||
boolean enablePositionIncrements = true;
|
boolean enablePositionIncrements = true;
|
||||||
|
|
|
@ -26,7 +26,6 @@ import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
import org.apache.lucene.util.automaton.Automaton;
|
import org.apache.lucene.util.automaton.Automaton;
|
||||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||||
import org.apache.lucene.util.automaton.BasicOperations;
|
import org.apache.lucene.util.automaton.BasicOperations;
|
||||||
|
@ -71,7 +70,6 @@ public class SolrQueryParser extends QueryParser {
|
||||||
this.schema = parser.getReq().getSchema();
|
this.schema = parser.getReq().getSchema();
|
||||||
this.parser = parser;
|
this.parser = parser;
|
||||||
this.defaultField = defaultField;
|
this.defaultField = defaultField;
|
||||||
setLowercaseExpandedTerms(false);
|
|
||||||
setEnablePositionIncrements(true);
|
setEnablePositionIncrements(true);
|
||||||
checkAllowLeadingWildcards();
|
checkAllowLeadingWildcards();
|
||||||
}
|
}
|
||||||
|
@ -106,6 +104,14 @@ public class SolrQueryParser extends QueryParser {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) {
|
||||||
|
if (part == null) return part;
|
||||||
|
|
||||||
|
SchemaField sf = schema.getFieldOrNull((field));
|
||||||
|
if (sf == null || ! (sf.getType() instanceof TextField)) return part;
|
||||||
|
return analyzeMultitermTerm(field, part, analyzer).utf8ToString();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
|
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
|
@ -137,6 +143,8 @@ public class SolrQueryParser extends QueryParser {
|
||||||
@Override
|
@Override
|
||||||
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
|
protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
|
part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
|
part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
SchemaField sf = schema.getField(field);
|
SchemaField sf = schema.getField(field);
|
||||||
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive);
|
||||||
}
|
}
|
||||||
|
@ -144,9 +152,8 @@ public class SolrQueryParser extends QueryParser {
|
||||||
@Override
|
@Override
|
||||||
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
|
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
|
||||||
checkNullField(field);
|
checkNullField(field);
|
||||||
if (getLowercaseExpandedTerms()) {
|
|
||||||
termStr = termStr.toLowerCase();
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: toInternal() won't necessarily work on partial
|
// TODO: toInternal() won't necessarily work on partial
|
||||||
// values, so it looks like we need a getPrefix() function
|
// values, so it looks like we need a getPrefix() function
|
||||||
|
@ -162,14 +169,13 @@ public class SolrQueryParser extends QueryParser {
|
||||||
PrefixQuery prefixQuery = new PrefixQuery(t);
|
PrefixQuery prefixQuery = new PrefixQuery(t);
|
||||||
return prefixQuery;
|
return prefixQuery;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
||||||
// *:* -> MatchAllDocsQuery
|
// *:* -> MatchAllDocsQuery
|
||||||
if ("*".equals(field) && "*".equals(termStr)) {
|
if ("*".equals(field) && "*".equals(termStr)) {
|
||||||
return newMatchAllDocsQuery();
|
return newMatchAllDocsQuery();
|
||||||
}
|
}
|
||||||
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
// can we use reversed wildcards in this field?
|
// can we use reversed wildcards in this field?
|
||||||
String type = schema.getFieldType(field).getTypeName();
|
String type = schema.getFieldType(field).getTypeName();
|
||||||
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
||||||
|
@ -213,4 +219,11 @@ public class SolrQueryParser extends QueryParser {
|
||||||
}
|
}
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected Query getRegexpQuery(String field, String termStr) throws ParseException
|
||||||
|
{
|
||||||
|
termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer());
|
||||||
|
return super.getRegexpQuery(field, termStr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,145 @@
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
<fieldtype name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
|
||||||
|
|
||||||
|
<fieldType name="text" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.PatternTokenizerFactory" pattern="\s+"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_multi" class="solr.TextField" multiValued="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_multi_bad" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="multiterm"> <!-- Intentionally different to test that these are kept distinct -->
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
|
<fieldType name="text_ws" class="solr.TextField" multiValued="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_rev" class="solr.TextField" legacyMultiTerm="false">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
|
||||||
|
maxPosAsterisk="1" maxPosQuestion="2" maxFractionAsterisk="0.99"
|
||||||
|
minTrailing="1"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_lower_tokenizer" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.LowerCaseTokenizerFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_charfilter" class="solr.TextField" multiValued="false">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="text_oldstyle" class="solr.TextField" multiValued="false" legacyMultiTerm="true">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.ASCIIFoldingFilterFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.TrimFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
<fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="byte" class="solr.ByteField" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldType name="short" class="solr.ShortField" omitNorms="true" positionIncrementGap="0"/>
|
||||||
|
<fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
|
||||||
|
<fieldtype name="date" class="solr.TrieDateField" precisionStep="0"/>
|
||||||
|
</types>
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="string" indexed="true" stored="true" required="true"/>
|
||||||
|
<field name="int_f" type="int"/>
|
||||||
|
<field name="float_f" type="float"/>
|
||||||
|
<field name="long_f" type="long"/>
|
||||||
|
<field name="double_f" type="double"/>
|
||||||
|
<field name="byte_f" type="byte"/>
|
||||||
|
<field name="short_f" type="short"/>
|
||||||
|
<field name="bool_f" type="boolean"/>
|
||||||
|
<field name="date_f" type="date"/>
|
||||||
|
|
||||||
|
<field name="content" type="text" indexed="true" stored="true"/>
|
||||||
|
<field name="content_ws" type="text_ws" indexed="true" stored="true"/>
|
||||||
|
<field name="content_rev" type="text_rev" indexed="true" stored="true"/>
|
||||||
|
<field name="content_multi" type="text_multi" indexed="true" stored="true"/>
|
||||||
|
<field name="content_lower_token" type="text_multi" indexed="true" stored="true"/>
|
||||||
|
<field name="content_oldstyle" type="text_oldstyle" indexed="true" stored="true"/>
|
||||||
|
<field name="content_charfilter" type="text_charfilter" indexed="true" stored="true"/>
|
||||||
|
<field name="content_multi_bad" type="text_multi_bad" indexed="true" stored="true"/>
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>content</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
|
||||||
|
</schema>
|
|
@ -0,0 +1,87 @@
|
||||||
|
package org.apache.solr.schema;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.apache.solr.analysis.*;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class MultiTermTest extends SolrTestCaseJ4 {
|
||||||
|
public String getCoreName() {
|
||||||
|
return "basic";
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml", "schema-folding.xml");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiFound() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_multi");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
analyzer = field.getType().getAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testQueryCopiedToMulti() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_charfilter");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue(factory instanceof LowerCaseFilterFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 1);
|
||||||
|
assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDefaultCopiedToMulti() {
|
||||||
|
SchemaField field = h.getCore().getSchema().getField("content_ws");
|
||||||
|
Analyzer analyzer = field.getType().getMultiTermAnalyzer();
|
||||||
|
assertTrue(analyzer instanceof TokenizerChain);
|
||||||
|
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
|
||||||
|
TokenizerChain tc = (TokenizerChain) analyzer;
|
||||||
|
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
|
||||||
|
assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(tc.getCharFilterFactories().length == 0);
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,231 @@
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.solr.SolrTestCaseJ4;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestFoldingMultitermQuery extends SolrTestCaseJ4 {
|
||||||
|
|
||||||
|
public String getCoreName() {
|
||||||
|
return "basic";
|
||||||
|
}
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void beforeTests() throws Exception {
|
||||||
|
initCore("solrconfig-basic.xml", "schema-folding.xml");
|
||||||
|
IndexWriter iw;
|
||||||
|
|
||||||
|
String docs[] = {
|
||||||
|
"abcdefg1 finger",
|
||||||
|
"gangs hijklmn1",
|
||||||
|
"opqrstu1 zilly",
|
||||||
|
};
|
||||||
|
|
||||||
|
// prepare the index
|
||||||
|
for (int i = 0; i < docs.length; i++) {
|
||||||
|
String num = Integer.toString(i);
|
||||||
|
String boolVal = ((i % 2) == 0) ? "true" : "false";
|
||||||
|
assertU(adoc("id", num,
|
||||||
|
"int_f", num,
|
||||||
|
"float_f", num,
|
||||||
|
"long_f", num,
|
||||||
|
"double_f", num,
|
||||||
|
"byte_f", num,
|
||||||
|
"short_f", num,
|
||||||
|
"bool_f", boolVal,
|
||||||
|
"date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z",
|
||||||
|
"content", docs[i],
|
||||||
|
"content_ws", docs[i],
|
||||||
|
"content_rev", docs[i],
|
||||||
|
"content_multi", docs[i],
|
||||||
|
"content_lower_token", docs[i],
|
||||||
|
"content_oldstyle", docs[i],
|
||||||
|
"content_charfilter", docs[i],
|
||||||
|
"content_multi_bad", docs[i]
|
||||||
|
));
|
||||||
|
}
|
||||||
|
assertU(optimize());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPrefixCaseAccentFolding() throws Exception {
|
||||||
|
String matchOneDocPrefixUpper[][] = {
|
||||||
|
{"A*", "ÁB*", "ABÇ*"}, // these should find only doc 0
|
||||||
|
{"H*", "HÏ*", "HìJ*"}, // these should find only doc 1
|
||||||
|
{"O*", "ÖP*", "OPQ*"}, // these should find only doc 2
|
||||||
|
};
|
||||||
|
|
||||||
|
String matchRevPrefixUpper[][] = {
|
||||||
|
{"*Ğ1", "*DEfG1", "*EfG1"},
|
||||||
|
{"*N1", "*LmŊ1", "*MÑ1"},
|
||||||
|
{"*Ǖ1", "*sTu1", "*RŠTU1"}
|
||||||
|
};
|
||||||
|
|
||||||
|
// test the prefix queries find only one doc where the query is uppercased. Must go through query parser here!
|
||||||
|
for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) {
|
||||||
|
String me = matchOneDocPrefixUpper[idx][jdx];
|
||||||
|
assertQ(req("q", "content:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_ws:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_multi:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_lower_token:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) {
|
||||||
|
String me = matchRevPrefixUpper[idx][jdx];
|
||||||
|
assertQ(req("q", "content_rev:" + me),
|
||||||
|
"//*[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// test the wildcard queries find only one doc where the query is uppercased and/or accented.
|
||||||
|
@Test
|
||||||
|
public void testWildcardCaseAccentFolding() throws Exception {
|
||||||
|
String matchOneDocWildUpper[][] = {
|
||||||
|
{"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"}, // these should find only doc 0
|
||||||
|
{"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"}, // these should find only doc 1
|
||||||
|
{"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"}, // these should find only doc 2
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) {
|
||||||
|
for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) {
|
||||||
|
String me = matchOneDocWildUpper[idx][jdx];
|
||||||
|
assertQ("Error with " + me, req("q", "content:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_ws:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_multi:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
assertQ(req("q", "content_lower_token:" + me),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='" + Integer.toString(idx) + "']");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go
|
||||||
|
// and update the documentation
|
||||||
|
@Test
|
||||||
|
public void testPhrase() {
|
||||||
|
assertQ(req("q", "content:\"silly ABCD*\""),
|
||||||
|
"//result[@numFound='0']");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Make sure the legacy behavior flag is honored
|
||||||
|
@Test
|
||||||
|
public void testLegacyBehavior() {
|
||||||
|
assertQ(req("q", "content_oldstyle:ABCD*"),
|
||||||
|
"//result[@numFound='0']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWildcardRange() {
|
||||||
|
assertQ(req("q", "content:[* TO *]"),
|
||||||
|
"//result[@numFound='3']");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Does the char filter get correctly handled?
|
||||||
|
@Test
|
||||||
|
public void testCharFilter() {
|
||||||
|
assertQ(req("q", "content_charfilter:" + "Á*C*"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
assertQ(req("q", "content_charfilter:" + "ABÇ*g1"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
assertQ(req("q", "content_charfilter:" + "HÏ*l?*"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='1']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRangeQuery() {
|
||||||
|
assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testNonTextTypes() {
|
||||||
|
String[] intTypes = {"int_f", "float_f", "long_f", "double_f", "byte_f", "short_f"};
|
||||||
|
|
||||||
|
for (String str : intTypes) {
|
||||||
|
assertQ(req("q", str + ":" + "0"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
assertQ(req("q", str + ":" + "[0 TO 2]"),
|
||||||
|
"//result[@numFound='3']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
}
|
||||||
|
assertQ(req("q", "bool_f:true"),
|
||||||
|
"//result[@numFound='2']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "bool_f:[false TO true]"),
|
||||||
|
"//result[@numFound='3']",
|
||||||
|
"//*[@name='id'][.='0']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
|
||||||
|
assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"),
|
||||||
|
"//result[@numFound='1']",
|
||||||
|
"//*[@name='id'][.='0']");
|
||||||
|
|
||||||
|
assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"),
|
||||||
|
"//result[@numFound='2']",
|
||||||
|
"//*[@name='id'][.='1']",
|
||||||
|
"//*[@name='id'][.='2']");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiBad() {
|
||||||
|
try {
|
||||||
|
assertQ(req("q", "content_multi_bad:" + "abCD*"));
|
||||||
|
fail("Should throw exception when token evaluates to more than one term");
|
||||||
|
} catch (Exception expected) {
|
||||||
|
assertTrue(expected.getCause() instanceof IllegalArgumentException);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -427,6 +427,42 @@
|
||||||
    </analyzer>
  </fieldType>

  <!-- Illustrates the new "multiterm" analyzer definition. The <fieldType> can take a new
       parameter legacyMultiTerm="true" if the old behavior is desired. The new default
       behavior as of 3.6+ is to automatically define a multiterm analyzer
  -->
  <fieldType name="text_multiterm" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.StandardTokenizerFactory"/>
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
      <filter class="solr.LowerCaseFilterFactory"/>
    </analyzer>
    <!-- Illustrates the use of a new analyzer type "multiterm". See the Wiki page "Multiterm
         Query Analysis" and SOLR-2438 for full details. The short form is that this analyzer is
         applied to wildcard terms (prefix, wildcard, range) if specified. This allows, among other
         things, not having to lowercase wildcard terms on the client.

         In the absence of this section, the new default behavior (3.6, 4.0) is to construct
         one of these from the query analyzer that incorporates any defined charfilters, a
         WhitespaceTokenizer, a LowerCaseFilter (if defined), and an ASCIIFoldingFilter
         (if defined).

         Arguably, this is an expert-level analyzer; most cases will be handled by an instance
         of this being automatically constructed from the query analyzer.
    -->
    <analyzer type="multiterm">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.ASCIIFoldingFilterFactory"/>
    </analyzer>
  </fieldType>

  <!-- since fields of this type are by default not stored or indexed,
       any data added to them will be ignored outright. -->
  <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
Loading…
Reference in New Issue