mirror of https://github.com/apache/lucene.git
SOLR-1321: Added better support for efficient leading wildcards
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@813830 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e723f7fbad
commit
70fe600134
|
@ -309,6 +309,8 @@ New Features
|
||||||
to allow more efficient bulk queries (those that retrieve many or all
|
to allow more efficient bulk queries (those that retrieve many or all
|
||||||
documents). (Brian Whitman via yonik)
|
documents). (Brian Whitman via yonik)
|
||||||
|
|
||||||
|
78. SOLR-1321: Add better support for efficient wildcard handling (Andrzej Bialecki, Robert Muir, gsingers)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
|
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class produces a special form of reversed tokens, suitable for
|
||||||
|
* better handling of leading wildcards. Tokens from the input TokenStream
|
||||||
|
* are reversed and prepended with a special "reversed" marker character.
|
||||||
|
* If <code>withOriginal<code> argument is <code>true</code> then first the
|
||||||
|
* original token is returned, and then the reversed token (with
|
||||||
|
* <code>positionIncrement == 0</code>) is returned. Otherwise only reversed
|
||||||
|
* tokens are returned.
|
||||||
|
* <p>Note: this filter doubles the number of tokens in the input stream when
|
||||||
|
* <code>withOriginal == true</code>, which proportionally increases the size
|
||||||
|
* of postings and term dictionary in the index.
|
||||||
|
*/
|
||||||
|
public class ReversedWildcardFilter extends TokenFilter {
|
||||||
|
|
||||||
|
private boolean withOriginal;
|
||||||
|
private char markerChar;
|
||||||
|
private State save;
|
||||||
|
private TermAttribute termAtt;
|
||||||
|
private PositionIncrementAttribute posAtt;
|
||||||
|
|
||||||
|
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
|
||||||
|
super(input);
|
||||||
|
this.termAtt = (TermAttribute)addAttribute(TermAttribute.class);
|
||||||
|
this.posAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||||
|
this.withOriginal = withOriginal;
|
||||||
|
this.markerChar = markerChar;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
if( save != null ) {
|
||||||
|
// clearAttributes(); // not currently necessary
|
||||||
|
restoreState(save);
|
||||||
|
save = null;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!input.incrementToken()) return false;
|
||||||
|
|
||||||
|
// pass through zero-length terms
|
||||||
|
int oldLen = termAtt.termLength();
|
||||||
|
if (oldLen ==0) return true;
|
||||||
|
int origOffset = posAtt.getPositionIncrement();
|
||||||
|
if (withOriginal == true){
|
||||||
|
posAtt.setPositionIncrement(0);
|
||||||
|
save = captureState();
|
||||||
|
}
|
||||||
|
char [] buffer = termAtt.resizeTermBuffer(oldLen + 1);
|
||||||
|
buffer[oldLen] = markerChar;
|
||||||
|
//String reversed = reverseAndMark(value, markerChar);
|
||||||
|
ReverseStringFilter.reverse(buffer, oldLen + 1);
|
||||||
|
|
||||||
|
posAtt.setPositionIncrement(origOffset);
|
||||||
|
termAtt.setTermBuffer(buffer, 0, oldLen +1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,131 @@
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory for {@link ReversedWildcardFilter}-s. When this factory is
|
||||||
|
* added to an analysis chain, it will be used both for filtering the
|
||||||
|
* tokens during indexing, and to determine the query processing of
|
||||||
|
* this field during search.
|
||||||
|
* <p>This class supports the following init arguments:
|
||||||
|
* <ul>
|
||||||
|
* <li><code>withOriginal</code> - if true, then produce both original and reversed tokens at
|
||||||
|
* the same positions. If false, then produce only reversed tokens.</li>
|
||||||
|
* <li><code>maxPosAsterisk</code> - maximum position (1-based) of the asterisk wildcard
|
||||||
|
* ('*') that triggers the reversal of query term. Asterisk that occurs at
|
||||||
|
* positions higher than this value will not cause the reversal of query term.
|
||||||
|
* Defaults to 2, meaning that asterisks on positions 1 and 2 will cause
|
||||||
|
* a reversal.</li>
|
||||||
|
* <li><code>maxPosQuestion</code> - maximum position (1-based) of the question
|
||||||
|
* mark wildcard ('?') that triggers the reversal of query term. Defaults to 1.
|
||||||
|
* Set this to 0, and <code>maxPosAsterisk</code> to 1 to reverse only
|
||||||
|
* pure suffix queries (i.e. ones with a single leading asterisk).</li>
|
||||||
|
* <li><code>maxFractionAsterisk</code> - additional parameter that
|
||||||
|
* triggers the reversal if asterisk ('*') position is less than this
|
||||||
|
* fraction of the query token length. Defaults to 0.0f (disabled).</li>
|
||||||
|
* <li><code>minTrailing</code> - minimum number of trailing characters in query
|
||||||
|
* token after the last wildcard character. For good performance this should be
|
||||||
|
* set to a value larger than 1. Defaults to 2.
|
||||||
|
* </ul>
|
||||||
|
* Note 1: This filter always reverses input tokens during indexing.
|
||||||
|
* Note 2: Query tokens without wildcard characters will never be reversed.
|
||||||
|
*/
|
||||||
|
public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
|
||||||
|
private char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER;
|
||||||
|
private boolean withOriginal;
|
||||||
|
private int maxPosAsterisk;
|
||||||
|
private int maxPosQuestion;
|
||||||
|
private int minTrailing;
|
||||||
|
private float maxFractionAsterisk;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(Map<String, String> args) {
|
||||||
|
super.init(args);
|
||||||
|
withOriginal = getBoolean("withOriginal", true);
|
||||||
|
maxPosAsterisk = getInt("maxPosAsterisk", 2);
|
||||||
|
maxPosQuestion = getInt("maxPosQuestion", 1);
|
||||||
|
minTrailing = getInt("minTrailing", 2);
|
||||||
|
maxFractionAsterisk = getFloat("maxFractionAsterisk", 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TokenStream create(TokenStream input) {
|
||||||
|
return new ReversedWildcardFilter(input, withOriginal, markerChar);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method encapsulates the logic that determines whether
|
||||||
|
* a query token should be reversed in order to use the
|
||||||
|
* reversed terms in the index.
|
||||||
|
* @param token input token.
|
||||||
|
* @return true if input token should be reversed, false otherwise.
|
||||||
|
*/
|
||||||
|
public boolean shouldReverse(String token) {
|
||||||
|
int posQ = token.indexOf('?');
|
||||||
|
int posA = token.indexOf('*');
|
||||||
|
if (posQ == -1 && posA == -1) { // not a wildcard query
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
int pos;
|
||||||
|
int lastPos;
|
||||||
|
int len = token.length();
|
||||||
|
lastPos = token.lastIndexOf('?');
|
||||||
|
pos = token.lastIndexOf('*');
|
||||||
|
if (pos > lastPos) lastPos = pos;
|
||||||
|
if (posQ != -1) {
|
||||||
|
pos = posQ;
|
||||||
|
if (posA != -1) {
|
||||||
|
pos = Math.min(posQ, posA);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pos = posA;
|
||||||
|
}
|
||||||
|
if (len - lastPos < minTrailing) { // too few trailing chars
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (posQ != -1 && posQ < maxPosQuestion) { // leading '?'
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (posA != -1 && posA < maxPosAsterisk) { // leading '*'
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// '*' in the leading part
|
||||||
|
if (maxFractionAsterisk > 0.0f && pos < (float)token.length() * maxFractionAsterisk) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public char getMarkerChar() {
|
||||||
|
return markerChar;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected float getFloat(String name, float defValue) {
|
||||||
|
String val = args.get(name);
|
||||||
|
if (val == null) {
|
||||||
|
return defValue;
|
||||||
|
} else {
|
||||||
|
return Float.parseFloat(val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,14 +17,24 @@
|
||||||
|
|
||||||
package org.apache.solr.search;
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
import org.apache.lucene.search.*;
|
import org.apache.lucene.search.*;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
|
import org.apache.solr.analysis.ReversedWildcardFilter;
|
||||||
|
import org.apache.solr.analysis.ReversedWildcardFilterFactory;
|
||||||
|
import org.apache.solr.analysis.TokenFilterFactory;
|
||||||
|
import org.apache.solr.analysis.TokenizerChain;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.schema.TrieField;
|
import org.apache.solr.schema.TrieField;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
|
||||||
|
@ -55,6 +65,8 @@ public class SolrQueryParser extends QueryParser {
|
||||||
protected final IndexSchema schema;
|
protected final IndexSchema schema;
|
||||||
protected final QParser parser;
|
protected final QParser parser;
|
||||||
protected final String defaultField;
|
protected final String defaultField;
|
||||||
|
protected final Map<String, ReversedWildcardFilterFactory> leadingWildcards =
|
||||||
|
new HashMap<String, ReversedWildcardFilterFactory>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a SolrQueryParser using the schema to understand the
|
* Constructs a SolrQueryParser using the schema to understand the
|
||||||
|
@ -73,6 +85,7 @@ public class SolrQueryParser extends QueryParser {
|
||||||
this.defaultField = defaultField;
|
this.defaultField = defaultField;
|
||||||
setLowercaseExpandedTerms(false);
|
setLowercaseExpandedTerms(false);
|
||||||
setEnablePositionIncrements(true);
|
setEnablePositionIncrements(true);
|
||||||
|
checkAllowLeadingWildcards();
|
||||||
}
|
}
|
||||||
|
|
||||||
public SolrQueryParser(QParser parser, String defaultField) {
|
public SolrQueryParser(QParser parser, String defaultField) {
|
||||||
|
@ -86,6 +99,29 @@ public class SolrQueryParser extends QueryParser {
|
||||||
this.defaultField = defaultField;
|
this.defaultField = defaultField;
|
||||||
setLowercaseExpandedTerms(false);
|
setLowercaseExpandedTerms(false);
|
||||||
setEnablePositionIncrements(true);
|
setEnablePositionIncrements(true);
|
||||||
|
checkAllowLeadingWildcards();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void checkAllowLeadingWildcards() {
|
||||||
|
boolean allow = false;
|
||||||
|
for (Entry<String, FieldType> e : schema.getFieldTypes().entrySet()) {
|
||||||
|
Analyzer a = e.getValue().getAnalyzer();
|
||||||
|
if (a instanceof TokenizerChain) {
|
||||||
|
// examine the indexing analysis chain if it supports leading wildcards
|
||||||
|
TokenizerChain tc = (TokenizerChain)a;
|
||||||
|
TokenFilterFactory[] factories = tc.getTokenFilterFactories();
|
||||||
|
for (TokenFilterFactory factory : factories) {
|
||||||
|
if (factory instanceof ReversedWildcardFilterFactory) {
|
||||||
|
allow = true;
|
||||||
|
leadingWildcards.put(e.getKey(), (ReversedWildcardFilterFactory)factory);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// XXX should be enabled on a per-field basis
|
||||||
|
if (allow) {
|
||||||
|
setAllowLeadingWildcard(true);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkNullField(String field) throws SolrException {
|
private void checkNullField(String field) throws SolrException {
|
||||||
|
@ -149,6 +185,17 @@ public class SolrQueryParser extends QueryParser {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
|
||||||
|
// *:* -> MatchAllDocsQuery
|
||||||
|
if ("*".equals(field) && "*".equals(termStr)) {
|
||||||
|
return newMatchAllDocsQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
// can we use reversed wildcards in this field?
|
||||||
|
String type = schema.getFieldType(field).getTypeName();
|
||||||
|
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
||||||
|
if (factory != null && factory.shouldReverse(termStr)) {
|
||||||
|
termStr = ReverseStringFilter.reverse(termStr + factory.getMarkerChar());
|
||||||
|
}
|
||||||
Query q = super.getWildcardQuery(field, termStr);
|
Query q = super.getWildcardQuery(field, termStr);
|
||||||
if (q instanceof WildcardQuery) {
|
if (q instanceof WildcardQuery) {
|
||||||
// use a constant score query to avoid overflowing clauses
|
// use a constant score query to avoid overflowing clauses
|
||||||
|
|
|
@ -0,0 +1,143 @@
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceTokenizer;
|
||||||
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.solr.schema.IndexSchema;
|
||||||
|
import org.apache.solr.search.SolrQueryParser;
|
||||||
|
|
||||||
|
public class TestReversedWildcardFilterFactory extends BaseTokenTestCase {
|
||||||
|
Map<String,String> args = new HashMap<String, String>();
|
||||||
|
ReversedWildcardFilterFactory factory = new ReversedWildcardFilterFactory();
|
||||||
|
IndexSchema schema;
|
||||||
|
|
||||||
|
public String getSchemaFile() {
|
||||||
|
return "schema-reversed.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSolrConfigFile() {
|
||||||
|
return "solrconfig.xml";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
schema = new IndexSchema(solrConfig, getSchemaFile(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testReversedTokens() throws IOException {
|
||||||
|
String text = "simple text";
|
||||||
|
String expected1 = "simple \u0001elpmis text \u0001txet";
|
||||||
|
String expected2 = "\u0001elpmis \u0001txet";
|
||||||
|
args.put("withOriginal", "true");
|
||||||
|
factory.init(args);
|
||||||
|
TokenStream input = factory.create(new WhitespaceTokenizer(new StringReader(text)));
|
||||||
|
List<Token> realTokens = getTokens(input);
|
||||||
|
List<Token> expectedTokens = tokens(expected1);
|
||||||
|
// set positionIncrements in expected tokens
|
||||||
|
for (int i = 1; i < expectedTokens.size(); i += 2) {
|
||||||
|
expectedTokens.get(i).setPositionIncrement(0);
|
||||||
|
}
|
||||||
|
assertTokEqual(realTokens, expectedTokens);
|
||||||
|
|
||||||
|
// now without original tokens
|
||||||
|
args.put("withOriginal", "false");
|
||||||
|
factory.init(args);
|
||||||
|
input = factory.create(new WhitespaceTokenizer(new StringReader(text)));
|
||||||
|
realTokens = getTokens(input);
|
||||||
|
expectedTokens = tokens(expected2);
|
||||||
|
assertTokEqual(realTokens, expectedTokens);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testIndexingAnalysis() throws Exception {
|
||||||
|
Analyzer a = schema.getAnalyzer();
|
||||||
|
String text = "one two three";
|
||||||
|
String expected1 = "one \u0001eno two \u0001owt three \u0001eerht";
|
||||||
|
List<Token> expectedTokens1 = getTokens(
|
||||||
|
new WhitespaceTokenizer(new StringReader(expected1)));
|
||||||
|
// set positionIncrements and offsets in expected tokens
|
||||||
|
for (int i = 1; i < expectedTokens1.size(); i += 2) {
|
||||||
|
Token t = expectedTokens1.get(i);
|
||||||
|
t.setPositionIncrement(0);
|
||||||
|
}
|
||||||
|
String expected2 = "\u0001eno \u0001owt \u0001eerht";
|
||||||
|
List<Token> expectedTokens2 = getTokens(
|
||||||
|
new WhitespaceTokenizer(new StringReader(expected2)));
|
||||||
|
String expected3 = "one two three";
|
||||||
|
List<Token> expectedTokens3 = getTokens(
|
||||||
|
new WhitespaceTokenizer(new StringReader(expected3)));
|
||||||
|
// field one
|
||||||
|
TokenStream input = a.tokenStream("one", new StringReader(text));
|
||||||
|
List<Token> realTokens = getTokens(input);
|
||||||
|
assertTokEqual(realTokens, expectedTokens1);
|
||||||
|
// field two
|
||||||
|
input = a.tokenStream("two", new StringReader(text));
|
||||||
|
realTokens = getTokens(input);
|
||||||
|
assertTokEqual(realTokens, expectedTokens2);
|
||||||
|
// field three
|
||||||
|
input = a.tokenStream("three", new StringReader(text));
|
||||||
|
realTokens = getTokens(input);
|
||||||
|
assertTokEqual(realTokens, expectedTokens3);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testQueryParsing() throws IOException, ParseException {
|
||||||
|
|
||||||
|
SolrQueryParser parserOne = new SolrQueryParser(schema, "one");
|
||||||
|
assertTrue(parserOne.getAllowLeadingWildcard());
|
||||||
|
SolrQueryParser parserTwo = new SolrQueryParser(schema, "two");
|
||||||
|
assertTrue(parserTwo.getAllowLeadingWildcard());
|
||||||
|
SolrQueryParser parserThree = new SolrQueryParser(schema, "three");
|
||||||
|
// XXX note: this should be false, but for now we return true for any field,
|
||||||
|
// XXX if at least one field uses the reversing
|
||||||
|
assertTrue(parserThree.getAllowLeadingWildcard());
|
||||||
|
String text = "one +two *hree f*ur fiv*";
|
||||||
|
String expectedOne = "one:one +one:two one:\u0001eerh* one:\u0001ru*f one:fiv*";
|
||||||
|
String expectedTwo = "two:one +two:two two:\u0001eerh* two:\u0001ru*f two:fiv*";
|
||||||
|
String expectedThree = "three:one +three:two three:*hree three:f*ur three:fiv*";
|
||||||
|
Query q = parserOne.parse(text);
|
||||||
|
assertEquals(expectedOne, q.toString());
|
||||||
|
q = parserTwo.parse(text);
|
||||||
|
assertEquals(expectedTwo, q.toString());
|
||||||
|
q = parserThree.parse(text);
|
||||||
|
assertEquals(expectedThree, q.toString());
|
||||||
|
// test conditional reversal
|
||||||
|
String condText = "*hree t*ree th*ee thr*e ?hree t?ree th?ee th?*ee " +
|
||||||
|
"short*token ver*longtoken";
|
||||||
|
String expected = "two:\u0001eerh* two:\u0001eer*t two:\u0001ee*ht " +
|
||||||
|
"two:thr*e " +
|
||||||
|
"two:\u0001eerh? two:\u0001eer?t " +
|
||||||
|
"two:th?ee " +
|
||||||
|
"two:th?*ee " +
|
||||||
|
"two:short*token " +
|
||||||
|
"two:\u0001nekotgnol*rev";
|
||||||
|
q = parserTwo.parse(condText);
|
||||||
|
assertEquals(expected, q.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
<?xml version="1.0" ?>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
|
||||||
|
For testing reversed wildcards.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<schema name="test" version="1.0">
|
||||||
|
<types>
|
||||||
|
|
||||||
|
|
||||||
|
<fieldtype name="integer" class="solr.IntField" />
|
||||||
|
|
||||||
|
<fieldtype name="text" class="solr.TextField">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
|
||||||
|
<fieldtype name="srev" class="solr.TextField">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
|
||||||
|
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
|
||||||
|
</analyzer>
|
||||||
|
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
<fieldtype name="rev" class="solr.TextField">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
|
||||||
|
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"
|
||||||
|
minTrailing="1"/>
|
||||||
|
</analyzer>
|
||||||
|
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
|
||||||
|
</analyzer>
|
||||||
|
</fieldtype>
|
||||||
|
</types>
|
||||||
|
|
||||||
|
|
||||||
|
<fields>
|
||||||
|
<field name="id" type="integer" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||||
|
<field name="one" type="srev" indexed="true" stored="false"/>
|
||||||
|
<field name="two" type="rev" indexed="true" stored="false"/>
|
||||||
|
<field name="three" type="text" indexed="true" stored="false"/>
|
||||||
|
|
||||||
|
</fields>
|
||||||
|
|
||||||
|
<defaultSearchField>one</defaultSearchField>
|
||||||
|
<uniqueKey>id</uniqueKey>
|
||||||
|
|
||||||
|
|
||||||
|
</schema>
|
Loading…
Reference in New Issue