mirror of https://github.com/apache/lucene.git
LUCENE-588: Escaped wildcard character in wildcard term not handled correctly
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1031765 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2bbfc85aad
commit
a25dc8b9d5
|
@ -113,6 +113,12 @@ Changes in backwards compatibility policy
|
|||
* LUCENE-2674: MultiTermQuery.TermCollector.collect now accepts the
|
||||
TermsEnum as well. (Robert Muir, Mike McCandless)
|
||||
|
||||
* LUCENE-588: WildcardQuery and QueryParser now allows escaping with
|
||||
the '\' character. Previously this was impossible (you could not escape */?,
|
||||
for example). If your code somehow depends on the old behavior, you will
|
||||
need to change it (e.g. using "\\" to escape '\' itself).
|
||||
(Sunil Kamath, Terry Yang via Robert Muir)
|
||||
|
||||
Changes in Runtime Behavior
|
||||
|
||||
* LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
|
||||
|
|
|
@ -49,9 +49,9 @@ public class WildcardQueryNode extends FieldQueryNode {
|
|||
@Override
|
||||
public CharSequence toQueryString(EscapeQuerySyntax escaper) {
|
||||
if (isDefaultField(this.field)) {
|
||||
return getTermEscaped(escaper);
|
||||
return this.text;
|
||||
} else {
|
||||
return this.field + ":" + getTermEscaped(escaper);
|
||||
return this.field + ":" + this.text;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* This test case is a copy of the core Lucene query parser test, it was adapted
|
||||
|
@ -945,6 +946,15 @@ public class TestQPHelper extends LuceneTestCase {
|
|||
assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&");
|
||||
}
|
||||
|
||||
@Ignore("contrib queryparser shouldn't escape wildcard terms")
|
||||
public void testEscapedWildcard() throws Exception {
|
||||
StandardQueryParser qp = new StandardQueryParser();
|
||||
qp.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
|
||||
|
||||
WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
|
||||
assertEquals(q, qp.parse("foo\\?ba?r", "field"));
|
||||
}
|
||||
|
||||
public void testTabNewlineCarriageReturn() throws Exception {
|
||||
assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");
|
||||
|
||||
|
|
|
@ -33,7 +33,11 @@ import org.apache.lucene.util.StringHelper;
|
|||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.HashMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.SortedMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
class SimpleTextFieldsReader extends FieldsProducer {
|
||||
|
||||
|
@ -78,7 +82,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private class SimpleTextFieldsEnum extends FieldsEnum {
|
||||
private final IndexInput in;
|
||||
private final BytesRef scratch = new BytesRef(10);
|
||||
private boolean omitTF;
|
||||
private String current;
|
||||
|
||||
public SimpleTextFieldsEnum() {
|
||||
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
||||
|
@ -89,11 +93,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
while(true) {
|
||||
readLine(in, scratch);
|
||||
if (scratch.equals(END)) {
|
||||
current = null;
|
||||
return null;
|
||||
}
|
||||
if (scratch.startsWith(FIELD)) {
|
||||
String field = StringHelper.intern(new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8"));
|
||||
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
|
||||
current = field;
|
||||
return field;
|
||||
}
|
||||
}
|
||||
|
@ -101,7 +106,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
public TermsEnum terms() throws IOException {
|
||||
return new SimpleTextTermsEnum(in.getFilePointer(), omitTF);
|
||||
return SimpleTextFieldsReader.this.terms(current).iterator();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -109,21 +114,42 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
private final IndexInput in;
|
||||
private final boolean omitTF;
|
||||
private BytesRef current;
|
||||
private final long fieldStart;
|
||||
private final BytesRef scratch = new BytesRef(10);
|
||||
private final BytesRef scratch2 = new BytesRef(10);
|
||||
private int docFreq;
|
||||
private long docsStart;
|
||||
private boolean ended;
|
||||
private final TreeMap<BytesRef,TermData> allTerms;
|
||||
private Iterator<Map.Entry<BytesRef,TermData>> iter;
|
||||
|
||||
public SimpleTextTermsEnum(long offset, boolean omitTF) throws IOException {
|
||||
public SimpleTextTermsEnum(TreeMap<BytesRef,TermData> allTerms, boolean omitTF) throws IOException {
|
||||
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
||||
this.in.seek(offset);
|
||||
this.allTerms = allTerms;
|
||||
this.omitTF = omitTF;
|
||||
fieldStart = offset;
|
||||
iter = allTerms.entrySet().iterator();
|
||||
}
|
||||
|
||||
public SeekStatus seek(BytesRef text, boolean useCache /* ignored */) throws IOException {
|
||||
|
||||
final SortedMap<BytesRef,TermData> tailMap = allTerms.tailMap(text);
|
||||
|
||||
if (tailMap.isEmpty()) {
|
||||
current = null;
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
current = tailMap.firstKey();
|
||||
final TermData td = tailMap.get(current);
|
||||
docsStart = td.docsStart;
|
||||
docFreq = td.docFreq;
|
||||
iter = tailMap.entrySet().iterator();
|
||||
assert iter.hasNext();
|
||||
iter.next();
|
||||
if (current.equals(text)) {
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
if (current != null) {
|
||||
final int cmp = current.compareTo(text);
|
||||
if (cmp == 0) {
|
||||
|
@ -153,6 +179,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
current = null;
|
||||
ended = true;
|
||||
return SeekStatus.END;
|
||||
*/
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -162,6 +189,20 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
assert !ended;
|
||||
|
||||
if (iter.hasNext()) {
|
||||
Map.Entry<BytesRef,TermData> ent = iter.next();
|
||||
current = ent.getKey();
|
||||
TermData td = ent.getValue();
|
||||
docFreq = td.docFreq;
|
||||
docsStart = td.docsStart;
|
||||
return current;
|
||||
} else {
|
||||
current = null;
|
||||
return null;
|
||||
}
|
||||
|
||||
/*
|
||||
readLine(in, scratch);
|
||||
if (scratch.equals(END) || scratch.startsWith(FIELD)) {
|
||||
ended = true;
|
||||
|
@ -192,6 +233,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
in.seek(lineStart);
|
||||
return current;
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -447,20 +489,70 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
}
|
||||
|
||||
static class TermData {
|
||||
public long docsStart;
|
||||
public int docFreq;
|
||||
|
||||
public TermData(long docsStart, int docFreq) {
|
||||
this.docsStart = docsStart;
|
||||
this.docFreq = docFreq;
|
||||
}
|
||||
}
|
||||
|
||||
private class SimpleTextTerms extends Terms {
|
||||
private final String field;
|
||||
private final long termsStart;
|
||||
private final boolean omitTF;
|
||||
|
||||
public SimpleTextTerms(String field, long termsStart) {
|
||||
// NOTE: horribly, horribly RAM consuming, but then
|
||||
// SimpleText should never be used in production
|
||||
private final TreeMap<BytesRef,TermData> allTerms = new TreeMap<BytesRef,TermData>();
|
||||
|
||||
private final BytesRef scratch = new BytesRef(10);
|
||||
|
||||
public SimpleTextTerms(String field, long termsStart) throws IOException {
|
||||
this.field = StringHelper.intern(field);
|
||||
this.termsStart = termsStart;
|
||||
omitTF = fieldInfos.fieldInfo(field).omitTermFreqAndPositions;
|
||||
loadTerms();
|
||||
}
|
||||
|
||||
private void loadTerms() throws IOException {
|
||||
IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
|
||||
in.seek(termsStart);
|
||||
final BytesRef lastTerm = new BytesRef(10);
|
||||
long lastDocsStart = -1;
|
||||
int docFreq = 0;
|
||||
while(true) {
|
||||
readLine(in, scratch);
|
||||
if (scratch.equals(END) || scratch.startsWith(FIELD)) {
|
||||
if (lastDocsStart != -1) {
|
||||
allTerms.put(new BytesRef(lastTerm),
|
||||
new TermData(lastDocsStart, docFreq));
|
||||
}
|
||||
break;
|
||||
} else if (scratch.startsWith(DOC)) {
|
||||
docFreq++;
|
||||
} else if (scratch.startsWith(TERM)) {
|
||||
if (lastDocsStart != -1) {
|
||||
allTerms.put(new BytesRef(lastTerm),
|
||||
new TermData(lastDocsStart, docFreq));
|
||||
}
|
||||
lastDocsStart = in.getFilePointer();
|
||||
final int len = scratch.length - TERM.length;
|
||||
if (len > lastTerm.length) {
|
||||
lastTerm.grow(len);
|
||||
}
|
||||
System.arraycopy(scratch.bytes, TERM.length, lastTerm.bytes, 0, len);
|
||||
lastTerm.length = len;
|
||||
docFreq = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum iterator() throws IOException {
|
||||
return new SimpleTextTermsEnum(termsStart, omitTF);
|
||||
return new SimpleTextTermsEnum(allTerms, omitTF);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1045,7 +1045,7 @@ public abstract class QueryParserBase {
|
|||
|
||||
String termImage=discardEscapeChar(term.image);
|
||||
if (wildcard) {
|
||||
q = getWildcardQuery(qfield, termImage);
|
||||
q = getWildcardQuery(qfield, term.image);
|
||||
} else if (prefix) {
|
||||
q = getPrefixQuery(qfield,
|
||||
discardEscapeChar(term.image.substring
|
||||
|
|
|
@ -45,6 +45,9 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
/** Char equality with support for wildcards */
|
||||
public static final char WILDCARD_CHAR = '?';
|
||||
|
||||
/** Escape character */
|
||||
public static final char WILDCARD_ESCAPE = '\\';
|
||||
|
||||
/**
|
||||
* Constructs a query for terms matching <code>term</code>.
|
||||
*/
|
||||
|
@ -56,6 +59,7 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
* Convert Lucene wildcard syntax into an automaton.
|
||||
* @lucene.internal
|
||||
*/
|
||||
@SuppressWarnings("fallthrough")
|
||||
public static Automaton toAutomaton(Term wildcardquery) {
|
||||
List<Automaton> automata = new ArrayList<Automaton>();
|
||||
|
||||
|
@ -63,6 +67,7 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
|
||||
for (int i = 0; i < wildcardText.length();) {
|
||||
final int c = wildcardText.codePointAt(i);
|
||||
int length = Character.charCount(c);
|
||||
switch(c) {
|
||||
case WILDCARD_STRING:
|
||||
automata.add(BasicAutomata.makeAnyString());
|
||||
|
@ -70,10 +75,18 @@ public class WildcardQuery extends AutomatonQuery {
|
|||
case WILDCARD_CHAR:
|
||||
automata.add(BasicAutomata.makeAnyChar());
|
||||
break;
|
||||
case WILDCARD_ESCAPE:
|
||||
// add the next codepoint instead, if it exists
|
||||
if (i + length < wildcardText.length()) {
|
||||
final int nextChar = wildcardText.codePointAt(i + length);
|
||||
length += Character.charCount(nextChar);
|
||||
automata.add(BasicAutomata.makeChar(nextChar));
|
||||
break;
|
||||
} // else fallthru, lenient parsing with a trailing \
|
||||
default:
|
||||
automata.add(BasicAutomata.makeChar(c));
|
||||
}
|
||||
i += Character.charCount(c);
|
||||
i += length;
|
||||
}
|
||||
|
||||
return BasicOperations.concatenate(automata);
|
||||
|
|
|
@ -178,7 +178,7 @@ final public class SpecialOperations {
|
|||
* Reverses the language of the given (non-singleton) automaton while returning
|
||||
* the set of new initial states.
|
||||
*/
|
||||
static Set<State> reverse(Automaton a) {
|
||||
public static Set<State> reverse(Automaton a) {
|
||||
a.expandSingleton();
|
||||
// reverse all edges
|
||||
HashMap<State, HashSet<Transition>> m = new HashMap<State, HashSet<Transition>>();
|
||||
|
|
|
@ -770,11 +770,11 @@ public class TestQueryParser extends LuceneTestCase {
|
|||
|
||||
assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");
|
||||
|
||||
assertQueryEquals("a:b\\-?c", a, "a:b-?c");
|
||||
assertQueryEquals("a:b\\+?c", a, "a:b+?c");
|
||||
assertQueryEquals("a:b\\:?c", a, "a:b:?c");
|
||||
assertQueryEquals("a:b\\-?c", a, "a:b\\-?c");
|
||||
assertQueryEquals("a:b\\+?c", a, "a:b\\+?c");
|
||||
assertQueryEquals("a:b\\:?c", a, "a:b\\:?c");
|
||||
|
||||
assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");
|
||||
assertQueryEquals("a:b\\\\?c", a, "a:b\\\\?c");
|
||||
|
||||
assertQueryEquals("a:b\\-c~", a, "a:b-c~2.0");
|
||||
assertQueryEquals("a:b\\+c~", a, "a:b+c~2.0");
|
||||
|
@ -1062,6 +1062,12 @@ public class TestQueryParser extends LuceneTestCase {
|
|||
|
||||
}
|
||||
|
||||
public void testEscapedWildcard() throws Exception {
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
|
||||
WildcardQuery q = new WildcardQuery(new Term("field", "foo\\?ba?r"));
|
||||
assertEquals(q, qp.parse("foo\\?ba?r"));
|
||||
}
|
||||
|
||||
public void testRegexps() throws Exception {
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.WHITESPACE, false));
|
||||
RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
|
||||
|
|
|
@ -205,6 +205,38 @@ public class TestWildcard
|
|||
indexStore.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if wildcard escaping works
|
||||
*/
|
||||
public void testEscapes() throws Exception {
|
||||
Directory indexStore = getIndexStore("field",
|
||||
new String[]{"foo*bar", "foo??bar", "fooCDbar", "fooSOMETHINGbar", "foo\\"});
|
||||
IndexSearcher searcher = new IndexSearcher(indexStore, true);
|
||||
|
||||
// without escape: matches foo??bar, fooCDbar, foo*bar, and fooSOMETHINGbar
|
||||
WildcardQuery unescaped = new WildcardQuery(new Term("field", "foo*bar"));
|
||||
assertMatches(searcher, unescaped, 4);
|
||||
|
||||
// with escape: only matches foo*bar
|
||||
WildcardQuery escaped = new WildcardQuery(new Term("field", "foo\\*bar"));
|
||||
assertMatches(searcher, escaped, 1);
|
||||
|
||||
// without escape: matches foo??bar and fooCDbar
|
||||
unescaped = new WildcardQuery(new Term("field", "foo??bar"));
|
||||
assertMatches(searcher, unescaped, 2);
|
||||
|
||||
// with escape: matches foo??bar only
|
||||
escaped = new WildcardQuery(new Term("field", "foo\\?\\?bar"));
|
||||
assertMatches(searcher, escaped, 1);
|
||||
|
||||
// check escaping at end: lenient parse yields "foo\"
|
||||
WildcardQuery atEnd = new WildcardQuery(new Term("field", "foo\\"));
|
||||
assertMatches(searcher, atEnd, 1);
|
||||
|
||||
searcher.close();
|
||||
indexStore.close();
|
||||
}
|
||||
|
||||
private Directory getIndexStore(String field, String[] contents)
|
||||
throws IOException {
|
||||
Directory indexStore = newDirectory();
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.util.Version;
|
|||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.BasicAutomata;
|
||||
import org.apache.lucene.util.automaton.BasicOperations;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.solr.analysis.*;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -202,37 +203,36 @@ public class SolrQueryParser extends QueryParser {
|
|||
String type = schema.getFieldType(field).getTypeName();
|
||||
ReversedWildcardFilterFactory factory = leadingWildcards.get(type);
|
||||
if (factory != null) {
|
||||
Term term = new Term(field, termStr);
|
||||
// fsa representing the query
|
||||
Automaton automaton = WildcardQuery.toAutomaton(term);
|
||||
// TODO: we should likely use the automaton to calculate shouldReverse, too.
|
||||
if (factory.shouldReverse(termStr)) {
|
||||
int len = termStr.length();
|
||||
char[] chars = new char[len+1];
|
||||
chars[0] = factory.getMarkerChar();
|
||||
termStr.getChars(0, len, chars, 1);
|
||||
ReversedWildcardFilter.reverse(chars, 1, len);
|
||||
termStr = new String(chars);
|
||||
automaton = BasicOperations.concatenate(automaton, BasicAutomata.makeChar(factory.getMarkerChar()));
|
||||
SpecialOperations.reverse(automaton);
|
||||
} else {
|
||||
// reverse wildcardfilter is active: remove false positives
|
||||
Term term = new Term(field, termStr);
|
||||
// fsa representing the query
|
||||
Automaton a = WildcardQuery.toAutomaton(term);
|
||||
// fsa representing false positives (markerChar*)
|
||||
Automaton falsePositives = BasicOperations.concatenate(
|
||||
BasicAutomata.makeChar(factory.getMarkerChar()),
|
||||
BasicAutomata.makeAnyString());
|
||||
return new AutomatonQuery(term, BasicOperations.minus(a, falsePositives)) {
|
||||
// override toString so its completely transparent
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(term.text());
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
}
|
||||
};
|
||||
// subtract these away
|
||||
automaton = BasicOperations.minus(automaton, falsePositives);
|
||||
}
|
||||
return new AutomatonQuery(term, automaton) {
|
||||
// override toString so its completely transparent
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(term.text());
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
Query q = super.getWildcardQuery(field, termStr);
|
||||
if (q instanceof WildcardQuery) {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.analysis;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
@ -26,8 +27,10 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.SpecialOperations;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.search.SolrQueryParser;
|
||||
|
@ -51,6 +54,8 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
|
|||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
schema = new IndexSchema(solrConfig, getSchemaFile(), null);
|
||||
clearIndex();
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -105,7 +110,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testQueryParsing() throws IOException, ParseException {
|
||||
public void testQueryParsing() throws Exception {
|
||||
|
||||
SolrQueryParser parserOne = new SolrQueryParser(schema, "one");
|
||||
assertTrue(parserOne.getAllowLeadingWildcard());
|
||||
|
@ -115,28 +120,53 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
|
|||
// XXX note: this should be false, but for now we return true for any field,
|
||||
// XXX if at least one field uses the reversing
|
||||
assertTrue(parserThree.getAllowLeadingWildcard());
|
||||
String text = "one +two *hree f*ur fiv* *si\uD834\uDD1Ex";
|
||||
String expectedOne = "one:one +one:two one:\u0001eerh* one:\u0001ru*f one:fiv* one:\u0001x\uD834\uDD1Eis*";
|
||||
String expectedTwo = "two:one +two:two two:\u0001eerh* two:\u0001ru*f two:fiv* two:\u0001x\uD834\uDD1Eis*";
|
||||
String expectedThree = "three:one +three:two three:*hree three:f*ur three:fiv* three:*si\uD834\uDD1Ex";
|
||||
Query q = parserOne.parse(text);
|
||||
assertEquals(expectedOne, q.toString());
|
||||
q = parserTwo.parse(text);
|
||||
assertEquals(expectedTwo, q.toString());
|
||||
q = parserThree.parse(text);
|
||||
assertEquals(expectedThree, q.toString());
|
||||
|
||||
// add some docs
|
||||
assertU(adoc("id", "1", "one", "one"));
|
||||
assertU(adoc("id", "2", "two", "two"));
|
||||
assertU(adoc("id", "3", "three", "three"));
|
||||
assertU(adoc("id", "4", "one", "four"));
|
||||
assertU(adoc("id", "5", "two", "five"));
|
||||
assertU(adoc("id", "6", "three", "si\uD834\uDD1Ex"));
|
||||
assertU(commit());
|
||||
|
||||
assertQ("should have matched",
|
||||
req("+id:1 +one:one"),
|
||||
"//result[@numFound=1]");
|
||||
|
||||
assertQ("should have matched",
|
||||
req("+id:4 +one:f*ur"),
|
||||
"//result[@numFound=1]");
|
||||
|
||||
assertQ("should have matched",
|
||||
req("+id:6 +three:*si\uD834\uDD1Ex"),
|
||||
"//result[@numFound=1]");
|
||||
|
||||
// test conditional reversal
|
||||
String condText = "*hree t*ree th*ee thr*e ?hree t?ree th?ee th?*ee " +
|
||||
"short*token ver*longtoken";
|
||||
String expected = "two:\u0001eerh* two:\u0001eer*t two:\u0001ee*ht " +
|
||||
"two:thr*e " +
|
||||
"two:\u0001eerh? two:\u0001eer?t " +
|
||||
"two:th?ee " +
|
||||
"two:th?*ee " +
|
||||
"two:short*token " +
|
||||
"two:\u0001nekotgnol*rev";
|
||||
q = parserTwo.parse(condText);
|
||||
assertEquals(expected, q.toString());
|
||||
assertTrue(wasReversed(parserTwo, "*hree"));
|
||||
assertTrue(wasReversed(parserTwo, "t*ree"));
|
||||
assertTrue(wasReversed(parserTwo, "th*ee"));
|
||||
assertFalse(wasReversed(parserTwo, "thr*e"));
|
||||
assertTrue(wasReversed(parserTwo, "?hree"));
|
||||
assertTrue(wasReversed(parserTwo, "t?ree"));
|
||||
assertFalse(wasReversed(parserTwo, "th?ee"));
|
||||
assertFalse(wasReversed(parserTwo, "th?*ee"));
|
||||
assertFalse(wasReversed(parserTwo, "short*token"));
|
||||
assertTrue(wasReversed(parserTwo, "ver*longtoken"));
|
||||
}
|
||||
|
||||
/** fragile assert: depends on our implementation, but cleanest way to check for now */
|
||||
private boolean wasReversed(SolrQueryParser qp, String query) throws Exception {
|
||||
Query q = qp.parse(query);
|
||||
if (!(q instanceof AutomatonQuery))
|
||||
return false;
|
||||
// this is a hack to get the protected Automaton field in AutomatonQuery,
|
||||
// may break in later lucene versions - we have no getter... for good reasons.
|
||||
final Field automatonField = AutomatonQuery.class.getDeclaredField("automaton");
|
||||
automatonField.setAccessible(true);
|
||||
Automaton automaton = (Automaton) automatonField.get(q);
|
||||
String prefix = SpecialOperations.getCommonPrefix(automaton);
|
||||
return prefix.length() > 0 && prefix.charAt(0) == '\u0001';
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue