diff --git a/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index ce69fa81a44..9fa79e685e5 100644 --- a/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/modules/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -290,7 +290,6 @@ public abstract class QueryParserBase { this.lowercaseExpandedTerms = lowercaseExpandedTerms; } - /** * @see #setLowercaseExpandedTerms(boolean) */ @@ -778,14 +777,21 @@ public abstract class QueryParserBase { return new FuzzyQuery(term,minimumSimilarity,prefixLength); } - private BytesRef analyzeRangePart(String field, String part) { + // TODO: Should this be protected instead? + private BytesRef analyzeMultitermTerm(String field, String part) { + return analyzeMultitermTerm(field, part, analyzer); + } + + protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) { TokenStream source; - + + if (analyzerIn == null) analyzerIn = analyzer; + try { - source = analyzer.tokenStream(field, new StringReader(part)); + source = analyzerIn.tokenStream(field, new StringReader(part)); source.reset(); } catch (IOException e) { - throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e); + throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e); } TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); @@ -793,10 +799,10 @@ public abstract class QueryParserBase { try { if (!source.incrementToken()) - throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); + throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part); termAtt.fillBytesRef(); if (source.incrementToken()) - throw new IllegalArgumentException("analyzer returned too many terms for 
range part: " + part); + throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part); } catch (IOException e) { - throw new RuntimeException("error analyzing range part: " + part, e); + throw new RuntimeException("error analyzing multiTerm term: " + part, e); } @@ -805,7 +811,7 @@ public abstract class QueryParserBase { source.end(); source.close(); } catch (IOException e) { - throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e); + throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e); } return BytesRef.deepCopyOf(bytes); @@ -827,13 +833,13 @@ public abstract class QueryParserBase { if (part1 == null) { start = null; } else { - start = analyzeRangeTerms ? analyzeRangePart(field, part1) : new BytesRef(part1); + start = analyzeRangeTerms ? analyzeMultitermTerm(field, part1) : new BytesRef(part1); } if (part2 == null) { end = null; } else { - end = analyzeRangeTerms ? analyzeRangePart(field, part2) : new BytesRef(part2); + end = analyzeRangeTerms ? analyzeMultitermTerm(field, part2) : new BytesRef(part2); } final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive); diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 02638629d9e..65c0a4ad4bf 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -188,6 +188,11 @@ New Features * SOLR-2134 Trie* fields should support sortMissingLast=true, and deprecate Sortable* Field Types (Ryan McKinley, Mike McCandless, Uwe Schindler, Erick Erickson) + +* SOLR-2438: Case insensitive search for wildcard queries. Actually, the ability to specify + a complete analysis chain for multiterm queries. + (Pete Sturge Erick Erickson, Mentoring from Seeley and Muir) + Optimizations ---------------------- @@ -383,6 +388,11 @@ New Features * SOLR-1565: StreamingUpdateSolrServer supports RequestWriter API and therefore, javabin update format (shalin) +* SOLR-2438: Case insensitive search for wildcard queries. 
Actually, the ability to specify + a complete analysis chain for multiterm queries. + (Pete Sturge Erick Erickson, Mentoring from Seeley and Muir) + + Bug Fixes ---------------------- * SOLR-2912: Fixed File descriptor leak in ShowFileRequestHandler (Michael Ryan, shalin) diff --git a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java index 537f38853dd..370b001ba68 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldProperties.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldProperties.java @@ -48,13 +48,15 @@ public abstract class FieldProperties { protected final static int REQUIRED = 0x00001000; protected final static int OMIT_POSITIONS = 0x00002000; + protected final static int LEGACY_MULTITERM = 0x00004000; static final String[] propertyNames = { "indexed", "tokenized", "stored", "binary", "omitNorms", "omitTermFreqAndPositions", "termVectors", "termPositions", "termOffsets", "multiValued", - "sortMissingFirst","sortMissingLast","required", "omitPositions" + "sortMissingFirst","sortMissingLast","required", "omitPositions" , + "legacyMultiTerm" }; static final Map propertyMap = new HashMap(); diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 77214b01a82..54a0206490c 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -428,6 +428,21 @@ public abstract class FieldType extends FieldProperties { */ protected Analyzer queryAnalyzer=analyzer; + /** + * Analyzer set by schema for text types to use when searching fields + * of this type, subclasses can set analyzer themselves or override + * getAnalyzer() + * This analyzer is used to process wildcard, prefix, regex and other multiterm queries. 
It + * assembles a tokenizer and filters that "make sense" for this, primarily accent folding and + * lowercasing filters, and charfilters. + * + * If users require old-style behavior, they can specify 'legacyMultiTerm="true"' in the schema file. + * @see #getMultiTermAnalyzer + * @see #setMultiTermAnalyzer + */ + protected Analyzer multiTermAnalyzer=null; + + + /** * Returns the Analyzer to be used when indexing fields of this type. *

@@ -450,6 +465,17 @@ public abstract class FieldType extends FieldProperties { return queryAnalyzer; } + /** + * Returns the Analyzer to be used when searching fields of this type when multi-term queries are specified. + *

+ * This method may be called many times, at any time. + *

+ * @see #getAnalyzer + */ + public Analyzer getMultiTermAnalyzer() { + return multiTermAnalyzer; + } + private final String analyzerError = "FieldType: " + this.getClass().getSimpleName() + " (" + typeName + ") does not support specifying an analyzer"; @@ -498,6 +524,28 @@ public abstract class FieldType extends FieldProperties { throw e; } + /** + * Sets the Analyzer to be used for multi-term queries (wildcard, prefix, regex, range) on fields of this type. + * + *

+ * + * Subclasses that override this method need to ensure the behavior + * of the analyzer is consistent with the implementation of toInternal. + *

+ * + * @see #toInternal + * @see #setAnalyzer + * @see #getQueryAnalyzer + */ + public void setMultiTermAnalyzer(Analyzer analyzer) { + SolrException e = new SolrException + (ErrorCode.SERVER_ERROR, + "FieldType: " + this.getClass().getSimpleName() + + " (" + typeName + ") does not support specifying an analyzer"); + SolrException.logOnce(log,null,e); + throw e; + } + /** @lucene.internal */ protected Similarity similarity; diff --git a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java index b763aa8d134..090c7b0ada2 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java @@ -18,19 +18,15 @@ package org.apache.solr.schema; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Version; +import org.apache.solr.analysis.*; import org.apache.solr.common.ResourceLoader; import org.apache.solr.common.SolrException; -import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.DOMUtil; -import org.apache.solr.common.util.NamedList; import org.apache.solr.core.Config; import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.analysis.CharFilterFactory; -import org.apache.solr.analysis.TokenFilterFactory; -import org.apache.solr.analysis.TokenizerChain; -import org.apache.solr.analysis.TokenizerFactory; import org.apache.solr.util.plugin.AbstractPluginLoader; import org.w3c.dom.*; @@ -88,12 +84,16 @@ public final class FieldTypePluginLoader String expression = "./analyzer[@type='query']"; Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE); Analyzer queryAnalyzer = readAnalyzer(anode); - + + expression = "./analyzer[@type='multiterm']"; + anode = (Node)xpath.evaluate(expression, node, 
XPathConstants.NODE); + Analyzer multiAnalyzer = readAnalyzer(anode); + // An analyzer without a type specified, or with type="index" expression = "./analyzer[not(@type)] | ./analyzer[@type='index']"; anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE); Analyzer analyzer = readAnalyzer(anode); - + // a custom similarity[Factory] expression = "./similarity"; anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE); @@ -101,9 +101,16 @@ public final class FieldTypePluginLoader if (queryAnalyzer==null) queryAnalyzer=analyzer; if (analyzer==null) analyzer=queryAnalyzer; + if (multiAnalyzer == null) { + Boolean legacyMatch = ! schema.getDefaultLuceneMatchVersion().onOrAfter(Version.LUCENE_36); + legacyMatch = (DOMUtil.getAttr(node, "legacyMultiTerm", null) == null) ? legacyMatch : + Boolean.parseBoolean(DOMUtil.getAttr(node, "legacyMultiTerm", null)); + multiAnalyzer = constructMultiTermAnalyzer(queryAnalyzer, legacyMatch); + } if (analyzer!=null) { ft.setAnalyzer(analyzer); ft.setQueryAnalyzer(queryAnalyzer); + ft.setMultiTermAnalyzer(multiAnalyzer); } if (similarity!=null) { ft.setSimilarity(similarity); @@ -130,6 +137,42 @@ public final class FieldTypePluginLoader return fieldTypes.put( name, plugin ); } + // The point here is that, if no multitermanalyzer was specified in the schema file, do one of several things: + // 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters, + // lowercase filters and asciifoldingfilter. + // 2> If legacyMultiTerm == true just use a KeywordAnalyzer. That should mimic current behavior. 
+ // Do the same if they've specified that the old behavior is required (legacyMultiTerm="true") + + private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer, Boolean legacyMultiTerm) { + if (queryAnalyzer == null) return null; + + if (legacyMultiTerm || (!(queryAnalyzer instanceof TokenizerChain))) { + return new KeywordAnalyzer(); + } + + TokenizerChain tc = (TokenizerChain) queryAnalyzer; + + // sized by the source chain rather than a fixed 2, so repeated lowercase/folding filters cannot overflow the array + TokenFilterFactory[] filters = new TokenFilterFactory[tc.getTokenFilterFactories().length]; + int idx = 0; + for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { + if (factory instanceof LowerCaseFilterFactory) { + filters[idx] = new LowerCaseFilterFactory(); + filters[idx++].init(factory.getArgs()); + } + if (factory instanceof ASCIIFoldingFilterFactory) { + filters[idx] = new ASCIIFoldingFilterFactory(); + filters[idx++].init(factory.getArgs()); + } + } + WhitespaceTokenizerFactory white = new WhitespaceTokenizerFactory(); + white.init(tc.getTokenizerFactory().getArgs()); + + return new TokenizerChain(tc.getCharFilterFactories(), + white, + Arrays.copyOfRange(filters, 0, idx)); + } + // // // diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaField.java b/solr/core/src/java/org/apache/solr/schema/SchemaField.java index 7aa8db5206f..34990ff9655 100644 --- a/solr/core/src/java/org/apache/solr/schema/SchemaField.java +++ b/solr/core/src/java/org/apache/solr/schema/SchemaField.java @@ -97,6 +97,9 @@ public final class SchemaField extends FieldProperties { boolean isTokenized() { return (properties & TOKENIZED)!=0; } boolean isBinary() { return (properties & BINARY)!=0; } + boolean legacyMultiTerm() { + return (properties & LEGACY_MULTITERM) != 0; + } public IndexableField createField(Object val, float boost) { return type.createField(this,val,boost); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 
3ca52d6f866..c3f76324d0e 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -98,6 +98,11 @@ public class TextField extends FieldType { this.queryAnalyzer = analyzer; } + @Override + public void setMultiTermAnalyzer(Analyzer analyzer) { + this.multiTermAnalyzer = analyzer; + } + static Query parseFieldQuery(QParser parser, Analyzer analyzer, String field, String queryText) { int phraseSlop = 0; boolean enablePositionIncrements = true; diff --git a/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java b/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java index 94c98bc21b4..0c5584d9c3a 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java +++ b/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java @@ -26,7 +26,6 @@ import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.BasicOperations; @@ -71,7 +70,6 @@ public class SolrQueryParser extends QueryParser { this.schema = parser.getReq().getSchema(); this.parser = parser; this.defaultField = defaultField; - setLowercaseExpandedTerms(false); setEnablePositionIncrements(true); checkAllowLeadingWildcards(); } @@ -106,6 +104,14 @@ public class SolrQueryParser extends QueryParser { } } + protected String analyzeIfMultitermTermText(String field, String part, Analyzer analyzer) { + if (part == null) return part; + + SchemaField sf = schema.getFieldOrNull((field)); + if (sf == null || ! 
(sf.getType() instanceof TextField)) return part; + return analyzeMultitermTerm(field, part, analyzer).utf8ToString(); + } + @Override protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { checkNullField(field); @@ -137,6 +143,8 @@ public class SolrQueryParser extends QueryParser { @Override protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { checkNullField(field); + part1 = analyzeIfMultitermTermText(field, part1, schema.getFieldType(field).getMultiTermAnalyzer()); + part2 = analyzeIfMultitermTermText(field, part2, schema.getFieldType(field).getMultiTermAnalyzer()); SchemaField sf = schema.getField(field); return sf.getType().getRangeQuery(parser, sf, part1, part2, startInclusive, endInclusive); } @@ -144,9 +152,8 @@ public class SolrQueryParser extends QueryParser { @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { checkNullField(field); - if (getLowercaseExpandedTerms()) { - termStr = termStr.toLowerCase(); - } + + termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); // TODO: toInternal() won't necessarily work on partial // values, so it looks like we need a getPrefix() function @@ -162,14 +169,13 @@ public class SolrQueryParser extends QueryParser { PrefixQuery prefixQuery = new PrefixQuery(t); return prefixQuery; } - @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { // *:* -> MatchAllDocsQuery if ("*".equals(field) && "*".equals(termStr)) { return newMatchAllDocsQuery(); } - + termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); // can we use reversed wildcards in this field? 
String type = schema.getFieldType(field).getTypeName(); ReversedWildcardFilterFactory factory = leadingWildcards.get(type); @@ -213,4 +219,11 @@ public class SolrQueryParser extends QueryParser { } return q; } + + + protected Query getRegexpQuery(String field, String termStr) throws ParseException + { + termStr = analyzeIfMultitermTermText(field, termStr, schema.getFieldType(field).getMultiTermAnalyzer()); + return super.getRegexpQuery(field, termStr); + } } diff --git a/solr/core/src/test-files/solr/conf/schema-folding.xml b/solr/core/src/test-files/solr/conf/schema-folding.xml new file mode 100644 index 00000000000..798ca301217 --- /dev/null +++ b/solr/core/src/test-files/solr/conf/schema-folding.xml @@ -0,0 +1,145 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + content + id + + diff --git a/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java b/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java new file mode 100644 index 00000000000..7e64aff3c54 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/schema/MultiTermTest.java @@ -0,0 +1,87 @@ +package org.apache.solr.schema; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.Analyzer; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.analysis.*; +import org.junit.BeforeClass; +import org.junit.Test; + +public class MultiTermTest extends SolrTestCaseJ4 { + public String getCoreName() { + return "basic"; + } + + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig-basic.xml", "schema-folding.xml"); + } + + @Test + public void testMultiFound() { + SchemaField field = h.getCore().getSchema().getField("content_multi"); + Analyzer analyzer = field.getType().getMultiTermAnalyzer(); + assertTrue(analyzer instanceof TokenizerChain); + assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); + TokenizerChain tc = (TokenizerChain) analyzer; + for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { + assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory)); + } + + analyzer = field.getType().getAnalyzer(); + assertTrue(analyzer instanceof TokenizerChain); + assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); + tc = (TokenizerChain) analyzer; + for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { + assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory)); + } + + assertTrue(tc.getCharFilterFactories().length == 0); + } + + @Test + public void testQueryCopiedToMulti() { + SchemaField field = 
h.getCore().getSchema().getField("content_charfilter"); + Analyzer analyzer = field.getType().getMultiTermAnalyzer(); + assertTrue(analyzer instanceof TokenizerChain); + assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); + TokenizerChain tc = (TokenizerChain) analyzer; + for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { + assertTrue(factory instanceof LowerCaseFilterFactory); + } + + assertTrue(tc.getCharFilterFactories().length == 1); + assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory); + } + + @Test + public void testDefaultCopiedToMulti() { + SchemaField field = h.getCore().getSchema().getField("content_ws"); + Analyzer analyzer = field.getType().getMultiTermAnalyzer(); + assertTrue(analyzer instanceof TokenizerChain); + assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof WhitespaceTokenizerFactory); + TokenizerChain tc = (TokenizerChain) analyzer; + for (TokenFilterFactory factory : tc.getTokenFilterFactories()) { + assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory)); + } + + assertTrue(tc.getCharFilterFactories().length == 0); + + } +} diff --git a/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java b/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java new file mode 100644 index 00000000000..2d02e922135 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestFoldingMultitermQuery.java @@ -0,0 +1,231 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexWriter; +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestFoldingMultitermQuery extends SolrTestCaseJ4 { + + public String getCoreName() { + return "basic"; + } + + @BeforeClass + public static void beforeTests() throws Exception { + initCore("solrconfig-basic.xml", "schema-folding.xml"); + IndexWriter iw; + + String docs[] = { + "abcdefg1 finger", + "gangs hijklmn1", + "opqrstu1 zilly", + }; + + // prepare the index + for (int i = 0; i < docs.length; i++) { + String num = Integer.toString(i); + String boolVal = ((i % 2) == 0) ? 
"true" : "false"; + assertU(adoc("id", num, + "int_f", num, + "float_f", num, + "long_f", num, + "double_f", num, + "byte_f", num, + "short_f", num, + "bool_f", boolVal, + "date_f", "200" + Integer.toString(i % 10) + "-01-01T00:00:00Z", + "content", docs[i], + "content_ws", docs[i], + "content_rev", docs[i], + "content_multi", docs[i], + "content_lower_token", docs[i], + "content_oldstyle", docs[i], + "content_charfilter", docs[i], + "content_multi_bad", docs[i] + )); + } + assertU(optimize()); + } + + @Test + public void testPrefixCaseAccentFolding() throws Exception { + String matchOneDocPrefixUpper[][] = { + {"A*", "ÁB*", "ABÇ*"}, // these should find only doc 0 + {"H*", "HÏ*", "HìJ*"}, // these should find only doc 1 + {"O*", "ÖP*", "OPQ*"}, // these should find only doc 2 + }; + + String matchRevPrefixUpper[][] = { + {"*Ğ1", "*DEfG1", "*EfG1"}, + {"*N1", "*LmŊ1", "*MÑ1"}, + {"*Ǖ1", "*sTu1", "*RŠTU1"} + }; + + // test the prefix queries find only one doc where the query is uppercased. Must go through query parser here! 
+ for (int idx = 0; idx < matchOneDocPrefixUpper.length; idx++) { + for (int jdx = 0; jdx < matchOneDocPrefixUpper[idx].length; jdx++) { + String me = matchOneDocPrefixUpper[idx][jdx]; + assertQ(req("q", "content:" + me), + "//*[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_ws:" + me), + "//*[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_multi:" + me), + "//*[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_lower_token:" + me), + "//result[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + } + } + for (int idx = 0; idx < matchRevPrefixUpper.length; idx++) { + for (int jdx = 0; jdx < matchRevPrefixUpper[idx].length; jdx++) { + String me = matchRevPrefixUpper[idx][jdx]; + assertQ(req("q", "content_rev:" + me), + "//*[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + } + } + } + + // test the wildcard queries find only one doc where the query is uppercased and/or accented. 
+ @Test + public void testWildcardCaseAccentFolding() throws Exception { + String matchOneDocWildUpper[][] = { + {"Á*C*", "ÁB*1", "ABÇ*g1", "Á*FG1"}, // these should find only doc 0 + {"H*k*", "HÏ*l?*", "HìJ*n*", "HìJ*m*"}, // these should find only doc 1 + {"O*ř*", "ÖP*ş???", "OPQ*S?Ů*", "ÖP*1"}, // these should find only doc 2 + }; + + for (int idx = 0; idx < matchOneDocWildUpper.length; idx++) { + for (int jdx = 0; jdx < matchOneDocWildUpper[idx].length; jdx++) { + String me = matchOneDocWildUpper[idx][jdx]; + assertQ("Error with " + me, req("q", "content:" + me), + "//result[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_ws:" + me), + "//result[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_multi:" + me), + "//result[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + assertQ(req("q", "content_lower_token:" + me), + "//result[@numFound='1']", + "//*[@name='id'][.='" + Integer.toString(idx) + "']"); + } + } + } + + // Phrases should fail. This test is mainly a marker so if phrases ever do start working with wildcards we go + // and update the documentation + @Test + public void testPhrase() { + assertQ(req("q", "content:\"silly ABCD*\""), + "//result[@numFound='0']"); + } + + // Make sure the legacy behavior flag is honored + @Test + public void testLegacyBehavior() { + assertQ(req("q", "content_oldstyle:ABCD*"), + "//result[@numFound='0']"); + } + + @Test + public void testWildcardRange() { + assertQ(req("q", "content:[* TO *]"), + "//result[@numFound='3']"); + } + + + // Does the char filter get correctly handled? 
+ @Test + public void testCharFilter() { + assertQ(req("q", "content_charfilter:" + "Á*C*"), + "//result[@numFound='1']", + "//*[@name='id'][.='0']"); + assertQ(req("q", "content_charfilter:" + "ABÇ*g1"), + "//result[@numFound='1']", + "//*[@name='id'][.='0']"); + assertQ(req("q", "content_charfilter:" + "HÏ*l?*"), + "//result[@numFound='1']", + "//*[@name='id'][.='1']"); + } + + @Test + public void testRangeQuery() { + assertQ(req("q", "content:" + "{Ȫp*1 TO QŮ*}"), + "//result[@numFound='1']", + "//*[@name='id'][.='2']"); + + assertQ(req("q", "content:" + "[Áb* TO f?Ñg?r]"), + "//result[@numFound='1']", + "//*[@name='id'][.='0']"); + + } + + @Test + public void testNonTextTypes() { + String[] intTypes = {"int_f", "float_f", "long_f", "double_f", "byte_f", "short_f"}; + + for (String str : intTypes) { + assertQ(req("q", str + ":" + "0"), + "//result[@numFound='1']", + "//*[@name='id'][.='0']"); + + assertQ(req("q", str + ":" + "[0 TO 2]"), + "//result[@numFound='3']", + "//*[@name='id'][.='0']", + "//*[@name='id'][.='1']", + "//*[@name='id'][.='2']"); + } + assertQ(req("q", "bool_f:true"), + "//result[@numFound='2']", + "//*[@name='id'][.='0']", + "//*[@name='id'][.='2']"); + + assertQ(req("q", "bool_f:[false TO true]"), + "//result[@numFound='3']", + "//*[@name='id'][.='0']", + "//*[@name='id'][.='1']", + "//*[@name='id'][.='2']"); + + assertQ(req("q", "date_f:2000-01-01T00\\:00\\:00Z"), + "//result[@numFound='1']", + "//*[@name='id'][.='0']"); + + assertQ(req("q", "date_f:[2000-12-31T23:59:59.999Z TO 2002-01-02T00:00:01Z]"), + "//result[@numFound='2']", + "//*[@name='id'][.='1']", + "//*[@name='id'][.='2']"); + } + + @Test + public void testMultiBad() { + try { + assertQ(req("q", "content_multi_bad:" + "abCD*")); + fail("Should throw exception when token evaluates to more than one term"); + } catch (Exception expected) { + assertTrue(expected.getCause() instanceof IllegalArgumentException); + } + } +} \ No newline at end of file diff --git 
a/solr/example/solr/conf/schema.xml b/solr/example/solr/conf/schema.xml index c073c27f2a7..794c5e05b71 100755 --- a/solr/example/solr/conf/schema.xml +++ b/solr/example/solr/conf/schema.xml @@ -427,6 +427,42 @@ + + + + + + + + + + + + + + + + + + + +