diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9c4890d2c7b..0dced289cf3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,6 +23,8 @@ New Features * LUCENE-7927: Add LongValueFacetCounts, to compute facet counts for individual numeric values (Mike McCandless) +* LUCENE-7940: Add BengaliAnalyzer. (Md. Abdulla-Al-Sun via Robert Muir) + Optimizations * LUCENE-7905: Optimize how OrdinalMap (used by @@ -52,12 +54,23 @@ Bug Fixes not recommended, lucene-analyzers-icu contains binary data structures specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir) +* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache + by default. (Jan-Willem van den Broek via Mike McCandless) + Build * SOLR-11181: Switch order of maven artifact publishing procedure: deploy first instead of locally installing first, to workaround a double repository push of *-sources.jar and *-javadoc.jar files. (Lynn Monson via Steve Rowe) +Other + +* LUCENE-7948, LUCENE-7937: Upgrade randomizedtesting to 2.5.3 (minor fixes + in test filtering for IDEs). (Mike Sokolov, Dawid Weiss) + +* LUCENE-7933: LongBitSet now validates the numBits parameter (Won + Jonghoon, Mike McCandless) + ======================= Lucene 7.0.0 ======================= New Features @@ -184,6 +197,9 @@ Bug Fixes * LUCENE-7864: IndexMergeTool is not using intermediate hard links (even if possible). (Dawid Weiss) +* LUCENE-7956: Fixed potential stack overflow error in ICUNormalizer2CharFilter. + (Adrien Grand) + Improvements * LUCENE-7489: Better storage of sparse doc-values fields with the default diff --git a/lucene/NOTICE.txt b/lucene/NOTICE.txt index 1903adc743d..7e0c54e2995 100644 --- a/lucene/NOTICE.txt +++ b/lucene/NOTICE.txt @@ -54,13 +54,14 @@ The KStem stemmer in was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst) under the BSD-license. -The Arabic,Persian,Romanian,Bulgarian, and Hindi analyzers (common) come with a default +The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default stopword list that is BSD-licensed created by Jacques Savoy. These files reside in: analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt, analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt, analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt, analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt, -analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt +analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt See http://members.unine.ch/jacques.savoy/clef/index.html. The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java new file mode 100644 index 00000000000..4f8ec06742d --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliAnalyzer.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.core.DecimalDigitFilter; +import org.apache.lucene.analysis.in.IndicNormalizationFilter; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; +import org.apache.lucene.analysis.standard.StandardFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; + +import java.io.IOException; +import java.io.Reader; + +/** + * Analyzer for Bengali. + */ +public final class BengaliAnalyzer extends StopwordAnalyzerBase { + private final CharArraySet stemExclusionSet; + + /** + * File containing default Bengali stopwords. + * + * Default stopword list is from http://members.unine.ch/jacques.savoy/clef/bengaliST.txt + * The stopword list is BSD-Licensed. + */ + public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt"; + private static final String STOPWORDS_COMMENT = "#"; + + /** + * Returns an unmodifiable instance of the default stop-words set. + * @return an unmodifiable instance of the default stop-words set. + */ + public static CharArraySet getDefaultStopSet(){ + return DefaultSetHolder.DEFAULT_STOP_SET; + } + + /** + * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class + * accesses the static final set the first time.; + */ + private static class DefaultSetHolder { + static final CharArraySet DEFAULT_STOP_SET; + + static { + try { + DEFAULT_STOP_SET = loadStopwordSet(false, BengaliAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT); + } catch (IOException ex) { + throw new RuntimeException("Unable to load default stopword set"); + } + } + } + + /** + * Builds an analyzer with the given stop words + * + * @param stopwords a stopword set + * @param stemExclusionSet a stemming exclusion set + */ + public BengaliAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) { + super(stopwords); + this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet)); + } + + /** + * Builds an analyzer with the given stop words + * + * @param stopwords a stopword set + */ + public BengaliAnalyzer(CharArraySet stopwords) { + this(stopwords, CharArraySet.EMPTY_SET); + } + + /** + * Builds an analyzer with the default stop words: + * {@link #DEFAULT_STOPWORD_FILE}. + */ + public BengaliAnalyzer() { + this(DefaultSetHolder.DEFAULT_STOP_SET); + } + + /** + * Creates + * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} + * used to tokenize all the text in the provided {@link Reader}. 
+ * + * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} + * built from a {@link StandardTokenizer} filtered with + * {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link IndicNormalizationFilter}, + * {@link BengaliNormalizationFilter}, {@link SetKeywordMarkerFilter} + * if a stem exclusion set is provided, {@link BengaliStemFilter}, and + * Bengali Stop words + */ + @Override + protected TokenStreamComponents createComponents(String fieldName) { + final Tokenizer source = new StandardTokenizer(); + TokenStream result = new LowerCaseFilter(source); + result = new DecimalDigitFilter(result); + if (!stemExclusionSet.isEmpty()) + result = new SetKeywordMarkerFilter(result, stemExclusionSet); + result = new IndicNormalizationFilter(result); + result = new BengaliNormalizationFilter(result); + result = new StopFilter(result, stopwords); + result = new BengaliStemFilter(result); + return new TokenStreamComponents(source, result); + } + + @Override + protected TokenStream normalize(String fieldName, TokenStream in) { + TokenStream result = new StandardFilter(in); + result = new LowerCaseFilter(result); + result = new DecimalDigitFilter(result); + result = new IndicNormalizationFilter(result); + result = new BengaliNormalizationFilter(result); + return result; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilter.java new file mode 100644 index 00000000000..46874b5b588 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilter.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +import java.io.IOException; + +/** + * A {@link TokenFilter} that applies {@link BengaliNormalizer} to normalize the + * orthography. + *
+ * In some cases the normalization may cause unrelated terms to conflate, so + * to prevent terms from being normalized use an instance of + * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets + * the {@link KeywordAttribute} before this {@link TokenStream}. + *
+ * @see BengaliNormalizer + */ +public final class BengaliNormalizationFilter extends TokenFilter { + + private final BengaliNormalizer normalizer = new BengaliNormalizer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); + + public BengaliNormalizationFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAtt.isKeyword()) + termAtt.setLength(normalizer.normalize(termAtt.buffer(), + termAtt.length())); + return true; + } + return false; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilterFactory.java new file mode 100644 index 00000000000..43618d6dbb3 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliNormalizationFilterFactory.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.AbstractAnalysisFactory; +import org.apache.lucene.analysis.util.MultiTermAwareComponent; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +import java.util.Map; + +/** + * Factory for {@link BengaliNormalizationFilter}. + *+ * <fieldType name="text_bnnormal" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.BengaliNormalizationFilterFactory"/> + * </analyzer> + * </fieldType>+ */ +public class BengaliNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent { + + public BengaliNormalizationFilterFactory(Map
+ * Implements the Bengali-language specific algorithm specified in: + * A Double Metaphone encoding for Bangla and its application in spelling checker + * Naushad UzZaman and Mumit Khan. + * http://www.panl10n.net/english/final%20reports/pdf%20files/Bangladesh/BAN16.pdf + *
+ */ +public class BengaliNormalizer { + /** + * Normalize an input buffer of Bengali text + * + * @param s input buffer + * @param len length of input buffer + * @return length of input buffer after normalization + */ + public int normalize(char s[], int len) { + + for (int i = 0; i < len; i++) { + switch (s[i]) { + // delete Chandrabindu + case '\u0981': + len = delete(s, i, len); + i--; + break; + + // DirghoI kar -> RosshoI kar + case '\u09C0': + s[i] = '\u09BF'; + break; + + // DirghoU kar -> RosshoU kar + case '\u09C2': + s[i] = '\u09C1'; + break; + + // Khio (Ka + Hoshonto + Murdorno Sh) + case '\u0995': + if(i + 2 < len && s[i+1] == '\u09CD' && s[i+2] == '\u09BF') { + if (i == 0) { + s[i] = '\u0996'; + len = delete(s, i + 2, len); + len = delete(s, i + 1, len); + } else { + s[i+1] = '\u0996'; + len = delete(s, i + 2, len); + } + } + break; + + // Nga to Anusvara + case '\u0999': + s[i] = '\u0982'; + break; + + // Ja Phala + case '\u09AF': + if(i - 2 == 0 && s[i-1] == '\u09CD') { + s[i - 1] = '\u09C7'; + + if(i + 1 < len && s[i+1] == '\u09BE') { + len = delete(s, i+1, len); + } + len = delete(s, i, len); + i --; + } else if(i - 1 >= 0 && s[i-1] == '\u09CD' ){ + len = delete(s, i, len); + len = delete(s, i-1, len); + i -=2; + } + break; + + // Ba Phalaa + case '\u09AC': + if((i >= 1 && s[i-1] != '\u09CD') || i == 0) + break; + if(i - 2 == 0) { + len = delete(s, i, len); + len = delete(s, i - 1, len); + i -= 2; + } else if(i - 5 >= 0 && s[i - 3] == '\u09CD') { + len = delete(s, i, len); + len = delete(s, i-1, len); + i -=2; + } else if(i - 2 >= 0){ + s[i - 1] = s[i - 2]; + len = delete(s, i, len); + i --; + } + break; + + // Visarga + case '\u0983': + if(i == len -1) { + if(len <= 3) { + s[i] = '\u09B9'; + } else { + len = delete(s, i, len); + } + } else { + s[i] = s[i+1]; + } + break; + + //All sh + case '\u09B6': + case '\u09B7': + s[i] = '\u09B8'; + break; + + //check na + case '\u09A3': + s[i] = '\u09A8'; + break; + + //check ra + case '\u09DC': + case '\u09DD': + s[i] = '\u09B0'; + break; + + case '\u09CE': + s[i] = '\u09A4'; + break; + + default: + break; + } + } + + return len; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilter.java new file mode 100644 index 00000000000..97870272136 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilter.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; + +import java.io.IOException; + +/** + * A {@link TokenFilter} that applies {@link BengaliStemmer} to stem Bengali words. + */ +public final class BengaliStemFilter extends TokenFilter { + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttribute = addAttribute(KeywordAttribute.class); + private final BengaliStemmer bengaliStemmer = new BengaliStemmer(); + + public BengaliStemFilter(TokenStream input) { + super(input); + } + + @Override + public boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (!keywordAttribute.isKeyword()) + termAttribute.setLength(bengaliStemmer.stem(termAttribute.buffer(), termAttribute.length())); + return true; + } else { + return false; + } + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilterFactory.java new file mode 100644 index 00000000000..b082d9e5b77 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/BengaliStemFilterFactory.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.TokenFilterFactory; + +import java.util.Map; + +/** + * Factory for {@link BengaliStemFilter}. + *+ * <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100"> + * <analyzer> + * <tokenizer class="solr.StandardTokenizerFactory"/> + * <filter class="solr.BengaliStemFilterFactory"/> + * </analyzer> + * </fieldType>+ */ +public class BengaliStemFilterFactory extends TokenFilterFactory { + + public BengaliStemFilterFactory(Map
+ * The algorithm is based on the report in: + * Natural Language Processing in an Indian Language (Bengali)-I: Verb Phrase Analysis + * P Sengupta and B B Chaudhuri + *
+ * + *+ * Few Stemmer criteria are taken from: + * http://members.unine.ch/jacques.savoy/clef/BengaliStemmerLight.java.txt + *
+ */ +public class BengaliStemmer { + public int stem(char buffer[], int len) { + + // 8 + if (len > 9 && (endsWith(buffer, len, "িয়াছিলাম") + || endsWith(buffer, len, "িতেছিলাম") + || endsWith(buffer, len, "িতেছিলেন") + || endsWith(buffer, len, "ইতেছিলেন") + || endsWith(buffer, len, "িয়াছিলেন") + || endsWith(buffer, len, "ইয়াছিলেন") + )) + return len - 8; + + // 7 + if ((len > 8) && (endsWith(buffer, len, "িতেছিলি") + || endsWith(buffer, len, "িতেছিলে") + || endsWith(buffer, len, "িয়াছিলা") + || endsWith(buffer, len, "িয়াছিলে") + || endsWith(buffer, len, "িতেছিলা") + || endsWith(buffer, len, "িয়াছিলি") + + || endsWith(buffer, len, "য়েদেরকে") + )) + return len - 7; + + // 6 + if ((len > 7) && (endsWith(buffer, len, "িতেছিস") + || endsWith(buffer, len, "িতেছেন") + || endsWith(buffer, len, "িয়াছিস") + || endsWith(buffer, len, "িয়াছেন") + || endsWith(buffer, len, "েছিলাম") + || endsWith(buffer, len, "েছিলেন") + + || endsWith(buffer, len, "েদেরকে") + )) + return len - 6; + + // 5 + if ((len > 6) && (endsWith(buffer, len, "িতেছি") + || endsWith(buffer, len, "িতেছা") + || endsWith(buffer, len, "িতেছে") + || endsWith(buffer, len, "ছিলাম") + || endsWith(buffer, len, "ছিলেন") + || endsWith(buffer, len, "িয়াছি") + || endsWith(buffer, len, "িয়াছা") + || endsWith(buffer, len, "িয়াছে") + || endsWith(buffer, len, "েছিলে") + || endsWith(buffer, len, "েছিলা") + + || endsWith(buffer, len, "য়েদের") + || endsWith(buffer, len, "দেরকে") + )) + return len - 5; + + // 4 + if ((len > 5) && (endsWith(buffer, len, "িলাম") + || endsWith(buffer, len, "িলেন") + || endsWith(buffer, len, "িতাম") + || endsWith(buffer, len, "িতেন") + || endsWith(buffer, len, "িবেন") + || endsWith(buffer, len, "ছিলি") + || endsWith(buffer, len, "ছিলে") + || endsWith(buffer, len, "ছিলা") + || endsWith(buffer, len, "তেছে") + || endsWith(buffer, len, "িতেছ") + + || endsWith(buffer, len, "খানা") + || endsWith(buffer, len, "খানি") + || endsWith(buffer, len, "গুলো") + || endsWith(buffer, len, "গুলি") + || endsWith(buffer, len, "য়েরা") + || endsWith(buffer, len, "েদের") + )) + return len - 4; + + // 3 + if ((len > 4) && (endsWith(buffer, len, "লাম") + || endsWith(buffer, len, "িলি") + || endsWith(buffer, len, "ইলি") + || endsWith(buffer, len, "িলে") + || endsWith(buffer, len, "ইলে") + || endsWith(buffer, len, "লেন") + || endsWith(buffer, len, "িলা") + || endsWith(buffer, len, "ইলা") + || endsWith(buffer, len, "তাম") + || endsWith(buffer, len, "িতি") + || endsWith(buffer, len, "ইতি") + || endsWith(buffer, len, "িতে") + || endsWith(buffer, len, "ইতে") + || endsWith(buffer, len, "তেন") + || endsWith(buffer, len, "িতা") + || endsWith(buffer, len, "িবা") + || endsWith(buffer, len, "ইবা") + || endsWith(buffer, len, "িবি") + || endsWith(buffer, len, "ইবি") + || endsWith(buffer, len, "বেন") + || endsWith(buffer, len, "িবে") + || endsWith(buffer, len, "ইবে") + || endsWith(buffer, len, "ছেন") + + || endsWith(buffer, len, "য়োন") + || endsWith(buffer, len, "য়ের") + || endsWith(buffer, len, "েরা") + || endsWith(buffer, len, "দের") + )) + return len - 3; + + // 2 + if ((len > 3) && (endsWith(buffer, len, "িস") + || endsWith(buffer, len, "েন") + || endsWith(buffer, len, "লি") + || endsWith(buffer, len, "লে") + || endsWith(buffer, len, "লা") + || endsWith(buffer, len, "তি") + || endsWith(buffer, len, "তে") + || endsWith(buffer, len, "তা") + || endsWith(buffer, len, "বি") + || endsWith(buffer, len, "বে") + || endsWith(buffer, len, "বা") + || endsWith(buffer, len, "ছি") + || endsWith(buffer, len, "ছা") + || endsWith(buffer, len, "ছে") + || 
endsWith(buffer, len, "ুন") + || endsWith(buffer, len, "ুক") + + || endsWith(buffer, len, "টা") + || endsWith(buffer, len, "টি") + || endsWith(buffer, len, "নি") + || endsWith(buffer, len, "ের") + || endsWith(buffer, len, "তে") + || endsWith(buffer, len, "রা") + || endsWith(buffer, len, "কে") + )) + return len - 2; + + // 1 + if ((len > 2) && (endsWith(buffer, len, "ি") + || endsWith(buffer, len, "ী") + || endsWith(buffer, len, "া") + || endsWith(buffer, len, "ো") + || endsWith(buffer, len, "ে") + || endsWith(buffer, len, "ব") + || endsWith(buffer, len, "ত") + )) + return len - 1; + + return len; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/package-info.java new file mode 100644 index 00000000000..eea39a9fdfb --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bn/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Analyzer for Bengali Language. + */ +package org.apache.lucene.analysis.bn; diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory index bc19c4ac320..d871ad649d1 100644 --- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory +++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory @@ -17,6 +17,8 @@ org.apache.lucene.analysis.tr.ApostropheFilterFactory org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory org.apache.lucene.analysis.ar.ArabicStemFilterFactory org.apache.lucene.analysis.bg.BulgarianStemFilterFactory +org.apache.lucene.analysis.bn.BengaliNormalizationFilterFactory +org.apache.lucene.analysis.bn.BengaliStemFilterFactory org.apache.lucene.analysis.br.BrazilianStemFilterFactory org.apache.lucene.analysis.cjk.CJKBigramFilterFactory org.apache.lucene.analysis.cjk.CJKWidthFilterFactory diff --git a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt new file mode 100644 index 00000000000..84d1d2ad732 --- /dev/null +++ b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt @@ -0,0 +1,121 @@ +# See http://members.unine.ch/jacques.savoy/clef/index.html. 
+# This file was created by Jacques Savoy and is distributed under the BSD license +এই +ও +থেকে +করে +এ +না +ওই +এক্ +নিয়ে +করা +বলেন +সঙ্গে +যে +এব +তা +আর +কোনো +বলে +সেই +দিন +হয় +কি +দু +পরে +সব +দেওয়া +মধ্যে +এর +সি +শুরু +কাজ +কিছু +কাছে +সে +তবে +বা +বন +আগে +জ্নজন +পি +পর +তো +ছিল +এখন +আমরা +প্রায় +দুই +আমাদের +তাই +অন্য +গিয়ে +প্রযন্ত +মনে +নতুন +মতো +কেখা +প্রথম +আজ +টি +ধামার +অনেক +বিভিন্ন +র +হাজার +জানা +নয় +অবশ্য +বেশি +এস +করে +কে +হতে +বি +কয়েক +সহ +বেশ +এমন +এমনি +কেন +কেউ +নেওয়া +চেষ্টা +লক্ষ +বলা +কারণ +আছে +শুধু +তখন +যা +এসে +চার +ছিল +যদি +আবার +কোটি +উত্তর +সামনে +উপর +বক্তব্য +এত +প্রাথমিক +উপরে +আছে +প্রতি +কাজে +যখন +খুব +বহু +গেল +পেয়্র্ +চালু +ই +নাগাদ +থাকা +পাচ +যাওয়া +রকম +সাধারণ +কমনে \ No newline at end of file diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliAnalyzer.java new file mode 100644 index 00000000000..e04f209746e --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliAnalyzer.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; + +/** + * Tests the BengaliAnalyzer + */ +public class TestBengaliAnalyzer extends BaseTokenStreamTestCase { + + public void testResourcesAvailable() { + new BengaliAnalyzer().close(); + } + + public void testBasics() throws Exception { + Analyzer a = new BengaliAnalyzer(); + + checkOneTerm(a, "বাড়ী", "বার"); + checkOneTerm(a, "বারী", "বার"); + a.close(); + } + /** + * test Digits + */ + public void testDigits() throws Exception { + BengaliAnalyzer a = new BengaliAnalyzer(); + checkOneTerm(a, "১২৩৪৫৬৭৮৯০", "1234567890"); + a.close(); + } + + /** blast some random strings through the analyzer */ + public void testRandomStrings() throws Exception { + Analyzer analyzer = new BengaliAnalyzer(); + checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER); + analyzer.close(); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliFilters.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliFilters.java new file mode 100644 index 00000000000..3ed1a07e14f --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliFilters.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import java.io.Reader; +import java.io.StringReader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase; + +/** + * Test Bengali Filter Factory + */ +public class TestBengaliFilters extends BaseTokenStreamFactoryTestCase { + /** + * Test IndicNormalizationFilterFactory + */ + public void testIndicNormalizer() throws Exception { + Reader reader = new StringReader("ত্ আমি"); + TokenStream stream = whitespaceMockTokenizer(reader); + stream = tokenFilterFactory("IndicNormalization").create(stream); + assertTokenStreamContents(stream, new String[] { "ৎ", "আমি" }); + } + + /** + * Test BengaliNormalizationFilterFactory + */ + public void testBengaliNormalizer() throws Exception { + Reader reader = new StringReader("বাড়ী"); + TokenStream stream = whitespaceMockTokenizer(reader); + stream = tokenFilterFactory("IndicNormalization").create(stream); + stream = tokenFilterFactory("BengaliNormalization").create(stream); + assertTokenStreamContents(stream, new String[] {"বারি"}); + } + + /** + * Test BengaliStemFilterFactory + */ + public void testStemmer() throws Exception { + Reader reader = new StringReader("বাড়ী"); + TokenStream stream = whitespaceMockTokenizer(reader); + stream = tokenFilterFactory("IndicNormalization").create(stream); + stream = tokenFilterFactory("BengaliNormalization").create(stream); + stream = tokenFilterFactory("BengaliStem").create(stream); + assertTokenStreamContents(stream, new String[] {"বার"}); + } + + /** Test that bogus arguments result in exception */ + public void testBogusArguments() throws Exception { + IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> { + tokenFilterFactory("IndicNormalization", "bogusArg", "bogusValue"); + }); + assertTrue(expected.getMessage().contains("Unknown parameters")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + tokenFilterFactory("BengaliNormalization", "bogusArg", "bogusValue"); + }); + assertTrue(expected.getMessage().contains("Unknown parameters")); + + expected = expectThrows(IllegalArgumentException.class, () -> { + tokenFilterFactory("BengaliStem", "bogusArg", "bogusValue"); + }); + assertTrue(expected.getMessage().contains("Unknown parameters")); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliNormalizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliNormalizer.java new file mode 100644 index 00000000000..b8073c9dda4 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliNormalizer.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.util.TestUtil; + +import java.io.IOException; + +/** + * Test BengaliNormalizer + */ +public class TestBengaliNormalizer extends BaseTokenStreamTestCase { + /** + * Test some basic normalization, with an example from the paper. + */ + public void testChndrobindu() throws IOException { + check("চাঁদ", "চাদ"); + } + + public void testRosshoIKar() throws IOException { + check("বাড়ী", "বারি"); + check("তীর", "তির"); + } + + public void testRosshoUKar() throws IOException { + check("ভূল", "ভুল"); + check("অনূপ", "অনুপ"); + } + + public void testNga() throws IOException { + check("বাঙলা", "বাংলা"); + } + + public void testJaPhaala() throws IOException { + check("ব্যাক্তি", "বেক্তি"); + check( "সন্ধ্যা", "সন্ধা"); + } + + public void testBaPhalaa() throws IOException { + check("স্বদেশ", "সদেস"); + check("তত্ত্ব", "তত্ত"); + check("বিশ্ব", "বিসস"); + } + + public void testVisarga() throws IOException { + check("দুঃখ", "দুখখ"); + check("উঃ", "উহ"); + check("পুনঃ", "পুন"); + } + + public void testBasics() throws IOException { + check("কণা", "কনা"); + check("শরীর", "সরির"); + check("বাড়ি", "বারি"); + } + + /** creates random strings in the bengali block and ensures the normalizer doesn't trip up on them */ + public void testRandom() throws IOException { + BengaliNormalizer normalizer = new BengaliNormalizer(); + for (int i = 0; i < 100000; i++) { + String randomBengali = TestUtil.randomSimpleStringRange(random(), '\u0980', '\u09FF', 7); + try { + int newLen = normalizer.normalize(randomBengali.toCharArray(), randomBengali.length()); + assertTrue(newLen >= 0); // should not return negative length + assertTrue(newLen <= randomBengali.length()); // should not increase length of string + } catch (Exception e) { + System.err.println("normalizer failed on input: '" + randomBengali + "' (" + escape(randomBengali) + ")"); + throw e; + } + } + } + + private void check(String input, String output) throws IOException { + Tokenizer tokenizer = whitespaceMockTokenizer(input); + TokenFilter tf = new BengaliNormalizationFilter(tokenizer); + assertTokenStreamContents(tf, new String[] { output }); + } + + public void testEmptyTerm() throws IOException { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, new BengaliNormalizationFilter(tokenizer)); + } + }; + checkOneTerm(a, "", ""); + a.close(); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliStemmer.java 
b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliStemmer.java new file mode 100644 index 00000000000..4f7617236f3 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bn/TestBengaliStemmer.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.bn; + + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; + +import java.io.IOException; + +/** + * Test Codes for BengaliStemmer + */ +public class TestBengaliStemmer extends BaseTokenStreamTestCase { + + /** + * Testing few verbal words + */ + public void testVerbsInShadhuForm() throws IOException { + check("করেছিলাম", "কর"); + check("করিতেছিলে", "কর"); + check("খাইতাম", "খাই"); + check("যাইবে", "যা"); + } + + public void testVerbsInCholitoForm() throws IOException { + check("করছিলাম", "কর"); + check("করছিলে", "কর"); + check("করতাম", "কর"); + check("যাব", "যা"); + check("যাবে", "যা"); + check("করি", "কর"); + check("করো", "কর"); + } + + public void testNouns() throws IOException { + check("মেয়েরা", "মে"); + check("মেয়েদেরকে", "মে"); + check("মেয়েদের", "মে"); + + check("একটি", "এক"); + check("মানুষগুলি", "মানুষ"); + } + + private void check(String input, String output) throws IOException { + Tokenizer tokenizer = whitespaceMockTokenizer(input); + TokenFilter tf = new BengaliStemFilter(tokenizer); + assertTokenStreamContents(tf, new String[] { output }); + } + + public void testEmptyTerm() throws IOException { + Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new TokenStreamComponents(tokenizer, new BengaliStemFilter(tokenizer)); + } + }; + checkOneTerm(a, "", ""); + a.close(); + } +} diff --git a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java index 706550a0f71..07b1c88c04b 100644 --- a/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/java/org/apache/lucene/analysis/icu/ICUNormalizer2CharFilter.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.io.Reader; import java.util.Objects; +import org.apache.lucene.analysis.CharacterUtils; import org.apache.lucene.analysis.charfilter.BaseCharFilter; import com.ibm.icu.text.Normalizer2; @@ -61,7 +62,7 @@ public final class ICUNormalizer2CharFilter extends BaseCharFilter { ICUNormalizer2CharFilter(Reader in, Normalizer2 
normalizer, int bufferSize) { super(in); this.normalizer = Objects.requireNonNull(normalizer); - this.tmpBuffer = new char[bufferSize]; + this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize); } @Override @@ -94,23 +95,31 @@ public final class ICUNormalizer2CharFilter extends BaseCharFilter { return -1; } - private final char[] tmpBuffer; + private final CharacterUtils.CharacterBuffer tmpBuffer; - private int readInputToBuffer() throws IOException { - final int len = input.read(tmpBuffer); - if (len == -1) { - inputFinished = true; - return 0; + private void readInputToBuffer() throws IOException { + while (true) { + // CharacterUtils.fill is supplementary char aware + final boolean hasRemainingChars = CharacterUtils.fill(tmpBuffer, input); + + assert tmpBuffer.getOffset() == 0; + inputBuffer.append(tmpBuffer.getBuffer(), 0, tmpBuffer.getLength()); + + if (hasRemainingChars == false) { + inputFinished = true; + break; + } + + final int lastCodePoint = Character.codePointBefore(tmpBuffer.getBuffer(), tmpBuffer.getLength(), 0); + if (normalizer.isInert(lastCodePoint)) { + // we require an inert char so that we can normalize content before and + // after this character independently + break; + } } - inputBuffer.append(tmpBuffer, 0, len); // if checkedInputBoundary was at the end of a buffer, we need to check that char again checkedInputBoundary = Math.max(checkedInputBoundary - 1, 0); - // this loop depends on 'isInert' (changes under normalization) but looks only at characters. - // so we treat all surrogates as non-inert for simplicity - if (normalizer.isInert(tmpBuffer[len - 1]) && !Character.isSurrogate(tmpBuffer[len-1])) { - return len; - } else return len + readInputToBuffer(); } private int readAndNormalizeFromInput() { diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java index 438a93179c2..822466f0192 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/TestICUNormalizer2CharFilter.java @@ -20,12 +20,14 @@ package org.apache.lucene.analysis.icu; import java.io.IOException; import java.io.Reader; import java.io.StringReader; +import java.util.Arrays; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.ngram.NGramTokenizer; import org.apache.lucene.util.TestUtil; @@ -418,4 +420,23 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase { } a.close(); } + + // https://issues.apache.org/jira/browse/LUCENE-7956 + public void testVeryLargeInputOfNonInertChars() throws Exception { + char[] text = new char[1000000]; + Arrays.fill(text, 'a'); + try (Analyzer a = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new KeywordTokenizer()); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + return new ICUNormalizer2CharFilter(reader, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE)); + } + }) { + checkAnalysisConsistency(random(), a, false, new String(text)); + } + } } diff --git 
a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java index 21ad7d134a4..6bc8573c094 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/document/SimpleNaiveBayesDocumentClassifier.java @@ -113,24 +113,26 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi Map{!bool should=title:lucene should=title:solr must_not=id:1}
+ */
+public class BoolQParserPlugin extends QParserPlugin {
+ public static final String NAME = "bool";
+
+ @Override
+ public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+ return new QParser(qstr, localParams, params, req) {
+ @Override
+ public Query parse() throws SyntaxError {
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ SolrParams solrParams = SolrParams.wrapDefaults(localParams, params);
+ addQueries(builder, solrParams.getParams("must"), BooleanClause.Occur.MUST);
+ addQueries(builder, solrParams.getParams("must_not"), BooleanClause.Occur.MUST_NOT);
+ addQueries(builder, solrParams.getParams("filter"), BooleanClause.Occur.FILTER);
+ addQueries(builder, solrParams.getParams("should"), BooleanClause.Occur.SHOULD);
+ return builder.build();
+ }
+
+ private void addQueries(BooleanQuery.Builder builder, String[] subQueries, BooleanClause.Occur occur) throws SyntaxError {
+ if (subQueries != null) {
+ for (String subQuery : subQueries) {
+ builder.add(subQuery(subQuery, null).parse(), occur);
+ }
+ }
+ }
+ };
+ }
+}
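
For illustration only (not part of the patch): a minimal SolrJ sketch of querying with the new {!bool} parser once BoolQParserPlugin is registered. The base URL, collection name ("techproducts") and fields below are assumed placeholders.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class BoolQParserExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      // Each local param maps to a BooleanClause.Occur: must, must_not, filter, should.
      SolrQuery q = new SolrQuery("{!bool should=title:lucene should=title:solr must_not=id:1}");
      QueryResponse rsp = client.query("techproducts", q);
      System.out.println("hits: " + rsp.getResults().getNumFound());
    }
  }
}
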
diff --git a/solr/core/src/java/org/apache/solr/search/FunctionQParser.java b/solr/core/src/java/org/apache/solr/search/FunctionQParser.java
index 7e6a706403d..d3a311d936a 100644
--- a/solr/core/src/java/org/apache/solr/search/FunctionQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/FunctionQParser.java
@@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.facet.AggValueSource;
+import org.apache.solr.search.function.FieldNameValueSource;
import java.util.ArrayList;
import java.util.List;
@@ -33,6 +34,7 @@ public class FunctionQParser extends QParser {
public static final int FLAG_CONSUME_DELIMITER = 0x01; // consume delimiter after parsing arg
public static final int FLAG_IS_AGG = 0x02;
+ public static final int FLAG_USE_FIELDNAME_SOURCE = 0x04; // When a field name is encountered, use the placeholder FieldNameValueSource instead of resolving to a real ValueSource
public static final int FLAG_DEFAULT = FLAG_CONSUME_DELIMITER;
/** @lucene.internal */
@@ -374,8 +376,13 @@ public class FunctionQParser extends QParser {
} else if ("false".equals(id)) {
valueSource = new BoolConstValueSource(false);
} else {
- SchemaField f = req.getSchema().getField(id);
- valueSource = f.getType().getValueSource(f, this);
+ if ((flags & FLAG_USE_FIELDNAME_SOURCE) != 0) {
+ // Don't try to create a ValueSource for the field, just use a placeholder.
+ valueSource = new FieldNameValueSource(id);
+ } else {
+ SchemaField f = req.getSchema().getField(id);
+ valueSource = f.getType().getValueSource(f, this);
+ }
}
}
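
The FieldNameValueSource placeholder referenced above is added elsewhere in this patch and is not shown in this section. As a rough sketch of the concept only -- the class name, members and behavior below are the editor's assumptions, not the actual implementation -- such a placeholder simply carries the field name so that a consumer like MinMaxAgg can resolve it to a concrete ValueSource later.

import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;

// Illustrative placeholder only -- the real FieldNameValueSource lives in
// org.apache.solr.search.function and may differ in detail.
public class FieldNamePlaceholder extends ValueSource {
  private final String fieldName;

  public FieldNamePlaceholder(String fieldName) {
    this.fieldName = fieldName;
  }

  public String getFieldName() {
    return fieldName;
  }

  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) {
    // Consumers are expected to swap this placeholder for a real ValueSource
    // before asking for values.
    throw new UnsupportedOperationException("placeholder must be resolved to a concrete ValueSource");
  }

  @Override
  public boolean equals(Object o) {
    return o instanceof FieldNamePlaceholder && fieldName.equals(((FieldNamePlaceholder) o).fieldName);
  }

  @Override
  public int hashCode() {
    return fieldName.hashCode();
  }

  @Override
  public String description() {
    return "field(" + fieldName + ")";
  }
}
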
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index 2ee63cf65ea..893783d8e3a 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -81,6 +81,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
map.put(SignificantTermsQParserPlugin.NAME, SignificantTermsQParserPlugin.class);
map.put(PayloadScoreQParserPlugin.NAME, PayloadScoreQParserPlugin.class);
map.put(PayloadCheckQParserPlugin.NAME, PayloadCheckQParserPlugin.class);
+ map.put(BoolQParserPlugin.NAME, BoolQParserPlugin.class);
standardPlugins = Collections.unmodifiableMap(map);
}
diff --git a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
index 7d6d162ce1a..51048d2aa6b 100644
--- a/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
+++ b/solr/core/src/java/org/apache/solr/search/ValueSourceParser.java
@@ -1017,14 +1017,14 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
addParser("agg_min", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new MinMaxAgg("min", fp.parseValueSource());
+ return new MinMaxAgg("min", fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
addParser("agg_max", new ValueSourceParser() {
@Override
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
- return new MinMaxAgg("max", fp.parseValueSource());
+ return new MinMaxAgg("max", fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
}
});
diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
index a6d6b9744b5..008d0fd4445 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java
@@ -18,6 +18,7 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.Arrays;
+import java.util.Date;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
@@ -25,9 +26,12 @@ import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
+import org.apache.solr.common.SolrException;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrFieldSource;
+import org.apache.solr.search.function.FieldNameValueSource;
public class MinMaxAgg extends SimpleAggValueSource {
final int minmax; // a multiplier to reverse the normal order of compare if this is max instead of min (i.e. max will be -1)
@@ -41,28 +45,46 @@ public class MinMaxAgg extends SimpleAggValueSource {
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
ValueSource vs = getArg();
- if (vs instanceof StrFieldSource) {
- String field = ((StrFieldSource) vs).getField();
- SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
+ SchemaField sf = null;
+
+ if (vs instanceof FieldNameValueSource) {
+ String field = ((FieldNameValueSource)vs).getFieldName();
+ sf = fcontext.qcontext.searcher().getSchema().getField(field);
+
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
- if (sf.hasDocValues()) {
- // dv
- } else {
- // uif
- }
+ vs = null;
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field);
} else {
- return new SingleValuedOrdAcc(fcontext, sf, numSlots);
+ vs = sf.getType().getValueSource(sf, null);
+ }
+ }
+
+ if (vs instanceof StrFieldSource) {
+ return new SingleValuedOrdAcc(fcontext, sf, numSlots);
+ }
+
+ // Since functions don't currently have types, we rely on the type of the field
+ if (sf != null && sf.getType().getNumberType() != null) {
+ switch (sf.getType().getNumberType()) {
+ case FLOAT:
+ case DOUBLE:
+ return new DFuncAcc(vs, fcontext, numSlots);
+ case INTEGER:
+ case LONG:
+ return new LFuncAcc(vs, fcontext, numSlots);
+ case DATE:
+ return new DateFuncAcc(vs, fcontext, numSlots);
}
}
// numeric functions
- return new ValSlotAcc(vs, fcontext, numSlots);
+ return new DFuncAcc(vs, fcontext, numSlots);
}
@Override
public FacetMerger createFacetMerger(Object prototype) {
- if (prototype instanceof Number)
- return new NumericMerger();
+ if (prototype instanceof Double)
+ return new NumericMerger(); // still use NumericMerger to handle NaN?
else if (prototype instanceof Comparable) {
return new ComparableMerger();
} else {
@@ -114,8 +136,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
}
}
- class ValSlotAcc extends DoubleFuncSlotAcc {
- public ValSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+ class DFuncAcc extends DoubleFuncSlotAcc {
+ public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
super(values, fcontext, numSlots, Double.NaN);
}
@@ -129,6 +151,101 @@ public class MinMaxAgg extends SimpleAggValueSource {
result[slotNum] = val;
}
}
+
+ @Override
+ public Object getValue(int slot) {
+ double val = result[slot];
+ if (Double.isNaN(val)) {
+ return null;
+ } else {
+ return val;
+ }
+ }
+ }
+
+ class LFuncAcc extends LongFuncSlotAcc {
+ FixedBitSet exists;
+ public LFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+ super(values, fcontext, numSlots, 0);
+ exists = new FixedBitSet(numSlots);
+ }
+
+ @Override
+ public void collect(int doc, int slotNum) throws IOException {
+ long val = values.longVal(doc);
+ if (val == 0 && !values.exists(doc)) return; // depends on the fact that non-existing values return 0 for function queries
+
+ long currVal = result[slotNum];
+ if (currVal == 0 && !exists.get(slotNum)) {
+ exists.set(slotNum);
+ result[slotNum] = val;
+ } else if (Long.compare(val, currVal) * minmax < 0) {
+ result[slotNum] = val;
+ }
+ }
+
+ @Override
+ public Object getValue(int slot) {
+ long val = result[slot];
+ if (val == 0 && !exists.get(slot)) {
+ return null;
+ } else {
+ return val;
+ }
+ }
+
+ @Override
+ public void resize(Resizer resizer) {
+ super.resize(resizer);
+ exists = resizer.resize(exists);
+ }
+
+ @Override
+ public int compare(int slotA, int slotB) {
+ long a = result[slotA];
+ long b = result[slotB];
+ boolean ea = a != 0 || exists.get(slotA);
+ boolean eb = b != 0 || exists.get(slotB);
+
+ if (ea != eb) {
+ if (ea) return 1; // a exists and b doesn't. TODO: we need context to be able to sort missing values last (SOLR-10618)
+ if (eb) return -1; // b exists and a is missing
+ }
+
+ return Long.compare(a, b);
+ }
+
+ @Override
+ public void reset() {
+ super.reset();
+ exists.clear(0, exists.length());
+ }
+
+ }
+
+ class DateFuncAcc extends LongFuncSlotAcc {
+ private static final long MISSING = Long.MIN_VALUE;
+ public DateFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
+ super(values, fcontext, numSlots, MISSING);
+ }
+
+ @Override
+ public void collect(int doc, int slotNum) throws IOException {
+ long val = values.longVal(doc);
+ if (val == 0 && !values.exists(doc)) return; // depends on the fact that non-existing values return 0 for function queries
+
+ long currVal = result[slotNum];
+ if (Long.compare(val, currVal) * minmax < 0 || currVal == MISSING) {
+ result[slotNum] = val;
+ }
+ }
+
+ // let compare be the default for now (since we can't yet correctly handle sortMissingLast)
+
+ @Override
+ public Object getValue(int slot) {
+ return result[slot] == MISSING ? null : new Date(result[slot]);
+ }
}
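
For illustration only (not part of the patch): a hedged sketch of what these MinMaxAgg changes enable in JSON Facets -- min/max over single-valued date, integer/long and string fields rather than only double-valued function sources. The collection and field names are assumptions (and assumed single-valued).

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;

public class MinMaxFacetExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(0);
      // With this patch, DateFuncAcc handles the date field, LFuncAcc the
      // int/long field and SingleValuedOrdAcc the single-valued string field.
      q.add("json.facet",
          "{ oldest : 'min(manufacturedate_dt)',"
        + "  newest : 'max(manufacturedate_dt)',"
        + "  minPopularity : 'min(popularity)',"
        + "  firstCategory : 'min(category_s)' }");
      System.out.println(client.query("techproducts", q).getResponse().get("facets"));
    }
  }
}
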
diff --git a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
index 1240051be6e..578ef1796a9 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/SlotAcc.java
@@ -16,14 +16,6 @@
*/
package org.apache.solr.search.facet;
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.queries.function.FunctionValues;
-import org.apache.lucene.queries.function.ValueSource;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.search.DocIterator;
-import org.apache.solr.search.DocSet;
-import org.apache.solr.search.SolrIndexSearcher;
-
import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Array;
@@ -32,6 +24,16 @@ import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.SolrIndexSearcher;
+
/**
* Accumulates statistics separated by a slot number.
* There is a separate statistic per slot. The slot is usually an ordinal into a set of values, e.g. tracking a count
@@ -140,6 +142,38 @@ public abstract class SlotAcc implements Closeable {
return values;
}
+ public long[] resize(long[] old, long defaultValue) {
+ long[] values = new long[getNewSize()];
+ if (defaultValue != 0) {
+ Arrays.fill(values, 0, values.length, defaultValue);
+ }
+ for (int i = 0; i < old.length; i++) {
+ long val = old[i];
+ if (val != defaultValue) {
+ int newSlot = getNewSlot(i);
+ if (newSlot >= 0) {
+ values[newSlot] = val;
+ }
+ }
+ }
+ return values;
+ }
+
+ public FixedBitSet resize(FixedBitSet old) {
+ FixedBitSet values = new FixedBitSet(getNewSize());
+ int oldSize = old.length();
+
+ for(int oldSlot = 0;;) {
+ oldSlot = old.nextSetBit(oldSlot);
+ if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
+ int newSlot = getNewSlot(oldSlot);
+ if (newSlot >= 0) values.set(newSlot);
+ if (++oldSlot >= oldSize) break;
+ }
+
+ return values;
+ }
+
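
The new Resizer.resize(FixedBitSet) above carries set bits from the old accumulator over to the resized one by walking them with nextSetBit() and re-mapping each through getNewSlot(). A small standalone sketch of that iteration (assuming lucene-core on the classpath; the doubling slot mapping below is a made-up stand-in for Resizer.getNewSlot, not the real remapping):

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;

// Sketch: copy set bits from an old bitset into a larger one, remapping each slot.
public class BitSetResizeSketch {
  static int getNewSlot(int oldSlot) { return oldSlot * 2; }   // hypothetical mapping

  public static void main(String[] args) {
    FixedBitSet old = new FixedBitSet(8);
    old.set(1);
    old.set(5);

    FixedBitSet resized = new FixedBitSet(16);
    int oldSize = old.length();
    for (int oldSlot = 0; oldSlot < oldSize; ) {
      oldSlot = old.nextSetBit(oldSlot);                 // iterate the old bitset's set bits
      if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
      int newSlot = getNewSlot(oldSlot);
      if (newSlot >= 0) {                                // the mapping may drop a slot
        resized.set(newSlot);
      }
      if (++oldSlot >= oldSize) break;                   // keep nextSetBit's index in range
    }
    System.out.println(resized.cardinality());           // expected: 2 (bits 2 and 10)
  }
}

nextSetBit() returns DocIdSetIterator.NO_MORE_DOCS once no set bits remain and must not be called with an index at or past length(), hence the two break conditions.
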
diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java
--- a/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/FieldLengthUpdateProcessorFactory.java
* For example, with the configuration listed below any documents
* containing String values (such as "abcdef" or
* "xyz") in a field declared in the schema using
- * TrieIntField or TrieLongField
+ * IntPointField or LongPointField
* would have those Strings replaced with the length of those fields as an
* Integer
* (ie: 6 and 3 respectively)
@@ -43,8 +43,8 @@ import static org.apache.solr.update.processor.FieldValueMutatingUpdateProcessor
*
* <processor class="solr.FieldLengthUpdateProcessorFactory">
* <arr name="typeClass">
- * <str>solr.TrieIntField</str>
- * <str>solr.TrieLongField</str>
+ * <str>solr.IntPointField</str>
+ * <str>solr.LongPointField</str>
* </arr>
* </processor>
*/
diff --git a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
index c9034f86da2..a8cb87dfd42 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/FieldMutatingUpdateProcessorFactory.java
@@ -79,7 +79,7 @@ import static org.apache.solr.update.processor.FieldMutatingUpdateProcessor.SELE
* In the ExampleFieldMutatingUpdateProcessorFactory configured below,
* fields will be mutated if the name starts with "foo" or "bar";
* unless the field name contains the substring "SKIP" or
- * the fieldType is (or subclasses) TrieDateField.  Meaning a field named
+ * the fieldType is (or subclasses) DatePointField.  Meaning a field named
* "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith"
* that uses StrField will be selected.
*
@@ -92,7 +92,7 @@ import static org.apache.solr.update.processor.FieldMutatingUpdateProcessor.SELE
* <str name="fieldRegex">.*SKIP.*</str>
* </lst>
* <lst name="exclude">
- * <str name="typeClass">solr.TrieDateField</str>
+ * <str name="typeClass">solr.DatePointField</str>
* </lst>
* </processor>
*
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java
index 9d0311c4a6b..5958f3a3b70 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.java
@@ -47,8 +47,8 @@ import org.slf4j.LoggerFactory;
*
* <p>
* The default selection behavior is to mutate both those fields that don't match
- * a schema field, as well as those fields that match a schema field with a field
- * type that uses class solr.TrieDateField.
+ * a schema field, as well as those fields that match a schema field with a date
+ * field type.
* </p>
* <p>
* If all values are parseable as dates (or are already Date), then the field will
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java
index c2d2e8ec476..93badadf8f2 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/ParseDoubleFieldUpdateProcessorFactory.java
@@ -38,8 +38,8 @@ import java.util.Locale;
*
* <p>
* The default selection behavior is to mutate both those fields that don't match
- * a schema field, as well as those fields that match a schema field with a field
- * type that uses class solr.TrieDoubleField.
+ * a schema field, as well as those fields that match a schema field with a double
+ * field type.
* </p>
* <p>
* If all values are parseable as double (or are already Double), then the field
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java
index 778e7775cd6..311b4aec8b7 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/ParseFloatFieldUpdateProcessorFactory.java
@@ -38,8 +38,8 @@ import java.util.Locale;
*
* <p>
* The default selection behavior is to mutate both those fields that don't match
- * a schema field, as well as those fields that match a schema field with a field
- * type that uses class solr.TrieFloatField.
+ * a schema field, as well as those fields that match a schema field with a float
+ * field type.
* </p>
* <p>
* If all values are parseable as float (or are already Float), then the field
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java
index eebc7ff54d7..fa7e1caf59d 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/ParseIntFieldUpdateProcessorFactory.java
@@ -35,8 +35,8 @@ import java.util.Locale;
*
* <p>
* The default selection behavior is to mutate both those fields that don't match
- * a schema field, as well as those fields that match a schema field with a field
- * type that uses class solr.TrieIntField.
+ * a schema field, as well as those fields that match a schema field with an int
+ * field type.
* </p>
* <p>
* If all values are parseable as int (or are already Integer), then the field
diff --git a/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java
index bc7d1da142b..78863c1b100 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/ParseLongFieldUpdateProcessorFactory.java
@@ -35,8 +35,8 @@ import java.util.Locale;
*
* <p>
* The default selection behavior is to mutate both those fields that don't match
- * a schema field, as well as those fields that match a schema field with a field
- * type that uses class solr.TrieLongField.
+ * a schema field, as well as those fields that match a schema field with a long
+ * field type.
* </p>
* <p>
* If all values are parseable as long (or are already Long), then the field
diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
index 52e2e781819..84280ee3a82 100644
--- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
+++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
@@ -869,7 +869,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
// testing everything from query level is hard because
// time marches on ... and there is no easy way to reach into the
- // bowels of TrieDateField and muck with the definition of "now"
+ // bowels of DatePointField and muck with the definition of "now"
// ...
// BUT: we can test that crazy combinations of "NOW" all work correctly,
// assuming the test doesn't take too long to run...
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedMissingSort.java b/solr/core/src/test/org/apache/solr/TestDistributedMissingSort.java
index 378ad0dcfc5..416556a1f95 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedMissingSort.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedMissingSort.java
@@ -30,10 +30,10 @@ public class TestDistributedMissingSort extends BaseDistributedSearchTestCase {
schemaString = "schema-distributed-missing-sort.xml";
}
- String sint1_ml = "one_i1_ml"; // TrieIntField, sortMissingLast=true, multiValued=false
- String sint1_mf = "two_i1_mf"; // TrieIntField, sortMissingFirst=true, multiValued=false
- String long1_ml = "three_l1_ml"; // TrieLongField, sortMissingLast=true, multiValued=false
- String long1_mf = "four_l1_mf"; // TrieLongField, sortMissingFirst=true, multiValued=false
+ String sint1_ml = "one_i1_ml"; // int field, sortMissingLast=true, multiValued=false
+ String sint1_mf = "two_i1_mf"; // int field, sortMissingFirst=true, multiValued=false
+ String long1_ml = "three_l1_ml"; // long field, sortMissingLast=true, multiValued=false
+ String long1_mf = "four_l1_mf"; // long field, sortMissingFirst=true, multiValued=false
String string1_ml = "five_s1_ml"; // StringField, sortMissingLast=true, multiValued=false
String string1_mf = "six_s1_mf"; // StringField, sortMissingFirst=true, multiValued=false
diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java
index 6959bd825d5..a5b37d8a5d0 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java
@@ -239,6 +239,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
}
}
+ @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11278")
public void testBootstrapWithContinousIndexingOnSourceCluster() throws Exception {
// start the target first so that we know its zkhost
MiniSolrCloudCluster target = new MiniSolrCloudCluster(1, createTempDir("cdcr-target"), buildJettyConfig("/solr"));
diff --git a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
index 4c50480af0d..6a5697a93f4 100644
--- a/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
+++ b/solr/core/src/test/org/apache/solr/core/TestLazyCores.java
@@ -781,7 +781,6 @@ public class TestLazyCores extends SolrTestCaseJ4 {
}
}
- @BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-10101")
// Insure that when a core is aged out of the transient cache, any uncommitted docs are preserved.
// Note, this needs FS-based indexes to persist!
// Cores 2, 3, 6, 7, 8, 9 are transient
@@ -814,7 +813,8 @@ public class TestLazyCores extends SolrTestCaseJ4 {
openCores.clear();
// We still should have 6, 7, 8, 9 loaded, their reference counts have NOT dropped to zero
- checkInCores(cc, "collection6", "collection7", "collection8", "collection9");
+ checkInCores(cc, "collection1", "collection5",
+ "collection6", "collection7", "collection8", "collection9");
for (String coreName : coreList) {
// The point of this test is to insure that when cores are aged out and re-opened
diff --git a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
index 1999965f339..70ebd0aa788 100644
--- a/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
+++ b/solr/core/src/test/org/apache/solr/handler/TestSQLHandler.java
@@ -37,7 +37,6 @@ import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
-
import org.apache.solr.common.util.NamedList;
import org.junit.After;
import org.junit.Before;
diff --git a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java
index dab47ca4b0e..19722be77ea 100644
--- a/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java
+++ b/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java
@@ -230,15 +230,15 @@ public class CursorMarkTest extends SolrTestCaseJ4 {
random().nextBytes(randBytes);
val = new BytesRef(randBytes);
} else if (fieldName.contains("int")) {
- val = random().nextInt(); // TrieIntField
+ val = random().nextInt();
} else if (fieldName.contains("long")) {
- val = random().nextLong(); // TrieLongField
+ val = random().nextLong();
} else if (fieldName.contains("float")) {
- val = random().nextFloat() * random().nextInt(); // TrieFloatField
+ val = random().nextFloat() * random().nextInt();
} else if (fieldName.contains("double")) {
- val = random().nextDouble() * random().nextInt(); // TrieDoubleField
+ val = random().nextDouble() * random().nextInt();
} else if (fieldName.contains("date")) {
- val = random().nextLong(); // TrieDateField
+ val = random().nextLong();
} else if (fieldName.startsWith("currency")) {
val = random().nextDouble();
} else if (fieldName.startsWith("uuid")) {
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 6e747001cd5..eab254fb3f4 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -1190,6 +1190,27 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
}
}
+ public void testBoolQuery() throws Exception {
+ assertQueryEquals("bool",
+ "{!bool must='{!lucene}foo_s:a' must='{!lucene}foo_s:b'}",
+ "{!bool must='{!lucene}foo_s:b' must='{!lucene}foo_s:a'}");
+ assertQueryEquals("bool",
+ "{!bool must_not='{!lucene}foo_s:a' should='{!lucene}foo_s:b' " +
+ "must='{!lucene}foo_s:c' filter='{!lucene}foo_s:d' filter='{!lucene}foo_s:e'}",
+ "{!bool must='{!lucene}foo_s:c' filter='{!lucene}foo_s:d' " +
+ "must_not='{!lucene}foo_s:a' should='{!lucene}foo_s:b' filter='{!lucene}foo_s:e'}");
+ try {
+ assertQueryEquals
+ ("bool"
+ , "{!bool must='{!lucene}foo_s:a'}"
+ , "{!bool should='{!lucene}foo_s:a'}"
+ );
+ fail("queries should not have been equal");
+ } catch(AssertionFailedError e) {
+ assertTrue("queries were not equal, as expected", true);
+ }
+ }
+
// Override req to add df param
public static SolrQueryRequest req(String... q) {
return SolrTestCaseJ4.req(q, "df", "text");
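
The testBoolQuery addition above leans on the fact that the BooleanQuery built by the {!bool} parser compares equal regardless of the order its clauses were added in, while swapping the occur flag (must vs. should) breaks equality. A minimal sketch of that equality behavior against plain Lucene (assuming lucene-core on the classpath; the field name and terms are illustrative):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class BoolEqualitySketch {
  public static void main(String[] args) {
    // Same MUST clauses, added in a different order: expected to be equal.
    BooleanQuery q1 = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo_s", "a")), BooleanClause.Occur.MUST)
        .add(new TermQuery(new Term("foo_s", "b")), BooleanClause.Occur.MUST)
        .build();
    BooleanQuery q2 = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo_s", "b")), BooleanClause.Occur.MUST)
        .add(new TermQuery(new Term("foo_s", "a")), BooleanClause.Occur.MUST)
        .build();
    System.out.println(q1.equals(q2));   // expected: true

    // Same clause under a different occur flag: expected to differ.
    BooleanQuery q3 = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo_s", "a")), BooleanClause.Occur.SHOULD)
        .build();
    BooleanQuery q4 = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("foo_s", "a")), BooleanClause.Occur.MUST)
        .build();
    System.out.println(q3.equals(q4));   // expected: false
  }
}
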
diff --git a/solr/core/src/test/org/apache/solr/search/TestSmileRequest.java b/solr/core/src/test/org/apache/solr/search/TestSmileRequest.java
index c6d72ee3497..0bf46e756d3 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSmileRequest.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSmileRequest.java
@@ -81,7 +81,7 @@ public class TestSmileRequest extends SolrTestCaseJ4 {
}
};
client.queryDefaults().set("shards", servers.getShards());
- TestJsonRequest.doJsonRequest(client);
+ TestJsonRequest.doJsonRequest(client, true);
}
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
index 65c4d8f28aa..1db6b5a7c22 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java
@@ -278,7 +278,7 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
q = qParser.getQuery();
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
- // large numeric filter query should use TermsQuery (for trie fields)
+ // large numeric filter query should use TermsQuery
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
diff --git a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
index 3ee069f6a75..33d7fa89c78 100644
--- a/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
+++ b/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
@@ -467,28 +467,29 @@ public class TestJsonFacets extends SolrTestCaseHS {
// single valued strings
- doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") );
+ doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "num_l","long_l", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") );
// multi-valued strings, long/float substitute for int/double
- doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
+ doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_l","long_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
// multi-valued strings, method=dv for terms facets
- doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
+ doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "num_l","long_l","super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
// single valued docvalues for strings, and single valued numeric doc values for numeric fields
- doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
+ doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_l","long_ld", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
// multi-valued docvalues
FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage
- doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
+ doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
// multi-valued docvalues
FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = true;
- doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
+ doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
}
public static void doStatsTemplated(Client client, ModifiableSolrParams p) throws Exception {
p.set("Z_num_i", "Z_" + p.get("num_i") );
+ p.set("Z_num_l", "Z_" + p.get("num_l") );
p.set("sparse_num_d", "sparse_" + p.get("num_d") );
if (p.get("num_is") == null) p.add("num_is","num_is");
if (p.get("num_fs") == null) p.add("num_fs","num_fs");
@@ -528,6 +529,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
String num_is = m.expand("${num_is}");
String num_fs = m.expand("${num_fs}");
String Z_num_i = m.expand("${Z_num_i}");
+ String Z_num_l = m.expand("${Z_num_l}");
String val_b = m.expand("${val_b}");
String date = m.expand("${date}");
String super_s = m.expand("${super_s}");
@@ -553,13 +555,13 @@ public class TestJsonFacets extends SolrTestCaseHS {
iclient.add(doc, null);
iclient.add(doc, null);
iclient.add(doc, null); // a couple of deleted docs
- iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0"), null);
+ iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0", Z_num_l,"0"), null);
iclient.add(sdoc("id", "3"), null);
iclient.commit();
- iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE), null);
+ iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE, Z_num_l,Long.MIN_VALUE), null);
iclient.add(sdoc("id", "5", cat_s, "B", where_s, "NJ", num_d, "11", num_i, "7", num_is,"0", num_fs,"0", super_s,"batman" , date,"2001-02-03T01:02:03Z" ,sparse_s,"two", multi_ss, "a"), null);
iclient.commit();
- iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE), null);
+ iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE, Z_num_l,Long.MAX_VALUE), null);
iclient.commit();
client.commit();
@@ -685,6 +687,35 @@ public class TestJsonFacets extends SolrTestCaseHS {
", f2:{ 'buckets':[{ val:'B', count:3, n1:-2.0}, { val:'A', count:2, n1:6.0 }]} }"
);
+ // facet on numbers to test resize from hashing (we may need to sort by the metric to actually exercise the resize)
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ " f1:{${terms} type:field, field:${num_is}, facet:{a:'min(${num_i})'}, sort:'a asc' }" +
+ ",f2:{${terms} type:field, field:${num_is}, facet:{a:'max(${num_i})'}, sort:'a desc' }" +
+ "}"
+ )
+ , "facets=={count:6 " +
+ ",f1:{ buckets:[{val:-1,count:2,a:-5},{val:3,count:2,a:-5},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:0,count:2,a:3} ] } " +
+ ",f2:{ buckets:[{val:0,count:2,a:7},{val:3,count:2,a:3},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:-1,count:2,a:-5} ] } " +
+ "}"
+ );
+
+
+ // Same thing for dates
+ // test min/max of the date field
+ if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently...
+ client.testJQ(params(p, "q", "*:*"
+ , "json.facet", "{" +
+ " f3:{${terms} type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" +
+ ",f4:{${terms} type:field, field:${num_is}, facet:{a:'max(${date})'}, sort:'a asc' }" +
+ "}"
+ )
+ , "facets=={count:6 " +
+ ",f3:{ buckets:[{val:-1,count:2,a:'2002-02-02T02:02:02Z'},{val:3,count:2,a:'2002-02-02T02:02:02Z'},{val:0,count:2,a:'2001-02-03T01:02:03Z'},{val:-5,count:1,a:'2001-01-01T01:01:01Z'},{val:2,count:1,a:'2001-01-01T01:01:01Z'} ] } " +
+ ",f4:{ buckets:[{val:-5,count:1,a:'2001-01-01T01:01:01Z'},{val:2,count:1,a:'2001-01-01T01:01:01Z'},{val:-1,count:2,a:'2002-03-01T03:02:01Z'},{val:0,count:2,a:'2003-03-03T03:03:03Z'},{val:3,count:2,a:'2003-03-03T03:03:03Z'} ] } " +
+ "}"
+ );
+ }
// percentiles 0,10,50,90,100
@@ -983,16 +1014,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
// stats at top level
client.testJQ(params(p, "q", "*:*"
- , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', min1:'min(${num_d})', max1:'max(${num_d})'" +
+ , "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" +
", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" +
- ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})' }"
+ ", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})'" +
+ ", mini:'min(${num_i})', maxi:'max(${num_i})'" +
+ " }"
)
, "facets=={ 'count':6, " +
- "sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, min1:-9.0, max1:11.0" +
+ "sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, mind:-9.0, maxd:11.0" +
", numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5" +
", where_hll:2, hll_num_i:4, hll_num_d:5, hll_date:5" +
- ", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639}"
+ ", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639" +
+ ", mini:-5, maxi:7" +
+ "}"
);
// stats at top level, no matches
@@ -1019,8 +1054,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
",sum1:0.0," +
" sumsq1:0.0," +
" avg1:0.0," + // TODO: undesirable. omit?
- " min1:'NaN'," + // TODO: undesirable. omit?
- " max1:'NaN'," +
+ // " min1:'NaN'," +
+ // " max1:'NaN'," +
" numwhere:0," +
" unique_num_i:0," +
" unique_num_d:0," +
@@ -1307,16 +1342,26 @@ public class TestJsonFacets extends SolrTestCaseHS {
"}"
);
- // test 0, min/max int
+ // test 0, min/max int/long
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{" +
- " u : 'unique(${Z_num_i})'" +
+ " u : 'unique(${Z_num_i})'" +
+ ", u2 : 'unique(${Z_num_l})'" +
+ ", min1 : 'min(${Z_num_i})', max1 : 'max(${Z_num_i})'" +
+ ", min2 : 'min(${Z_num_l})', max2 : 'max(${Z_num_l})'" +
", f1:{${terms} type:field, field:${Z_num_i} }" +
+ ", f2:{${terms} type:field, field:${Z_num_l} }" +
"}"
)
, "facets=={count:6 " +
",u:3" +
+ ",u2:3" +
+ ",min1:" + Integer.MIN_VALUE +
+ ",max1:" + Integer.MAX_VALUE +
+ ",min2:" + Long.MIN_VALUE +
+ ",max2:" + Long.MAX_VALUE +
",f1:{ buckets:[{val:" + Integer.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Integer.MAX_VALUE+",count:1}]} " +
+ ",f2:{ buckets:[{val:" + Long.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Long.MAX_VALUE+",count:1}]} " +
"}"
);
@@ -1394,11 +1439,12 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test acc reuse (i.e. reset() method). This is normally used for stats that are not calculated in the first phase,
// currently non-sorting stats.
client.testJQ(params(p, "q", "*:*"
- , "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}"
+ , "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', mini:'min(${num_i})', maxi:'max(${num_i})'" +
+ ", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}"
)
, "facets=={ 'count':6, " +
- "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," +
- " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } "
+ "'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," +
+ " {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } "
);
diff --git a/solr/core/src/test/org/apache/solr/search/json/TestJsonRequest.java b/solr/core/src/test/org/apache/solr/search/json/TestJsonRequest.java
index 9c151c1d133..4f47f8a2652 100644
--- a/solr/core/src/test/org/apache/solr/search/json/TestJsonRequest.java
+++ b/solr/core/src/test/org/apache/solr/search/json/TestJsonRequest.java
@@ -53,7 +53,7 @@ public class TestJsonRequest extends SolrTestCaseHS {
@Test
public void testLocalJsonRequest() throws Exception {
- doJsonRequest(Client.localClient);
+ doJsonRequest(Client.localClient, false);
}
@Test
@@ -62,11 +62,10 @@ public class TestJsonRequest extends SolrTestCaseHS {
initServers();
Client client = servers.getClient( random().nextInt() );
client.queryDefaults().set( "shards", servers.getShards() );
- doJsonRequest(client);
+ doJsonRequest(client, true);
}
-
- public static void doJsonRequest(Client client) throws Exception {
+ public static void doJsonRequest(Client client, boolean isDistrib) throws Exception {
client.deleteByQuery("*:*", null);
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY"), null);
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ"), null);
@@ -217,6 +216,178 @@ public class TestJsonRequest extends SolrTestCaseHS {
, "debug/json=={query:'cat_s:A', filter:'where_s:NY'}"
);
+ // test query dsl
+ client.testJQ( params("json", "{'query':'{!lucene}id:1'}")
+ , "response/numFound==1"
+ );
+
+ client.testJQ( params("json", "{" +
+ " 'query': {" +
+ " 'bool' : {" +
+ " 'should' : [" +
+ " {'lucene' : {'query' : 'id:1'}}," +
+ " 'id:2'" +
+ " ]" +
+ " }" +
+ " }" +
+ "}")
+ , "response/numFound==2"
+ );
+
+ client.testJQ( params("json", "{" +
+ " 'query': {" +
+ " 'bool' : {" +
+ " 'should' : [" +
+ " 'id:1'," +
+ " 'id:2'" +
+ " ]" +
+ " }" +
+ " }" +
+ "}")
+ , "response/numFound==2"
+ );
+
+ client.testJQ( params("json", "{ " +
+ " query : {" +
+ " boost : {" +
+ " query : {" +
+ " lucene : { " +
+ " df : cat_s, " +
+ " query : A " +
+ " }" +
+ " }, " +
+ " b : 1.5 " +
+ " } " +
+ " } " +
+ "}")
+ , "response/numFound==2"
+ );
+
+ client.testJQ( params("json","{ " +
+ " query : {" +
+ " bool : {" +
+ " must : {" +
+ " lucene : {" +
+ " q.op : AND," +
+ " df : cat_s," +
+ " query : A" +
+ " }" +
+ " }" +
+ " must_not : {lucene : {query:'id: 1'}}" +
+ " }" +
+ " }" +
+ "}")
+ , "response/numFound==1"
+ );
+
+ client.testJQ( params("json","{ " +
+ " query : {" +
+ " bool : {" +
+ " must : {" +
+ " lucene : {" +
+ " q.op : AND," +
+ " df : cat_s," +
+ " query : A" +
+ " }" +
+ " }" +
+ " must_not : [{lucene : {query:'id: 1'}}]" +
+ " }" +
+ " }" +
+ "}")
+ , "response/numFound==1"
+ );
+
+ client.testJQ( params("json","{ " +
+ " query : {" +
+ " bool : {" +
+ " must : '{!lucene q.op=AND df=cat_s}A'" +
+ " must_not : '{!lucene v=\\'id:1\\'}'" +
+ " }" +
+ " }" +
+ "}")
+ , "response/numFound==1"
+ );
+
+
+ client.testJQ( params("json","{" +
+ " query : '*:*'," +
+ " filter : {" +
+ " collapse : {" +
+ " field : cat_s" +
+ " } " +
+ " } " +
+ "}")
+ , isDistrib ? "" : "response/numFound==2"
+ );
+
+ client.testJQ( params("json","{" +
+ " query : {" +
+ " edismax : {" +
+ " query : 'A'," +
+ " qf : 'cat_s'," +
+ " bq : {" +
+ " edismax : {" +
+ " query : 'NJ'" +
+ " qf : 'where_s'" +
+ " }" +
+ " }" +
+ " }" +
+ " }, " +
+ " fields : id" +
+ "}")
+ , "response/numFound==2", isDistrib? "" : "response/docs==[{id:'4'},{id:'1'}]"
+ );
+
+ client.testJQ( params("json","{" +
+ " query : {" +
+ " edismax : {" +
+ " query : 'A'," +
+ " qf : 'cat_s'," +
+ " bq : {" +
+ " edismax : {" +
+ " query : 'NY'" +
+ " qf : 'where_s'" +
+ " }" +
+ " }" +
+ " }" +
+ " }, " +
+ " fields : id" +
+ "}")
+ , "response/numFound==2", isDistrib? "" : "response/docs==[{id:'1'},{id:'4'}]"
+ );
+
+ client.testJQ( params("json","{" +
+ " query : {" +
+ " dismax : {" +
+ " query : 'A NJ'" +
+ " qf : 'cat_s^0.1 where_s^100'" +
+ " } " +
+ " }, " +
+ " filter : '-id:2'," +
+ " fields : id" +
+ "}")
+ , "response/numFound==3", isDistrib? "" : "response/docs==[{id:'4'},{id:'5'},{id:'1'}]"
+ );
+
+ client.testJQ( params("json","{" +
+ " query : {" +
+ " dismax : {" +
+ " query : 'A NJ'" +
+ " qf : ['cat_s^100', 'where_s^0.1']" +
+ " } " +
+ " }, " +
+ " filter : '-id:2'," +
+ " fields : id" +
+ "}")
+ , "response/numFound==3", isDistrib? "" : "response/docs==[{id:'4'},{id:'1'},{id:'5'}]"
+ );
+
+ try {
+ client.testJQ(params("json", "{query:{'lucene':'id:1'}}"));
+ fail();
+ } catch (Exception e) {
+ assertTrue(e.getMessage().contains("id:1"));
+ }
try {
// test failure on unknown parameter
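
The query DSL cases above are written in the relaxed, noggit-style syntax the test client accepts (unquoted keys, optional commas). Spelled out as strict JSON, one of the bool examples looks like the sketch below (a hand-written illustration, not taken from the patch; the /query endpoint and collection name are assumptions):

// Sketch: the same kind of JSON query DSL body, written as strict JSON in plain Java.
public class JsonQueryDslSketch {
  public static void main(String[] args) {
    String body =
        "{\n" +
        "  \"query\": {\n" +
        "    \"bool\": {\n" +
        "      \"must\":     { \"lucene\": { \"df\": \"cat_s\", \"query\": \"A\" } },\n" +
        "      \"must_not\": \"{!lucene v='id:1'}\"\n" +
        "    }\n" +
        "  },\n" +
        "  \"filter\": \"-id:2\",\n" +
        "  \"fields\": \"id\"\n" +
        "}";
    // A body like this would typically be POSTed to /solr/<collection>/query.
    System.out.println(body);
  }
}
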
diff --git a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java
index cb2affabed6..84ec63d6473 100644
--- a/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java
+++ b/solr/core/src/test/org/apache/solr/update/SoftAutoCommitTest.java
@@ -152,12 +152,6 @@ public class SoftAutoCommitTest extends AbstractSolrTestCase {
minHardCommitNanos + "ns",
minHardCommitNanos < firstHardNanos);
- final Long firstSearcherNanos = monitor.searcher.poll(5000, MILLISECONDS);
- assertNotNull("didn't get a single new searcher", firstSearcherNanos);
- for (int i = 0; i <= softCommitMaxDocs; i++) {
- assertQ("should find one", req("id:"+(8000 + i)) ,"//result[@numFound=1]" );
- }
-
// wait a bit, w/o other action we shouldn't see any new hard/soft commits
assertNull("Got a hard commit we weren't expecting",
monitor.hard.poll(1000, MILLISECONDS));
diff --git a/solr/example/example-DIH/solr/db/conf/managed-schema b/solr/example/example-DIH/solr/db/conf/managed-schema
index df01bc02d3d..88ed6d01b9b 100644
--- a/solr/example/example-DIH/solr/db/conf/managed-schema
+++ b/solr/example/example-DIH/solr/db/conf/managed-schema
@@ -218,7 +218,7 @@