mirror of https://github.com/apache/lucene.git

commit 1fe611d094: Merge remote-tracking branch 'origin/master'

@@ -23,6 +23,8 @@ New Features
* LUCENE-7927: Add LongValueFacetCounts, to compute facet counts for individual
  numeric values (Mike McCandless)

* LUCENE-7940: Add BengaliAnalyzer. (Md. Abdulla-Al-Sun via Robert Muir)

Optimizations

* LUCENE-7905: Optimize how OrdinalMap (used by

@@ -52,12 +54,23 @@ Bug Fixes
  not recommended, lucene-analyzers-icu contains binary data structures
  specific to ICU/Unicode versions it is built against. (Chris Koenig, Robert Muir)

* LUCENE-7891: Lucene's taxonomy facets now uses a non-buggy LRU cache
  by default. (Jan-Willem van den Broek via Mike McCandless)

Build

* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first
  instead of locally installing first, to workaround a double repository push of
  *-sources.jar and *-javadoc.jar files. (Lynn Monson via Steve Rowe)

Other

* LUCENE-7948, LUCENE-7937: Upgrade randomizedtesting to 2.5.3 (minor fixes
  in test filtering for IDEs). (Mike Sokolov, Dawid Weiss)

* LUCENE-7933: LongBitSet now validates the numBits parameter (Won
  Jonghoon, Mike McCandless)

======================= Lucene 7.0.0 =======================

New Features

@@ -184,6 +197,9 @@ Bug Fixes
* LUCENE-7864: IndexMergeTool is not using intermediate hard links (even
  if possible). (Dawid Weiss)

* LUCENE-7956: Fixed potential stack overflow error in ICUNormalizer2CharFilter.
  (Adrien Grand)

Improvements

* LUCENE-7489: Better storage of sparse doc-values fields with the default

@@ -54,13 +54,14 @@ The KStem stemmer in
was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
under the BSD-license.

The Arabic,Persian,Romanian,Bulgarian, and Hindi analyzers (common) come with a default
The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default
stopword list that is BSD-licensed created by Jacques Savoy. These files reside in:
analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt
analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
See http://members.unine.ch/jacques.savoy/clef/index.html.

The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers

@@ -0,0 +1,132 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

import java.io.IOException;
import java.io.Reader;

/**
 * Analyzer for Bengali.
 */
public final class BengaliAnalyzer extends StopwordAnalyzerBase {
  private final CharArraySet stemExclusionSet;

  /**
   * File containing default Bengali stopwords.
   *
   * Default stopword list is from http://members.unine.ch/jacques.savoy/clef/bengaliST.txt
   * The stopword list is BSD-Licensed.
   */
  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
  private static final String STOPWORDS_COMMENT = "#";

  /**
   * Returns an unmodifiable instance of the default stop-words set.
   * @return an unmodifiable instance of the default stop-words set.
   */
  public static CharArraySet getDefaultStopSet(){
    return DefaultSetHolder.DEFAULT_STOP_SET;
  }

  /**
   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
   * accesses the static final set the first time.;
   */
  private static class DefaultSetHolder {
    static final CharArraySet DEFAULT_STOP_SET;

    static {
      try {
        DEFAULT_STOP_SET = loadStopwordSet(false, BengaliAnalyzer.class, DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
      } catch (IOException ex) {
        throw new RuntimeException("Unable to load default stopword set");
      }
    }
  }

  /**
   * Builds an analyzer with the given stop words
   *
   * @param stopwords a stopword set
   * @param stemExclusionSet a stemming exclusion set
   */
  public BengaliAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
    super(stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
  }

  /**
   * Builds an analyzer with the given stop words
   *
   * @param stopwords a stopword set
   */
  public BengaliAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
  }

  /**
   * Builds an analyzer with the default stop words:
   * {@link #DEFAULT_STOPWORD_FILE}.
   */
  public BengaliAnalyzer() {
    this(DefaultSetHolder.DEFAULT_STOP_SET);
  }

  /**
   * Creates
   * {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link LowerCaseFilter}, {@link DecimalDigitFilter}, {@link IndicNormalizationFilter},
   *         {@link BengaliNormalizationFilter}, {@link SetKeywordMarkerFilter}
   *         if a stem exclusion set is provided, {@link BengaliStemFilter}, and
   *         Bengali Stop words
   */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new LowerCaseFilter(source);
    result = new DecimalDigitFilter(result);
    if (!stemExclusionSet.isEmpty())
      result = new SetKeywordMarkerFilter(result, stemExclusionSet);
    result = new IndicNormalizationFilter(result);
    result = new BengaliNormalizationFilter(result);
    result = new StopFilter(result, stopwords);
    result = new BengaliStemFilter(result);
    return new TokenStreamComponents(source, result);
  }

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    TokenStream result = new StandardFilter(in);
    result = new LowerCaseFilter(result);
    result = new DecimalDigitFilter(result);
    result = new IndicNormalizationFilter(result);
    result = new BengaliNormalizationFilter(result);
    return result;
  }
}

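The new analyzer above is a straightforward chain: StandardTokenizer, lower-casing, decimal-digit folding, Indic plus Bengali normalization, stop-word removal, and Bengali stemming. As a rough usage sketch (not part of this commit; the field name and sample text are placeholders), the chain could be exercised like this:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.bn.BengaliAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class BengaliAnalyzerDemo {
      public static void main(String[] args) throws Exception {
        try (Analyzer analyzer = new BengaliAnalyzer();
             // "body" is an arbitrary field name; any Bengali text can go here
             TokenStream ts = analyzer.tokenStream("body", "আমি বাংলায় গান গাই")) {
          CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
          ts.reset();                              // required before the first incrementToken()
          while (ts.incrementToken()) {
            System.out.println(term.toString());   // one normalized, stemmed term per line
          }
          ts.end();
        }
      }
    }

In normal indexing the analyzer would simply be handed to an IndexWriterConfig instead of being iterated by hand.
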
@@ -0,0 +1,59 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

import java.io.IOException;

/**
 * A {@link TokenFilter} that applies {@link BengaliNormalizer} to normalize the
 * orthography.
 * <p>
 * In some cases the normalization may cause unrelated terms to conflate, so
 * to prevent terms from being normalized use an instance of
 * {@link SetKeywordMarkerFilter} or a custom {@link TokenFilter} that sets
 * the {@link KeywordAttribute} before this {@link TokenStream}.
 * </p>
 * @see BengaliNormalizer
 */
public final class BengaliNormalizationFilter extends TokenFilter {

  private final BengaliNormalizer normalizer = new BengaliNormalizer();
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);

  public BengaliNormalizationFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      if (!keywordAtt.isKeyword())
        termAtt.setLength(normalizer.normalize(termAtt.buffer(),
            termAtt.length()));
      return true;
    }
    return false;
  }
}

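As the javadoc warns, normalization can conflate unrelated terms, and the usual escape hatch is to mark terms that must survive unchanged as keywords before this filter sees them. A minimal sketch, assuming a hand-picked protected-word list (not part of this commit):

    import org.apache.lucene.analysis.CharArraySet;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.bn.BengaliNormalizationFilter;
    import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    import java.util.Arrays;

    public class ProtectedBengaliChain {
      /** Terms in protectedTerms get KeywordAttribute set, so the normalizer leaves them alone. */
      static TokenStream build(Tokenizer source, CharArraySet protectedTerms) {
        TokenStream chain = new SetKeywordMarkerFilter(source, protectedTerms);
        return new BengaliNormalizationFilter(chain);
      }

      public static void main(String[] args) {
        CharArraySet protectedTerms = new CharArraySet(Arrays.asList("বাড়ী"), false);
        TokenStream chain = build(new StandardTokenizer(), protectedTerms);
        // the tokenizer still needs setReader() and reset() before tokens are consumed
      }
    }
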
@@ -0,0 +1,55 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;

import java.util.Map;

/**
 * Factory for {@link BengaliNormalizationFilter}.
 * <pre class="prettyprint">
 * <fieldType name="text_bnnormal" class="solr.TextField" positionIncrementGap="100">
 *   <analyzer>
 *     <tokenizer class="solr.StandardTokenizerFactory"/>
 *     <filter class="solr.BengaliNormalizationFilterFactory"/>
 *   </analyzer>
 * </fieldType></pre>
 */
public class BengaliNormalizationFilterFactory extends TokenFilterFactory implements MultiTermAwareComponent {

  public BengaliNormalizationFilterFactory(Map<String,String> args) {
    super(args);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  @Override
  public TokenStream create(TokenStream input) {
    return new BengaliNormalizationFilter(input);
  }

  @Override
  public AbstractAnalysisFactory getMultiTermComponent() {
    return this;
  }
}

@@ -0,0 +1,155 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import static org.apache.lucene.analysis.util.StemmerUtil.delete;

/**
 * Normalizer for Bengali.
 * <p>
 * Implements the Bengali-language specific algorithm specified in:
 * <i>A Double Metaphone encoding for Bangla and its application in spelling checker</i>
 * Naushad UzZaman and Mumit Khan.
 * http://www.panl10n.net/english/final%20reports/pdf%20files/Bangladesh/BAN16.pdf
 * </p>
 */
public class BengaliNormalizer {
  /**
   * Normalize an input buffer of Bengali text
   *
   * @param s input buffer
   * @param len length of input buffer
   * @return length of input buffer after normalization
   */
  public int normalize(char s[], int len) {

    for (int i = 0; i < len; i++) {
      switch (s[i]) {
        // delete Chandrabindu
        case '\u0981':
          len = delete(s, i, len);
          i--;
          break;

        // DirghoI kar -> RosshoI kar
        case '\u09C0':
          s[i] = '\u09BF';
          break;

        // DirghoU kar -> RosshoU kar
        case '\u09C2':
          s[i] = '\u09C1';
          break;

        // Khio (Ka + Hoshonto + Murdorno Sh)
        case '\u0995':
          if(i + 2 < len && s[i+1] == '\u09CD' && s[i+2] == '\u09BF') {
            if (i == 0) {
              s[i] = '\u0996';
              len = delete(s, i + 2, len);
              len = delete(s, i + 1, len);
            } else {
              s[i+1] = '\u0996';
              len = delete(s, i + 2, len);
            }
          }
          break;

        // Nga to Anusvara
        case '\u0999':
          s[i] = '\u0982';
          break;

        // Ja Phala
        case '\u09AF':
          if(i - 2 == 0 && s[i-1] == '\u09CD') {
            s[i - 1] = '\u09C7';

            if(i + 1 < len && s[i+1] == '\u09BE') {
              len = delete(s, i+1, len);
            }
            len = delete(s, i, len);
            i --;
          } else if(i - 1 >= 0 && s[i-1] == '\u09CD' ){
            len = delete(s, i, len);
            len = delete(s, i-1, len);
            i -=2;
          }
          break;

        // Ba Phalaa
        case '\u09AC':
          if((i >= 1 && s[i-1] != '\u09CD') || i == 0)
            break;
          if(i - 2 == 0) {
            len = delete(s, i, len);
            len = delete(s, i - 1, len);
            i -= 2;
          } else if(i - 5 >= 0 && s[i - 3] == '\u09CD') {
            len = delete(s, i, len);
            len = delete(s, i-1, len);
            i -=2;
          } else if(i - 2 >= 0){
            s[i - 1] = s[i - 2];
            len = delete(s, i, len);
            i --;
          }
          break;

        // Visarga
        case '\u0983':
          if(i == len -1) {
            if(len <= 3) {
              s[i] = '\u09B9';
            } else {
              len = delete(s, i, len);
            }
          } else {
            s[i] = s[i+1];
          }
          break;

        //All sh
        case '\u09B6':
        case '\u09B7':
          s[i] = '\u09B8';
          break;

        //check na
        case '\u09A3':
          s[i] = '\u09A8';
          break;

        //check ra
        case '\u09DC':
        case '\u09DD':
          s[i] = '\u09B0';
          break;

        case '\u09CE':
          s[i] = '\u09A4';
          break;

        default:
          break;
      }
    }

    return len;
  }
}

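BengaliNormalizer works in place on a char[] and returns the new logical length, so it can also be called outside of a TokenStream. A small sketch (not part of this commit):

    import org.apache.lucene.analysis.bn.BengaliNormalizer;

    public class NormalizeOneWord {
      public static void main(String[] args) {
        BengaliNormalizer normalizer = new BengaliNormalizer();
        char[] buffer = "বাড়ী".toCharArray();
        // the buffer is rewritten in place; only the first newLen chars remain meaningful
        int newLen = normalizer.normalize(buffer, buffer.length);
        System.out.println(new String(buffer, 0, newLen));   // prints the normalized spelling
      }
    }
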
@@ -0,0 +1,49 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;

import java.io.IOException;

/**
 * A {@link TokenFilter} that applies {@link BengaliStemmer} to stem Bengali words.
 */
public final class BengaliStemFilter extends TokenFilter {
  private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
  private final KeywordAttribute keywordAttribute = addAttribute(KeywordAttribute.class);
  private final BengaliStemmer bengaliStemmer = new BengaliStemmer();

  public BengaliStemFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
      if (!keywordAttribute.isKeyword())
        termAttribute.setLength(bengaliStemmer.stem(termAttribute.buffer(), termAttribute.length()));
      return true;
    } else {
      return false;
    }
  }
}

@@ -0,0 +1,48 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis.bn;


import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.util.TokenFilterFactory;

import java.util.Map;

/**
 * Factory for {@link BengaliStemFilter}.
 * <pre class="prettyprint">
 * <fieldType name="text_histem" class="solr.TextField" positionIncrementGap="100">
 *   <analyzer>
 *     <tokenizer class="solr.StandardTokenizerFactory"/>
 *     <filter class="solr.BengaliStemFilterFactory"/>
 *   </analyzer>
 * </fieldType></pre>
 */
public class BengaliStemFilterFactory extends TokenFilterFactory {

  public BengaliStemFilterFactory(Map<String,String> args) {
    super(args);
    if (!args.isEmpty()) {
      throw new IllegalArgumentException("Unknown parameters: " + args);
    }
  }

  @Override
  public TokenStream create(TokenStream input) {
    return new BengaliStemFilter(input);
  }
}

@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.bn;
|
||||
|
||||
|
||||
import static org.apache.lucene.analysis.util.StemmerUtil.endsWith;
|
||||
|
||||
/**
|
||||
* Stemmer for Bengali.
|
||||
* <p>
|
||||
* The algorithm is based on the report in:
|
||||
* <i>Natural Language Processing in an Indian Language (Bengali)-I: Verb Phrase Analysis</i>
|
||||
* P Sengupta and B B Chaudhuri
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Few Stemmer criteria are taken from:
|
||||
* <i>http://members.unine.ch/jacques.savoy/clef/BengaliStemmerLight.java.txt</i>
|
||||
* </p>
|
||||
*/
|
||||
public class BengaliStemmer {
|
||||
public int stem(char buffer[], int len) {
|
||||
|
||||
// 8
|
||||
if (len > 9 && (endsWith(buffer, len, "িয়াছিলাম")
|
||||
|| endsWith(buffer, len, "িতেছিলাম")
|
||||
|| endsWith(buffer, len, "িতেছিলেন")
|
||||
|| endsWith(buffer, len, "ইতেছিলেন")
|
||||
|| endsWith(buffer, len, "িয়াছিলেন")
|
||||
|| endsWith(buffer, len, "ইয়াছিলেন")
|
||||
))
|
||||
return len - 8;
|
||||
|
||||
// 7
|
||||
if ((len > 8) && (endsWith(buffer, len, "িতেছিলি")
|
||||
|| endsWith(buffer, len, "িতেছিলে")
|
||||
|| endsWith(buffer, len, "িয়াছিলা")
|
||||
|| endsWith(buffer, len, "িয়াছিলে")
|
||||
|| endsWith(buffer, len, "িতেছিলা")
|
||||
|| endsWith(buffer, len, "িয়াছিলি")
|
||||
|
||||
|| endsWith(buffer, len, "য়েদেরকে")
|
||||
))
|
||||
return len - 7;
|
||||
|
||||
// 6
|
||||
if ((len > 7) && (endsWith(buffer, len, "িতেছিস")
|
||||
|| endsWith(buffer, len, "িতেছেন")
|
||||
|| endsWith(buffer, len, "িয়াছিস")
|
||||
|| endsWith(buffer, len, "িয়াছেন")
|
||||
|| endsWith(buffer, len, "েছিলাম")
|
||||
|| endsWith(buffer, len, "েছিলেন")
|
||||
|
||||
|| endsWith(buffer, len, "েদেরকে")
|
||||
))
|
||||
return len - 6;
|
||||
|
||||
// 5
|
||||
if ((len > 6) && (endsWith(buffer, len, "িতেছি")
|
||||
|| endsWith(buffer, len, "িতেছা")
|
||||
|| endsWith(buffer, len, "িতেছে")
|
||||
|| endsWith(buffer, len, "ছিলাম")
|
||||
|| endsWith(buffer, len, "ছিলেন")
|
||||
|| endsWith(buffer, len, "িয়াছি")
|
||||
|| endsWith(buffer, len, "িয়াছা")
|
||||
|| endsWith(buffer, len, "িয়াছে")
|
||||
|| endsWith(buffer, len, "েছিলে")
|
||||
|| endsWith(buffer, len, "েছিলা")
|
||||
|
||||
|| endsWith(buffer, len, "য়েদের")
|
||||
|| endsWith(buffer, len, "দেরকে")
|
||||
))
|
||||
return len - 5;
|
||||
|
||||
// 4
|
||||
if ((len > 5) && (endsWith(buffer, len, "িলাম")
|
||||
|| endsWith(buffer, len, "িলেন")
|
||||
|| endsWith(buffer, len, "িতাম")
|
||||
|| endsWith(buffer, len, "িতেন")
|
||||
|| endsWith(buffer, len, "িবেন")
|
||||
|| endsWith(buffer, len, "ছিলি")
|
||||
|| endsWith(buffer, len, "ছিলে")
|
||||
|| endsWith(buffer, len, "ছিলা")
|
||||
|| endsWith(buffer, len, "তেছে")
|
||||
|| endsWith(buffer, len, "িতেছ")
|
||||
|
||||
|| endsWith(buffer, len, "খানা")
|
||||
|| endsWith(buffer, len, "খানি")
|
||||
|| endsWith(buffer, len, "গুলো")
|
||||
|| endsWith(buffer, len, "গুলি")
|
||||
|| endsWith(buffer, len, "য়েরা")
|
||||
|| endsWith(buffer, len, "েদের")
|
||||
))
|
||||
return len - 4;
|
||||
|
||||
// 3
|
||||
if ((len > 4) && (endsWith(buffer, len, "লাম")
|
||||
|| endsWith(buffer, len, "িলি")
|
||||
|| endsWith(buffer, len, "ইলি")
|
||||
|| endsWith(buffer, len, "িলে")
|
||||
|| endsWith(buffer, len, "ইলে")
|
||||
|| endsWith(buffer, len, "লেন")
|
||||
|| endsWith(buffer, len, "িলা")
|
||||
|| endsWith(buffer, len, "ইলা")
|
||||
|| endsWith(buffer, len, "তাম")
|
||||
|| endsWith(buffer, len, "িতি")
|
||||
|| endsWith(buffer, len, "ইতি")
|
||||
|| endsWith(buffer, len, "িতে")
|
||||
|| endsWith(buffer, len, "ইতে")
|
||||
|| endsWith(buffer, len, "তেন")
|
||||
|| endsWith(buffer, len, "িতা")
|
||||
|| endsWith(buffer, len, "িবা")
|
||||
|| endsWith(buffer, len, "ইবা")
|
||||
|| endsWith(buffer, len, "িবি")
|
||||
|| endsWith(buffer, len, "ইবি")
|
||||
|| endsWith(buffer, len, "বেন")
|
||||
|| endsWith(buffer, len, "িবে")
|
||||
|| endsWith(buffer, len, "ইবে")
|
||||
|| endsWith(buffer, len, "ছেন")
|
||||
|
||||
|| endsWith(buffer, len, "য়োন")
|
||||
|| endsWith(buffer, len, "য়ের")
|
||||
|| endsWith(buffer, len, "েরা")
|
||||
|| endsWith(buffer, len, "দের")
|
||||
))
|
||||
return len - 3;
|
||||
|
||||
// 2
|
||||
if ((len > 3) && (endsWith(buffer, len, "িস")
|
||||
|| endsWith(buffer, len, "েন")
|
||||
|| endsWith(buffer, len, "লি")
|
||||
|| endsWith(buffer, len, "লে")
|
||||
|| endsWith(buffer, len, "লা")
|
||||
|| endsWith(buffer, len, "তি")
|
||||
|| endsWith(buffer, len, "তে")
|
||||
|| endsWith(buffer, len, "তা")
|
||||
|| endsWith(buffer, len, "বি")
|
||||
|| endsWith(buffer, len, "বে")
|
||||
|| endsWith(buffer, len, "বা")
|
||||
|| endsWith(buffer, len, "ছি")
|
||||
|| endsWith(buffer, len, "ছা")
|
||||
|| endsWith(buffer, len, "ছে")
|
||||
|| endsWith(buffer, len, "ুন")
|
||||
|| endsWith(buffer, len, "ুক")
|
||||
|
||||
|| endsWith(buffer, len, "টা")
|
||||
|| endsWith(buffer, len, "টি")
|
||||
|| endsWith(buffer, len, "নি")
|
||||
|| endsWith(buffer, len, "ের")
|
||||
|| endsWith(buffer, len, "তে")
|
||||
|| endsWith(buffer, len, "রা")
|
||||
|| endsWith(buffer, len, "কে")
|
||||
))
|
||||
return len - 2;
|
||||
|
||||
// 1
|
||||
if ((len > 2) && (endsWith(buffer, len, "ি")
|
||||
|| endsWith(buffer, len, "ী")
|
||||
|| endsWith(buffer, len, "া")
|
||||
|| endsWith(buffer, len, "ো")
|
||||
|| endsWith(buffer, len, "ে")
|
||||
|| endsWith(buffer, len, "ব")
|
||||
|| endsWith(buffer, len, "ত")
|
||||
))
|
||||
return len - 1;
|
||||
|
||||
return len;
|
||||
}
|
||||
}
|
|
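The BengaliStemmer added above is a plain suffix stripper: it checks progressively shorter verbal and nominal endings and returns the shortened length without touching the remaining characters. A hedged usage sketch (not part of this commit; the expected output follows the TestBengaliStemmer case for the same word):

    import org.apache.lucene.analysis.bn.BengaliStemmer;

    public class StemOneWord {
      public static void main(String[] args) {
        BengaliStemmer stemmer = new BengaliStemmer();
        char[] buffer = "মানুষগুলি".toCharArray();
        int newLen = stemmer.stem(buffer, buffer.length);
        System.out.println(new String(buffer, 0, newLen));   // expected: "মানুষ"
      }
    }
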
@@ -0,0 +1,21 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Analyzer for Bengali Language.
 */
package org.apache.lucene.analysis.bn;

@@ -17,6 +17,8 @@ org.apache.lucene.analysis.tr.ApostropheFilterFactory
org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory
org.apache.lucene.analysis.ar.ArabicStemFilterFactory
org.apache.lucene.analysis.bg.BulgarianStemFilterFactory
org.apache.lucene.analysis.bn.BengaliNormalizationFilterFactory
org.apache.lucene.analysis.bn.BengaliStemFilterFactory
org.apache.lucene.analysis.br.BrazilianStemFilterFactory
org.apache.lucene.analysis.cjk.CJKBigramFilterFactory
org.apache.lucene.analysis.cjk.CJKWidthFilterFactory

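Listing the two factories in this TokenFilterFactory SPI file is what makes them discoverable by name. A hedged wiring sketch with CustomAnalyzer (not part of this commit; it assumes the usual convention that the lookup name is the factory class name minus the "FilterFactory" suffix):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.custom.CustomAnalyzer;

    public class BengaliByName {
      public static void main(String[] args) throws Exception {
        Analyzer analyzer = CustomAnalyzer.builder()
            .withTokenizer("standard")
            .addTokenFilter("lowercase")
            .addTokenFilter("indicNormalization")
            .addTokenFilter("bengaliNormalization")   // resolved through the SPI entry added above
            .addTokenFilter("bengaliStem")
            .build();
        analyzer.close();
      }
    }
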
@ -0,0 +1,121 @@
|
|||
# See http://members.unine.ch/jacques.savoy/clef/index.html.
|
||||
# This file was created by Jacques Savoy and is distributed under the BSD license
|
||||
এই
|
||||
ও
|
||||
থেকে
|
||||
করে
|
||||
এ
|
||||
না
|
||||
ওই
|
||||
এক্
|
||||
নিয়ে
|
||||
করা
|
||||
বলেন
|
||||
সঙ্গে
|
||||
যে
|
||||
এব
|
||||
তা
|
||||
আর
|
||||
কোনো
|
||||
বলে
|
||||
সেই
|
||||
দিন
|
||||
হয়
|
||||
কি
|
||||
দু
|
||||
পরে
|
||||
সব
|
||||
দেওয়া
|
||||
মধ্যে
|
||||
এর
|
||||
সি
|
||||
শুরু
|
||||
কাজ
|
||||
কিছু
|
||||
কাছে
|
||||
সে
|
||||
তবে
|
||||
বা
|
||||
বন
|
||||
আগে
|
||||
জ্নজন
|
||||
পি
|
||||
পর
|
||||
তো
|
||||
ছিল
|
||||
এখন
|
||||
আমরা
|
||||
প্রায়
|
||||
দুই
|
||||
আমাদের
|
||||
তাই
|
||||
অন্য
|
||||
গিয়ে
|
||||
প্রযন্ত
|
||||
মনে
|
||||
নতুন
|
||||
মতো
|
||||
কেখা
|
||||
প্রথম
|
||||
আজ
|
||||
টি
|
||||
ধামার
|
||||
অনেক
|
||||
বিভিন্ন
|
||||
র
|
||||
হাজার
|
||||
জানা
|
||||
নয়
|
||||
অবশ্য
|
||||
বেশি
|
||||
এস
|
||||
করে
|
||||
কে
|
||||
হতে
|
||||
বি
|
||||
কয়েক
|
||||
সহ
|
||||
বেশ
|
||||
এমন
|
||||
এমনি
|
||||
কেন
|
||||
কেউ
|
||||
নেওয়া
|
||||
চেষ্টা
|
||||
লক্ষ
|
||||
বলা
|
||||
কারণ
|
||||
আছে
|
||||
শুধু
|
||||
তখন
|
||||
যা
|
||||
এসে
|
||||
চার
|
||||
ছিল
|
||||
যদি
|
||||
আবার
|
||||
কোটি
|
||||
উত্তর
|
||||
সামনে
|
||||
উপর
|
||||
বক্তব্য
|
||||
এত
|
||||
প্রাথমিক
|
||||
উপরে
|
||||
আছে
|
||||
প্রতি
|
||||
কাজে
|
||||
যখন
|
||||
খুব
|
||||
বহু
|
||||
গেল
|
||||
পেয়্র্
|
||||
চালু
|
||||
ই
|
||||
নাগাদ
|
||||
থাকা
|
||||
পাচ
|
||||
যাওয়া
|
||||
রকম
|
||||
সাধারণ
|
||||
কমনে
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.bn;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
/**
|
||||
* Tests the BengaliAnalyzer
|
||||
*/
|
||||
public class TestBengaliAnalyzer extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testResourcesAvailable() {
|
||||
new BengaliAnalyzer().close();
|
||||
}
|
||||
|
||||
public void testBasics() throws Exception {
|
||||
Analyzer a = new BengaliAnalyzer();
|
||||
|
||||
checkOneTerm(a, "বাড়ী", "বার");
|
||||
checkOneTerm(a, "বারী", "বার");
|
||||
a.close();
|
||||
}
|
||||
/**
|
||||
* test Digits
|
||||
*/
|
||||
public void testDigits() throws Exception {
|
||||
BengaliAnalyzer a = new BengaliAnalyzer();
|
||||
checkOneTerm(a, "১২৩৪৫৬৭৮৯০", "1234567890");
|
||||
a.close();
|
||||
}
|
||||
|
||||
/** blast some random strings through the analyzer */
|
||||
public void testRandomStrings() throws Exception {
|
||||
Analyzer analyzer = new BengaliAnalyzer();
|
||||
checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
|
||||
analyzer.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,80 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.bn;
|
||||
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
|
||||
/**
|
||||
* Test Bengali Filter Factory
|
||||
*/
|
||||
public class TestBengaliFilters extends BaseTokenStreamFactoryTestCase {
|
||||
/**
|
||||
* Test IndicNormalizationFilterFactory
|
||||
*/
|
||||
public void testIndicNormalizer() throws Exception {
|
||||
Reader reader = new StringReader("ত্ আমি");
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = tokenFilterFactory("IndicNormalization").create(stream);
|
||||
assertTokenStreamContents(stream, new String[] { "ৎ", "আমি" });
|
||||
}
|
||||
|
||||
/**
|
||||
* Test BengaliNormalizationFilterFactory
|
||||
*/
|
||||
public void testBengaliNormalizer() throws Exception {
|
||||
Reader reader = new StringReader("বাড়ী");
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = tokenFilterFactory("IndicNormalization").create(stream);
|
||||
stream = tokenFilterFactory("BengaliNormalization").create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"বারি"});
|
||||
}
|
||||
|
||||
/**
|
||||
* Test BengaliStemFilterFactory
|
||||
*/
|
||||
public void testStemmer() throws Exception {
|
||||
Reader reader = new StringReader("বাড়ী");
|
||||
TokenStream stream = whitespaceMockTokenizer(reader);
|
||||
stream = tokenFilterFactory("IndicNormalization").create(stream);
|
||||
stream = tokenFilterFactory("BengaliNormalization").create(stream);
|
||||
stream = tokenFilterFactory("BengaliStem").create(stream);
|
||||
assertTokenStreamContents(stream, new String[] {"বার"});
|
||||
}
|
||||
|
||||
/** Test that bogus arguments result in exception */
|
||||
public void testBogusArguments() throws Exception {
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenFilterFactory("IndicNormalization", "bogusArg", "bogusValue");
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
|
||||
expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenFilterFactory("BengaliNormalization", "bogusArg", "bogusValue");
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
|
||||
expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
tokenFilterFactory("BengaliStem", "bogusArg", "bogusValue");
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.bn;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Test BengaliNormalizer
|
||||
*/
|
||||
public class TestBengaliNormalizer extends BaseTokenStreamTestCase {
|
||||
/**
|
||||
* Test some basic normalization, with an example from the paper.
|
||||
*/
|
||||
public void testChndrobindu() throws IOException {
|
||||
check("চাঁদ", "চাদ");
|
||||
}
|
||||
|
||||
public void testRosshoIKar() throws IOException {
|
||||
check("বাড়ী", "বারি");
|
||||
check("তীর", "তির");
|
||||
}
|
||||
|
||||
public void testRosshoUKar() throws IOException {
|
||||
check("ভূল", "ভুল");
|
||||
check("অনূপ", "অনুপ");
|
||||
}
|
||||
|
||||
public void testNga() throws IOException {
|
||||
check("বাঙলা", "বাংলা");
|
||||
}
|
||||
|
||||
public void testJaPhaala() throws IOException {
|
||||
check("ব্যাক্তি", "বেক্তি");
|
||||
check( "সন্ধ্যা", "সন্ধা");
|
||||
}
|
||||
|
||||
public void testBaPhalaa() throws IOException {
|
||||
check("স্বদেশ", "সদেস");
|
||||
check("তত্ত্ব", "তত্ত");
|
||||
check("বিশ্ব", "বিসস");
|
||||
}
|
||||
|
||||
public void testVisarga() throws IOException {
|
||||
check("দুঃখ", "দুখখ");
|
||||
check("উঃ", "উহ");
|
||||
check("পুনঃ", "পুন");
|
||||
}
|
||||
|
||||
public void testBasics() throws IOException {
|
||||
check("কণা", "কনা");
|
||||
check("শরীর", "সরির");
|
||||
check("বাড়ি", "বারি");
|
||||
}
|
||||
|
||||
/** creates random strings in the bengali block and ensures the normalizer doesn't trip up on them */
|
||||
public void testRandom() throws IOException {
|
||||
BengaliNormalizer normalizer = new BengaliNormalizer();
|
||||
for (int i = 0; i < 100000; i++) {
|
||||
String randomBengali = TestUtil.randomSimpleStringRange(random(), '\u0980', '\u09FF', 7);
|
||||
try {
|
||||
int newLen = normalizer.normalize(randomBengali.toCharArray(), randomBengali.length());
|
||||
assertTrue(newLen >= 0); // should not return negative length
|
||||
assertTrue(newLen <= randomBengali.length()); // should not increase length of string
|
||||
} catch (Exception e) {
|
||||
System.err.println("normalizer failed on input: '" + randomBengali + "' (" + escape(randomBengali) + ")");
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void check(String input, String output) throws IOException {
|
||||
Tokenizer tokenizer = whitespaceMockTokenizer(input);
|
||||
TokenFilter tf = new BengaliNormalizationFilter(tokenizer);
|
||||
assertTokenStreamContents(tf, new String[] { output });
|
||||
}
|
||||
|
||||
public void testEmptyTerm() throws IOException {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new KeywordTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, new BengaliNormalizationFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.bn;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Test Codes for BengaliStemmer
|
||||
*/
|
||||
public class TestBengaliStemmer extends BaseTokenStreamTestCase {
|
||||
|
||||
/**
|
||||
* Testing few verbal words
|
||||
*/
|
||||
public void testVerbsInShadhuForm() throws IOException {
|
||||
check("করেছিলাম", "কর");
|
||||
check("করিতেছিলে", "কর");
|
||||
check("খাইতাম", "খাই");
|
||||
check("যাইবে", "যা");
|
||||
}
|
||||
|
||||
public void testVerbsInCholitoForm() throws IOException {
|
||||
check("করছিলাম", "কর");
|
||||
check("করছিলে", "কর");
|
||||
check("করতাম", "কর");
|
||||
check("যাব", "যা");
|
||||
check("যাবে", "যা");
|
||||
check("করি", "কর");
|
||||
check("করো", "কর");
|
||||
}
|
||||
|
||||
public void testNouns() throws IOException {
|
||||
check("মেয়েরা", "মে");
|
||||
check("মেয়েদেরকে", "মে");
|
||||
check("মেয়েদের", "মে");
|
||||
|
||||
check("একটি", "এক");
|
||||
check("মানুষগুলি", "মানুষ");
|
||||
}
|
||||
|
||||
private void check(String input, String output) throws IOException {
|
||||
Tokenizer tokenizer = whitespaceMockTokenizer(input);
|
||||
TokenFilter tf = new BengaliStemFilter(tokenizer);
|
||||
assertTokenStreamContents(tf, new String[] { output });
|
||||
}
|
||||
|
||||
public void testEmptyTerm() throws IOException {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new KeywordTokenizer();
|
||||
return new TokenStreamComponents(tokenizer, new BengaliStemFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
a.close();
|
||||
}
|
||||
}
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
import java.io.Reader;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.analysis.CharacterUtils;
|
||||
import org.apache.lucene.analysis.charfilter.BaseCharFilter;
|
||||
|
||||
import com.ibm.icu.text.Normalizer2;
|
||||
|
@ -61,7 +62,7 @@ public final class ICUNormalizer2CharFilter extends BaseCharFilter {
|
|||
ICUNormalizer2CharFilter(Reader in, Normalizer2 normalizer, int bufferSize) {
|
||||
super(in);
|
||||
this.normalizer = Objects.requireNonNull(normalizer);
|
||||
this.tmpBuffer = new char[bufferSize];
|
||||
this.tmpBuffer = CharacterUtils.newCharacterBuffer(bufferSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -94,23 +95,31 @@ public final class ICUNormalizer2CharFilter extends BaseCharFilter {
|
|||
return -1;
|
||||
}
|
||||
|
||||
private final char[] tmpBuffer;
|
||||
private final CharacterUtils.CharacterBuffer tmpBuffer;
|
||||
|
||||
private int readInputToBuffer() throws IOException {
|
||||
final int len = input.read(tmpBuffer);
|
||||
if (len == -1) {
|
||||
inputFinished = true;
|
||||
return 0;
|
||||
private void readInputToBuffer() throws IOException {
|
||||
while (true) {
|
||||
// CharacterUtils.fill is supplementary char aware
|
||||
final boolean hasRemainingChars = CharacterUtils.fill(tmpBuffer, input);
|
||||
|
||||
assert tmpBuffer.getOffset() == 0;
|
||||
inputBuffer.append(tmpBuffer.getBuffer(), 0, tmpBuffer.getLength());
|
||||
|
||||
if (hasRemainingChars == false) {
|
||||
inputFinished = true;
|
||||
break;
|
||||
}
|
||||
|
||||
final int lastCodePoint = Character.codePointBefore(tmpBuffer.getBuffer(), tmpBuffer.getLength(), 0);
|
||||
if (normalizer.isInert(lastCodePoint)) {
|
||||
// we require an inert char so that we can normalize content before and
|
||||
// after this character independently
|
||||
break;
|
||||
}
|
||||
}
|
||||
inputBuffer.append(tmpBuffer, 0, len);
|
||||
|
||||
// if checkedInputBoundary was at the end of a buffer, we need to check that char again
|
||||
checkedInputBoundary = Math.max(checkedInputBoundary - 1, 0);
|
||||
// this loop depends on 'isInert' (changes under normalization) but looks only at characters.
|
||||
// so we treat all surrogates as non-inert for simplicity
|
||||
if (normalizer.isInert(tmpBuffer[len - 1]) && !Character.isSurrogate(tmpBuffer[len-1])) {
|
||||
return len;
|
||||
} else return len + readInputToBuffer();
|
||||
}
|
||||
|
||||
private int readAndNormalizeFromInput() {
|
||||
|
|
|
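For context, the removed readInputToBuffer recursed once per refill until it saw an inert character, which is what could overflow the stack on very long runs of non-inert input (LUCENE-7956, noted in CHANGES above); the replacement performs the same scan in a loop over a CharacterUtils buffer. A hedged sketch of how the char filter is normally attached to a reader, mirroring the regression test added in this commit (the wrapper class and sample input are made up):

    import java.io.Reader;
    import java.io.StringReader;

    import org.apache.lucene.analysis.icu.ICUNormalizer2CharFilter;

    import com.ibm.icu.text.Normalizer2;

    public class NfkcCaseFoldReader {
      /** Wraps any Reader so that its characters come out NFKC-normalized and case-folded. */
      public static Reader wrap(Reader in) {
        return new ICUNormalizer2CharFilter(in,
            Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
      }

      public static void main(String[] args) throws Exception {
        Reader r = wrap(new StringReader("Ⅻ"));   // the Roman-numeral sign reads back as "xii"
        int c;
        while ((c = r.read()) != -1) {
          System.out.print((char) c);
        }
      }
    }
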
@ -20,12 +20,14 @@ package org.apache.lucene.analysis.icu;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
@ -418,4 +420,23 @@ public class TestICUNormalizer2CharFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
a.close();
|
||||
}
|
||||
|
||||
// https://issues.apache.org/jira/browse/LUCENE-7956
|
||||
public void testVeryLargeInputOfNonInertChars() throws Exception {
|
||||
char[] text = new char[1000000];
|
||||
Arrays.fill(text, 'a');
|
||||
try (Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new KeywordTokenizer());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Reader initReader(String fieldName, Reader reader) {
|
||||
return new ICUNormalizer2CharFilter(reader, Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));
|
||||
}
|
||||
}) {
|
||||
checkAnalysisConsistency(random(), a, false, new String(text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -113,24 +113,26 @@ public class SimpleNaiveBayesDocumentClassifier extends SimpleNaiveBayesClassifi
|
|||
Map<String, List<String[]>> fieldName2tokensArray = new LinkedHashMap<>();
|
||||
Map<String, Float> fieldName2boost = new LinkedHashMap<>();
|
||||
Terms classes = MultiFields.getTerms(indexReader, classFieldName);
|
||||
TermsEnum classesEnum = classes.iterator();
|
||||
BytesRef c;
|
||||
if (classes != null) {
|
||||
TermsEnum classesEnum = classes.iterator();
|
||||
BytesRef c;
|
||||
|
||||
analyzeSeedDocument(inputDocument, fieldName2tokensArray, fieldName2boost);
|
||||
analyzeSeedDocument(inputDocument, fieldName2tokensArray, fieldName2boost);
|
||||
|
||||
int docsWithClassSize = countDocsWithClass();
|
||||
while ((c = classesEnum.next()) != null) {
|
||||
double classScore = 0;
|
||||
Term term = new Term(this.classFieldName, c);
|
||||
for (String fieldName : textFieldNames) {
|
||||
List<String[]> tokensArrays = fieldName2tokensArray.get(fieldName);
|
||||
double fieldScore = 0;
|
||||
for (String[] fieldTokensArray : tokensArrays) {
|
||||
fieldScore += calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(fieldTokensArray, fieldName, term, docsWithClassSize) * fieldName2boost.get(fieldName);
|
||||
int docsWithClassSize = countDocsWithClass();
|
||||
while ((c = classesEnum.next()) != null) {
|
||||
double classScore = 0;
|
||||
Term term = new Term(this.classFieldName, c);
|
||||
for (String fieldName : textFieldNames) {
|
||||
List<String[]> tokensArrays = fieldName2tokensArray.get(fieldName);
|
||||
double fieldScore = 0;
|
||||
for (String[] fieldTokensArray : tokensArrays) {
|
||||
fieldScore += calculateLogPrior(term, docsWithClassSize) + calculateLogLikelihood(fieldTokensArray, fieldName, term, docsWithClassSize) * fieldName2boost.get(fieldName);
|
||||
}
|
||||
classScore += fieldScore;
|
||||
}
|
||||
classScore += fieldScore;
|
||||
assignedClasses.add(new ClassificationResult<>(term.bytes(), classScore));
|
||||
}
|
||||
assignedClasses.add(new ClassificationResult<>(term.bytes(), classScore));
|
||||
}
|
||||
return normClassificationResults(assignedClasses);
|
||||
}
|
||||
|
|
|
@@ -71,6 +71,9 @@ public class CharTermAttributeImpl extends AttributeImpl implements CharTermAttr

  @Override
  public final CharTermAttribute setLength(int length) {
    if (length < 0) {
      throw new IllegalArgumentException("length " + length + " must not be negative");
    }
    if (length > termBuffer.length)
      throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
    termLength = length;

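The added guard simply rejects negative lengths up front, in the same style as the existing upper-bound check. A tiny sketch of the new behavior (not part of this commit):

    import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;

    public class SetLengthGuard {
      public static void main(String[] args) {
        CharTermAttributeImpl t = new CharTermAttributeImpl();
        t.append("hello");
        t.setLength(3);    // fine: the term is now "hel"
        t.setLength(-1);   // now throws IllegalArgumentException("length -1 must not be negative")
      }
    }
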
@@ -55,9 +55,15 @@ public final class LongBitSet {
      return new LongBitSet(arr, (long)arr.length << 6);
    }
  }

  /** The maximum {@code numBits} supported. */
  public static final long MAX_NUM_BITS = 64 * (long) ArrayUtil.MAX_ARRAY_LENGTH;

  /** returns the number of 64 bit words it would take to hold numBits */
  /** Returns the number of 64 bit words it would take to hold numBits */
  public static int bits2words(long numBits) {
    if (numBits < 0 || numBits > MAX_NUM_BITS) {
      throw new IllegalArgumentException("numBits must be 0 .. " + MAX_NUM_BITS + "; got: " + numBits);
    }
    return (int)((numBits - 1) >> 6) + 1; // I.e.: get the word-offset of the last bit and add one (make sure to use >> so 0 returns 0!)
  }
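bits2words rounds numBits up to whole 64-bit words, and the new check additionally bounds numBits by MAX_NUM_BITS (64 * ArrayUtil.MAX_ARRAY_LENGTH). A short worked sketch (not part of this commit):

    import org.apache.lucene.util.LongBitSet;

    public class Bits2WordsDemo {
      public static void main(String[] args) {
        System.out.println(LongBitSet.bits2words(1));    // 1: a single bit still needs one long
        System.out.println(LongBitSet.bits2words(64));   // 1: exactly one full word
        System.out.println(LongBitSet.bits2words(65));   // 2: one bit spills into a second word
        LongBitSet.bits2words(-1);                       // now throws IllegalArgumentException
      }
    }
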
|
||||
|
|
|
@ -42,6 +42,16 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testSetLength() {
|
||||
CharTermAttributeImpl t = new CharTermAttributeImpl();
|
||||
char[] content = "hello".toCharArray();
|
||||
t.copyBuffer(content, 0, content.length);
|
||||
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
|
||||
t.setLength(-1);
|
||||
});
|
||||
assertTrue(expected.getMessage().contains("must not be negative"));
|
||||
}
|
||||
|
||||
public void testGrow() {
|
||||
CharTermAttributeImpl t = new CharTermAttributeImpl();
|
||||
StringBuilder buf = new StringBuilder("ab");
|
||||
|
|
|
@ -218,7 +218,23 @@ public class TestLongBitSet extends LuceneTestCase {
|
|||
assertEquals(b1.hashCode(), b2.hashCode());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testTooLarge() {
|
||||
Exception e = expectThrows(IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LongBitSet(LongBitSet.MAX_NUM_BITS + 1);
|
||||
});
|
||||
assertTrue(e.getMessage().startsWith("numBits must be 0 .. "));
|
||||
}
|
||||
|
||||
public void testNegativeNumBits() {
|
||||
Exception e = expectThrows(IllegalArgumentException.class,
|
||||
() -> {
|
||||
new LongBitSet(-17);
|
||||
});
|
||||
assertTrue(e.getMessage().startsWith("numBits must be 0 .. "));
|
||||
}
|
||||
|
||||
public void testSmallBitSets() {
|
||||
// Make sure size 0-10 bit sets are OK:
|
||||
|
@ -344,7 +360,9 @@ public class TestLongBitSet extends LuceneTestCase {
|
|||
// ...
|
||||
assertEquals(1 << (32-6), LongBitSet.bits2words(1L << 32));
|
||||
assertEquals((1 << (32-6)) + 1, LongBitSet.bits2words((1L << 32)) + 1);
|
||||
// ...
|
||||
assertEquals(Integer.MAX_VALUE, LongBitSet.bits2words((1L << 37) - 64));
|
||||
|
||||
// ensure the claimed max num_bits doesn't throw exc; we can't enforce exact values here
|
||||
// because the value variees with JVM:
|
||||
assertTrue(LongBitSet.bits2words(LongBitSet.MAX_NUM_BITS) > 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -32,8 +32,12 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
   * function, LRU_STRING should be used.
   */
  public enum LRUType {
    /** Use the label's hash as the key; this can lead to
     * silent conflicts! */
    /** Use only the label's 64 bit longHashCode as the hash key. Do not
     * check equals, unlike most hash maps.
     * Note that while these hashes are very likely to be unique, the chance
     * of a collision is still greater than zero. If such an unlikely event
     * occurs, your document will get an incorrect facet.
     */
    LRU_HASHED,

    /** Use the label as the hash key; this is always

@@ -43,15 +47,15 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {

  private NameIntCacheLRU cache;

  /** Creates this with {@link LRUType#LRU_HASHED} method. */
  /** Creates this with {@link LRUType#LRU_STRING} method. */
  public LruTaxonomyWriterCache(int cacheSize) {
    // TODO (Facet): choose between NameHashIntCacheLRU and NameIntCacheLRU.
    // For guaranteed correctness - not relying on no-collisions in the hash
    // function, NameIntCacheLRU should be used:
    // On the other hand, NameHashIntCacheLRU takes less RAM but if there
    // are collisions (which we never found) two different paths would be
    // mapped to the same ordinal...
    this(cacheSize, LRUType.LRU_HASHED);
    // are collisions two different paths would be mapped to the same
    // ordinal...
    this(cacheSize, LRUType.LRU_STRING);
  }

  /** Creates this with the specified method. */

@@ -60,8 +64,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
    // For guaranteed correctness - not relying on no-collisions in the hash
    // function, NameIntCacheLRU should be used:
    // On the other hand, NameHashIntCacheLRU takes less RAM but if there
    // are collisions (which we never found) two different paths would be
    // mapped to the same ordinal...
    // are collisions two different paths would be mapped to the same
    // ordinal...
    if (lruType == LRUType.LRU_HASHED) {
      this.cache = new NameHashIntCacheLRU(cacheSize);
    } else {

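The practical effect of these hunks (together with the CHANGES entry for LUCENE-7891) is that the single-argument constructor now defaults to the collision-safe LRU_STRING cache, while LRU_HASHED remains available for callers who accept the small risk of two labels sharing a longHashCode. A hedged sketch of choosing the cache explicitly (not part of this commit; it assumes the three-argument DirectoryTaxonomyWriter constructor that takes a TaxonomyWriterCache):

    import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
    import org.apache.lucene.facet.taxonomy.writercache.LruTaxonomyWriterCache;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class TaxoCacheChoice {
      public static void main(String[] args) throws Exception {
        Directory taxoDir = new RAMDirectory();
        // new LruTaxonomyWriterCache(1024) alone would now mean LRUType.LRU_STRING
        DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(
            taxoDir, OpenMode.CREATE, new LruTaxonomyWriterCache(1024));
        taxoWriter.close();
      }
    }
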
@@ -0,0 +1,50 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.facet.taxonomy.writercache;

import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.junit.Test;

public class TestLruTaxonomyWriterCache extends FacetTestCase {

  @Test
  public void testDefaultLRUTypeIsCollisionSafe() {
    // These labels are clearly different, but have identical longHashCodes.
    // Note that these labels are clearly contrived. We did encounter
    // collisions in actual production data, but we aren't allowed to publish
    // those.
    final FacetLabel a = new FacetLabel("\0", "\u0003\uFFE2");
    final FacetLabel b = new FacetLabel("\1", "\0");
    // If this fails, then the longHashCode implementation has changed. This
    // cannot prevent collisions. (All hashes must allow for collisions.) It
    // will however stop the rest of this test from making sense. To fix, find
    // new colliding labels, or make a subclass of FacetLabel that produces
    // collisions.
    assertEquals(a.longHashCode(), b.longHashCode());
    // Make a cache with capacity > 2 so both our labels will fit. Don't
    // specify an LRUType, since we want to check if the default is
    // collision-safe.
    final LruTaxonomyWriterCache cache = new LruTaxonomyWriterCache(10);
    cache.put(a, 0);
    cache.put(b, 1);
    assertEquals(cache.get(a), 0);
    assertEquals(cache.get(b), 1);
  }

}
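For contrast, a hedged sketch of the failure mode the old default allowed (hypothetical snippet reusing the colliding labels a and b from the test above; the exact returned ordinal depends on the cache internals, the point is only that LRU_HASHED keys on the 64-bit longHashCode alone and never checks equals):

    // With LRU_HASHED both labels share one hash key, so the second put can
    // silently replace the first entry.
    LruTaxonomyWriterCache hashed =
        new LruTaxonomyWriterCache(10, LruTaxonomyWriterCache.LRUType.LRU_HASHED);
    hashed.put(a, 0);
    hashed.put(b, 1);
    // hashed.get(a) may now report 1 -- the wrong facet ordinal for label a.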
|
@@ -5,7 +5,7 @@
/antlr/antlr = 2.7.7
/com.adobe.xmp/xmpcore = 5.1.2

com.carrotsearch.randomizedtesting.version = 2.5.2
com.carrotsearch.randomizedtesting.version = 2.5.3
/com.carrotsearch.randomizedtesting/junit4-ant = ${com.carrotsearch.randomizedtesting.version}
/com.carrotsearch.randomizedtesting/randomizedtesting-runner = ${com.carrotsearch.randomizedtesting.version}

@@ -1 +0,0 @@
91f3284993b44dcb2f003b5f28617abba13971d2

@@ -0,0 +1 @@
053da66a10597283d48266d1f09d572f8608ae3f
|
@ -30,11 +30,9 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.index.SegmentCommitInfo;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.StandardDirectoryReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.SearcherFactory;
|
||||
import org.apache.lucene.search.SearcherManager;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.RAMFile;
|
||||
import org.apache.lucene.store.RAMOutputStream;
|
||||
|
@ -113,14 +111,6 @@ public abstract class PrimaryNode extends Node {
|
|||
setCurrentInfos(Collections.<String>emptySet());
|
||||
message("init: infos version=" + curInfos.getVersion());
|
||||
|
||||
IndexSearcher s = mgr.acquire();
|
||||
try {
|
||||
// TODO: this is test code specific!!
|
||||
message("init: marker count: " + s.count(new TermQuery(new Term("marker", "marker"))));
|
||||
} finally {
|
||||
mgr.release(s);
|
||||
}
|
||||
|
||||
} catch (Throwable t) {
|
||||
message("init: exception");
|
||||
t.printStackTrace(printStream);
|
||||
|
@ -231,8 +221,6 @@ public abstract class PrimaryNode extends Node {
|
|||
try {
|
||||
searcher = mgr.acquire();
|
||||
infos = ((StandardDirectoryReader) searcher.getIndexReader()).getSegmentInfos();
|
||||
// TODO: this is test code specific!!
|
||||
message("setCurrentInfos: marker count: " + searcher.count(new TermQuery(new Term("marker", "marker"))) + " version=" + infos.getVersion() + " searcher=" + searcher);
|
||||
} finally {
|
||||
if (searcher != null) {
|
||||
mgr.release(searcher);
|
||||
|
|
|
@ -33,7 +33,6 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import java.util.concurrent.ConcurrentMap;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
|
@ -287,15 +286,6 @@ public abstract class ReplicaNode extends Node {
|
|||
// Finally, we are open for business, since our index now "agrees" with the primary:
|
||||
mgr = new SegmentInfosSearcherManager(dir, this, infos, searcherFactory);
|
||||
|
||||
IndexSearcher searcher = mgr.acquire();
|
||||
try {
|
||||
// TODO: this is test specific:
|
||||
int hitCount = searcher.count(new TermQuery(new Term("marker", "marker")));
|
||||
message("top: marker count=" + hitCount + " version=" + ((DirectoryReader) searcher.getIndexReader()).getVersion());
|
||||
} finally {
|
||||
mgr.release(searcher);
|
||||
}
|
||||
|
||||
// Must commit after init mgr:
|
||||
if (doCommit) {
|
||||
// Very important to commit what we just sync'd over, because we removed the pre-existing commit point above if we had to
|
||||
|
|
|
@ -81,6 +81,15 @@ New Features
|
|||
|
||||
* SOLR-11215: Make a metric accessible through a single param. (ab)
|
||||
|
||||
* SOLR-11244: Query DSL for Solr (Cao Manh Dat)
|
||||
|
||||
* SOLR-11317: JSON Facet API: min/max aggregations on numeric fields are now typed better so int/long
|
||||
fields return an appropriate integral type rather than a double. (yonik)
|
||||
|
||||
* SOLR-11316: JSON Facet API: min/max aggregations are now supported on single-valued date fields.
|
||||
(yonik)
|
||||
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -118,6 +127,8 @@ Bug Fixes
|
|||
|
||||
* SOLR-11293: Potential data loss in TLOG replicas after replication failures (noble)
|
||||
|
||||
* SOLR-10101: TestLazyCores hangs (Erick Erickson)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
@ -126,6 +137,8 @@ Optimizations
|
|||
|
||||
* SOLR-11124: MoveReplicaCmd should skip deleting old replica in case of its node is not live (Cao Manh Dat)
|
||||
|
||||
* SOLR-11242: QueryParser: re-use the LookaheadSuccess exception. (Michael Braun via David Smiley)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
@ -179,6 +192,10 @@ Other Changes
|
|||
|
||||
* SOLR-11209: Upgrade HttpClient to 4.5.3. (Hrishikesh Gadre via Mark Miller)
|
||||
|
||||
* SOLR-11322: JSON Facet API: instead of returning NaN, min & max aggregations omit
|
||||
the value for any bucket with no values in the numeric field. (yonik)
|
||||
|
||||
|
||||
================== 7.0.0 ==================
|
||||
|
||||
Versions of Major Components
|
||||
|
@ -786,7 +803,7 @@ Other Changes
|
|||
* SOLR-10494: Make default response format JSON (wt=json), and also indent text responses formats
|
||||
(indent=on) by default (Trey Grainger & Cassandra Targett via hossman)
|
||||
|
||||
* SOLR-10760: Remove trie field types and fields from example schemas. (Steve Rowe)
|
||||
* SOLR-10760,SOLR-11315,SOLR-11313: Remove trie field types and fields from example schemas. (Steve Rowe)
|
||||
|
||||
* SOLR-11056: Add random range query test that compares results across Trie*, *Point and DocValue-only fields
|
||||
(Tomás Fernández Löbbe)
|
||||
|
@ -879,6 +896,8 @@ Other Changes
|
|||
|
||||
* SOLR-11261, SOLR-10966: Upgrade to Hadoop 2.7.4 to fix incompatibility with Java 9.
|
||||
(Uwe Schindler)
|
||||
|
||||
* SOLR-11324: Clean up mention of trie fields in documentation and source comments. (Steve Rowe)
|
||||
|
||||
================== 6.6.1 ==================
|
||||
|
||||
|
|
|
@ -83,6 +83,11 @@
|
|||
byline="true"
|
||||
match="public QueryParser\(QueryParserTokenManager "
|
||||
replace="protected QueryParser(QueryParserTokenManager "/>
|
||||
<!-- change an exception used for signaling to be static -->
|
||||
<replaceregexp file="src/java/org/apache/solr/parser/QueryParser.java"
|
||||
byline="true"
|
||||
match="final private LookaheadSuccess jj_ls ="
|
||||
replace="static final private LookaheadSuccess jj_ls =" />
|
||||
<replace token="StringBuffer" value="StringBuilder" encoding="UTF-8">
|
||||
<fileset dir="src/java/org/apache/solr/parser" includes="ParseException.java TokenMgrError.java"/>
|
||||
</replace>
|
||||
|
|
|
@ -33,84 +33,7 @@ import org.apache.solr.client.solrj.io.ModelCache;
|
|||
import org.apache.solr.client.solrj.io.SolrClientCache;
|
||||
import org.apache.solr.client.solrj.io.Tuple;
|
||||
import org.apache.solr.client.solrj.io.comp.StreamComparator;
|
||||
import org.apache.solr.client.solrj.io.eval.AbsoluteValueEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.AddEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.AndEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.AnovaEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.AppendEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ArcCosineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ArcSineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ArcTangentEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ArrayEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.AscEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CeilingEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CoalesceEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ColumnEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ConversionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ConvolutionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CopyOfEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CopyOfRangeEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CorrelationEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CosineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CovarianceEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CubedRootEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.CumulativeProbabilityEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.DescribeEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.DivideEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.EmpiricalDistributionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.EqualToEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.EuclideanDistanceEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ExclusiveOrEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.FindDelayEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.FloorEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.GreaterThanEqualToEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.GreaterThanEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.HistogramEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.HyperbolicCosineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.HyperbolicSineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.HyperbolicTangentEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.IfThenElseEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.KolmogorovSmirnovEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.LengthEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.LessThanEqualToEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.LessThanEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ModuloEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.MovingAverageEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.MultiplyEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.NaturalLogEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.NormalDistributionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.NormalizeEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.NotEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.OrEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.PercentileEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.PowerEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.PredictEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.RankEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.RawValueEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.RegressionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ResidualsEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ReverseEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.RoundEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.SampleEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.ScaleEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.SequenceEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.SineEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.SquareRootEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.SubtractEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.TangentEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorDay;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorDayOfQuarter;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorDayOfYear;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorEpoch;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorHour;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorMinute;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorMonth;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorQuarter;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorSecond;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorWeek;
|
||||
import org.apache.solr.client.solrj.io.eval.TemporalEvaluatorYear;
|
||||
import org.apache.solr.client.solrj.io.eval.UniformDistributionEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.UuidEvaluator;
|
||||
import org.apache.solr.client.solrj.io.eval.*;
|
||||
import org.apache.solr.client.solrj.io.graph.GatherNodesStream;
|
||||
import org.apache.solr.client.solrj.io.graph.ShortestPathStream;
|
||||
import org.apache.solr.client.solrj.io.ops.ConcatOperation;
|
||||
|
@ -352,6 +275,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
|
|||
.withFunctionName("ks", KolmogorovSmirnovEvaluator.class)
|
||||
.withFunctionName("asc", AscEvaluator.class)
|
||||
.withFunctionName("cumulativeProbability", CumulativeProbabilityEvaluator.class)
|
||||
.withFunctionName("ebeAdd", EBEAddEvaluator.class)
|
||||
.withFunctionName("ebeSubtract", EBESubtractEvaluator.class)
|
||||
.withFunctionName("ebeMultiply", EBEMultiplyEvaluator.class)
|
||||
.withFunctionName("ebeDivide", EBEDivideEvaluator.class)
|
||||
.withFunctionName("dotProduct", DotProductEvaluator.class)
|
||||
.withFunctionName("cosineSimilarity", CosineSimilarityEvaluator.class)
|
||||
|
||||
// Boolean Stream Evaluators
|
||||
.withFunctionName("and", AndEvaluator.class)
|
||||
|
|
|
@@ -767,7 +767,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
  }

  static private final class LookaheadSuccess extends java.lang.Error { }
  final private LookaheadSuccess jj_ls = new LookaheadSuccess();
  static final private LookaheadSuccess jj_ls = new LookaheadSuccess();
  private boolean jj_scan_token(int kind) {
    if (jj_scanpos == jj_lastpos) {
      jj_la--;
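The change above (SOLR-11242) pre-allocates the lookahead signal once instead of constructing it on every scan. A minimal sketch of the idiom, not the generated parser code itself; Signal and LOOKAHEAD_SUCCESS are illustrative names standing in for jj_ls inside some enclosing class:

    // A once-allocated signal avoids filling in a stack trace on every lookahead,
    // which is the allocation cost this change removes. Throwable's 4-argument
    // constructor can additionally disable the stack trace entirely.
    static final class Signal extends Error {
      Signal() { super(null, null, false, false); }  // no suppression, no writable stack trace
    }
    static final Signal LOOKAHEAD_SUCCESS = new Signal();
    // usage: throw LOOKAHEAD_SUCCESS; ... catch (Signal s) { /* lookahead matched */ }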
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.request.json;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
/**
|
||||
* Convert json query object to local params.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
class JsonQueryConverter {
|
||||
private int numParams = 0;
|
||||
|
||||
String toLocalParams(Object jsonQueryObject, Map<String, String[]> additionalParams) {
|
||||
if (jsonQueryObject instanceof String) return jsonQueryObject.toString();
|
||||
StringBuilder builder = new StringBuilder();
|
||||
buildLocalParams(builder, jsonQueryObject, true, additionalParams);
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
private String putParam(String val, Map<String, String[]> additionalParams) {
|
||||
String name = "_tt"+(numParams++);
|
||||
additionalParams.put(name, new String[]{val});
|
||||
return name;
|
||||
}
|
||||
|
||||
private void buildLocalParams(StringBuilder builder, Object val, boolean isQParser, Map<String, String[]> additionalParams) {
|
||||
if (!isQParser && !(val instanceof Map)) {
|
||||
// val is value of a query parser, and it is not a map
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error when parsing json query, expect a json object here, but found : "+val);
|
||||
}
|
||||
if (val instanceof String) {
|
||||
builder.append('$').append(putParam(val.toString(), additionalParams));
|
||||
return;
|
||||
}
|
||||
if (val instanceof Number) {
|
||||
builder.append(val);
|
||||
return;
|
||||
}
|
||||
if (!(val instanceof Map)) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error when parsing json query, expect a json object here, but found : "+val);
|
||||
}
|
||||
|
||||
Map<String,Object> map = (Map<String, Object>) val;
|
||||
if (isQParser) {
|
||||
if (map.size() != 1) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error when parsing json query, expect only one query parser here, but found : "+map.keySet());
|
||||
}
|
||||
String qtype = map.keySet().iterator().next();
|
||||
Object subVal = map.get(qtype);
|
||||
|
||||
// We don't want to introduce unnecessary variable at root level
|
||||
boolean useSubBuilder = builder.length() > 0;
|
||||
StringBuilder subBuilder = builder;
|
||||
|
||||
if (useSubBuilder) subBuilder = new StringBuilder();
|
||||
|
||||
subBuilder.append("{!").append(qtype).append(' ');
|
||||
buildLocalParams(subBuilder, subVal, false, additionalParams);
|
||||
subBuilder.append("}");
|
||||
|
||||
if (useSubBuilder) builder.append('$').append(putParam(subBuilder.toString(), additionalParams));
|
||||
} else {
|
||||
for (Map.Entry<String, Object> entry : map.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
if (entry.getValue() instanceof List) {
|
||||
if (key.equals("query")) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
"Error when parsing json query, value of query field should not be a list, found : " + entry.getValue());
|
||||
}
|
||||
List l = (List) entry.getValue();
|
||||
for (Object subVal : l) {
|
||||
builder.append(key).append("=");
|
||||
buildLocalParams(builder, subVal, true, additionalParams);
|
||||
builder.append(" ");
|
||||
}
|
||||
} else {
|
||||
if (key.equals("query")) {
|
||||
key = "v";
|
||||
}
|
||||
builder.append(key).append("=");
|
||||
buildLocalParams(builder, entry.getValue(), true, additionalParams);
|
||||
builder.append(" ");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
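A hedged usage sketch of the converter above (it is package-private and normally driven from RequestUtil rather than called directly; the maps stand in for a parsed JSON body like {"lucene": {"df": "title", "query": "solr"}}, and java.util imports are assumed):

    Map<String, Object> inner = new LinkedHashMap<>();
    inner.put("df", "title");
    inner.put("query", "solr");
    Map<String, Object> json = new LinkedHashMap<>();
    json.put("lucene", inner);

    Map<String, String[]> extra = new HashMap<>();
    String localParams = new JsonQueryConverter().toLocalParams(json, extra);
    // localParams is roughly "{!lucene df=$_tt0 v=$_tt1 }", with _tt0 -> "title"
    // and _tt1 -> "solr" registered in `extra` as generated request parameters.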
@ -190,16 +190,20 @@ public class RequestUtil {
|
|||
}
|
||||
|
||||
// implement compat for existing components...
|
||||
JsonQueryConverter jsonQueryConverter = new JsonQueryConverter();
|
||||
if (json != null && !isShard) {
|
||||
for (Map.Entry<String,Object> entry : json.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
String out = null;
|
||||
boolean isQuery = false;
|
||||
boolean arr = false;
|
||||
if ("query".equals(key)) {
|
||||
out = "q";
|
||||
isQuery = true;
|
||||
} else if ("filter".equals(key)) {
|
||||
out = "fq";
|
||||
arr = true;
|
||||
isQuery = true;
|
||||
} else if ("fields".equals(key)) {
|
||||
out = "fl";
|
||||
arr = true;
|
||||
|
@ -230,14 +234,14 @@ public class RequestUtil {
|
|||
if (lst != null) {
|
||||
for (int i = 0; i < jsonSize; i++) {
|
||||
Object v = lst.get(i);
|
||||
newval[existingSize + i] = v.toString();
|
||||
newval[existingSize + i] = isQuery ? jsonQueryConverter.toLocalParams(v, newMap) : v.toString();
|
||||
}
|
||||
} else {
|
||||
newval[newval.length-1] = val.toString();
|
||||
newval[newval.length-1] = isQuery ? jsonQueryConverter.toLocalParams(val, newMap) : val.toString();
|
||||
}
|
||||
newMap.put(out, newval);
|
||||
} else {
|
||||
newMap.put(out, new String[]{val.toString()});
|
||||
newMap.put(out, new String[]{isQuery ? jsonQueryConverter.toLocalParams(val, newMap) : val.toString()});
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.solr.util.DateMathParser;
|
|||
import org.locationtech.spatial4j.shape.Shape;
|
||||
|
||||
/**
|
||||
* A field for indexed dates and date ranges. It's mostly compatible with TrieDateField. It has the potential to allow
|
||||
* A field for indexed dates and date ranges. It's mostly compatible with DatePointField. It has the potential to allow
|
||||
* efficient faceting, similar to facet.enum.
|
||||
*
|
||||
* @see NumberRangePrefixTreeStrategy
|
||||
|
@ -75,7 +75,7 @@ public class DateRangeField extends AbstractSpatialPrefixTreeFieldType<NumberRan
|
|||
if (shape instanceof UnitNRShape) {
|
||||
UnitNRShape unitShape = (UnitNRShape) shape;
|
||||
if (unitShape.getLevel() == tree.getMaxLevels()) {
|
||||
//fully precise date. We can be fully compatible with TrieDateField (incl. 'Z')
|
||||
//fully precise date. We can be fully compatible with DatePointField (incl. 'Z')
|
||||
return shape.toString() + 'Z';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
|
||||
/**
|
||||
* Create a boolean query from sub queries.
|
||||
* Sub queries can be marked as must, must_not, filter or should
|
||||
*
|
||||
* <p>Example: <code>{!bool should=title:lucene should=title:solr must_not=id:1}</code>
|
||||
*/
|
||||
public class BoolQParserPlugin extends QParserPlugin {
|
||||
public static final String NAME = "bool";
|
||||
|
||||
@Override
|
||||
public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
|
||||
return new QParser(qstr, localParams, params, req) {
|
||||
@Override
|
||||
public Query parse() throws SyntaxError {
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
SolrParams solrParams = SolrParams.wrapDefaults(localParams, params);
|
||||
addQueries(builder, solrParams.getParams("must"), BooleanClause.Occur.MUST);
|
||||
addQueries(builder, solrParams.getParams("must_not"), BooleanClause.Occur.MUST_NOT);
|
||||
addQueries(builder, solrParams.getParams("filter"), BooleanClause.Occur.FILTER);
|
||||
addQueries(builder, solrParams.getParams("should"), BooleanClause.Occur.SHOULD);
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
private void addQueries(BooleanQuery.Builder builder, String[] subQueries, BooleanClause.Occur occur) throws SyntaxError {
|
||||
if (subQueries != null) {
|
||||
for (String subQuery : subQueries) {
|
||||
builder.add(subQuery(subQuery, null).parse(), occur);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
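For reference, what a request such as {!bool must='title:lucene' should='title:solr' must_not='id:1'} builds, sketched directly against the Lucene API (assuming the default lucene parser reduces each simple sub-query to a TermQuery on those string fields):

    BooleanQuery.Builder b = new BooleanQuery.Builder();
    b.add(new TermQuery(new Term("title", "lucene")), BooleanClause.Occur.MUST);
    b.add(new TermQuery(new Term("title", "solr")), BooleanClause.Occur.SHOULD);
    b.add(new TermQuery(new Term("id", "1")), BooleanClause.Occur.MUST_NOT);
    Query q = b.build();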
@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.search.facet.AggValueSource;
|
||||
import org.apache.solr.search.function.FieldNameValueSource;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -33,6 +34,7 @@ public class FunctionQParser extends QParser {
|
|||
|
||||
public static final int FLAG_CONSUME_DELIMITER = 0x01; // consume delimiter after parsing arg
|
||||
public static final int FLAG_IS_AGG = 0x02;
|
||||
public static final int FLAG_USE_FIELDNAME_SOURCE = 0x04; // When a field name is encountered, use the placeholder FieldNameValueSource instead of resolving to a real ValueSource
|
||||
public static final int FLAG_DEFAULT = FLAG_CONSUME_DELIMITER;
|
||||
|
||||
/** @lucene.internal */
|
||||
|
@ -374,8 +376,13 @@ public class FunctionQParser extends QParser {
|
|||
} else if ("false".equals(id)) {
|
||||
valueSource = new BoolConstValueSource(false);
|
||||
} else {
|
||||
SchemaField f = req.getSchema().getField(id);
|
||||
valueSource = f.getType().getValueSource(f, this);
|
||||
if ((flags & FLAG_USE_FIELDNAME_SOURCE) != 0) {
|
||||
// Don't try to create a ValueSource for the field, just use a placeholder.
|
||||
valueSource = new FieldNameValueSource(id);
|
||||
} else {
|
||||
SchemaField f = req.getSchema().getField(id);
|
||||
valueSource = f.getType().getValueSource(f, this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
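The new FLAG_USE_FIELDNAME_SOURCE path above defers field resolution: instead of asking the schema for a concrete ValueSource at parse time, the parser hands back a FieldNameValueSource placeholder that the caller resolves itself. A hedged sketch of a consumer (fp is a FunctionQParser; the aggregation changes later in this commit do essentially this):

    ValueSource vs = fp.parseValueSource(
        FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE);
    if (vs instanceof FieldNameValueSource) {
      String field = ((FieldNameValueSource) vs).getFieldName();
      // resolve `field` against the schema in whatever way this aggregation needs
    }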
|
@ -81,6 +81,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
|
|||
map.put(SignificantTermsQParserPlugin.NAME, SignificantTermsQParserPlugin.class);
|
||||
map.put(PayloadScoreQParserPlugin.NAME, PayloadScoreQParserPlugin.class);
|
||||
map.put(PayloadCheckQParserPlugin.NAME, PayloadCheckQParserPlugin.class);
|
||||
map.put(BoolQParserPlugin.NAME, BoolQParserPlugin.class);
|
||||
|
||||
standardPlugins = Collections.unmodifiableMap(map);
|
||||
}
|
||||
|
|
|
@ -1017,14 +1017,14 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
|||
addParser("agg_min", new ValueSourceParser() {
|
||||
@Override
|
||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||
return new MinMaxAgg("min", fp.parseValueSource());
|
||||
return new MinMaxAgg("min", fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||
}
|
||||
});
|
||||
|
||||
addParser("agg_max", new ValueSourceParser() {
|
||||
@Override
|
||||
public ValueSource parse(FunctionQParser fp) throws SyntaxError {
|
||||
return new MinMaxAgg("max", fp.parseValueSource());
|
||||
return new MinMaxAgg("max", fp.parseValueSource(FunctionQParser.FLAG_DEFAULT | FunctionQParser.FLAG_USE_FIELDNAME_SOURCE));
|
||||
}
|
||||
});
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.search.facet;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
|
@ -25,9 +26,12 @@ import org.apache.lucene.index.OrdinalMap;
|
|||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.StrFieldSource;
|
||||
import org.apache.solr.search.function.FieldNameValueSource;
|
||||
|
||||
public class MinMaxAgg extends SimpleAggValueSource {
|
||||
final int minmax; // a multiplier to reverse the normal order of compare if this is max instead of min (i.e. max will be -1)
|
||||
|
@ -41,28 +45,46 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
|
||||
ValueSource vs = getArg();
|
||||
|
||||
if (vs instanceof StrFieldSource) {
|
||||
String field = ((StrFieldSource) vs).getField();
|
||||
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||
SchemaField sf = null;
|
||||
|
||||
if (vs instanceof FieldNameValueSource) {
|
||||
String field = ((FieldNameValueSource)vs).getFieldName();
|
||||
sf = fcontext.qcontext.searcher().getSchema().getField(field);
|
||||
|
||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||
if (sf.hasDocValues()) {
|
||||
// dv
|
||||
} else {
|
||||
// uif
|
||||
}
|
||||
vs = null;
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "min/max aggregations can't be used on multi-valued field " + field);
|
||||
} else {
|
||||
return new SingleValuedOrdAcc(fcontext, sf, numSlots);
|
||||
vs = sf.getType().getValueSource(sf, null);
|
||||
}
|
||||
}
|
||||
|
||||
if (vs instanceof StrFieldSource) {
|
||||
return new SingleValuedOrdAcc(fcontext, sf, numSlots);
|
||||
}
|
||||
|
||||
// Since functions don't currently have types, we rely on the type of the field
|
||||
if (sf != null && sf.getType().getNumberType() != null) {
|
||||
switch (sf.getType().getNumberType()) {
|
||||
case FLOAT:
|
||||
case DOUBLE:
|
||||
return new DFuncAcc(vs, fcontext, numSlots);
|
||||
case INTEGER:
|
||||
case LONG:
|
||||
return new LFuncAcc(vs, fcontext, numSlots);
|
||||
case DATE:
|
||||
return new DateFuncAcc(vs, fcontext, numSlots);
|
||||
}
|
||||
}
|
||||
|
||||
// numeric functions
|
||||
return new ValSlotAcc(vs, fcontext, numSlots);
|
||||
return new DFuncAcc(vs, fcontext, numSlots);
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetMerger createFacetMerger(Object prototype) {
|
||||
if (prototype instanceof Number)
|
||||
return new NumericMerger();
|
||||
if (prototype instanceof Double)
|
||||
return new NumericMerger(); // still use NumericMerger to handle NaN?
|
||||
else if (prototype instanceof Comparable) {
|
||||
return new ComparableMerger();
|
||||
} else {
|
||||
|
@ -114,8 +136,8 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
}
|
||||
}
|
||||
|
||||
class ValSlotAcc extends DoubleFuncSlotAcc {
|
||||
public ValSlotAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
||||
class DFuncAcc extends DoubleFuncSlotAcc {
|
||||
public DFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
||||
super(values, fcontext, numSlots, Double.NaN);
|
||||
}
|
||||
|
||||
|
@ -129,6 +151,101 @@ public class MinMaxAgg extends SimpleAggValueSource {
|
|||
result[slotNum] = val;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) {
|
||||
double val = result[slot];
|
||||
if (Double.isNaN(val)) {
|
||||
return null;
|
||||
} else {
|
||||
return val;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class LFuncAcc extends LongFuncSlotAcc {
|
||||
FixedBitSet exists;
|
||||
public LFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
||||
super(values, fcontext, numSlots, 0);
|
||||
exists = new FixedBitSet(numSlots);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc, int slotNum) throws IOException {
|
||||
long val = values.longVal(doc);
|
||||
if (val == 0 && !values.exists(doc)) return; // depend on fact that non existing values return 0 for func query
|
||||
|
||||
long currVal = result[slotNum];
|
||||
if (currVal == 0 && !exists.get(slotNum)) {
|
||||
exists.set(slotNum);
|
||||
result[slotNum] = val;
|
||||
} else if (Long.compare(val, currVal) * minmax < 0) {
|
||||
result[slotNum] = val;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) {
|
||||
long val = result[slot];
|
||||
if (val == 0 && !exists.get(slot)) {
|
||||
return null;
|
||||
} else {
|
||||
return val;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resize(Resizer resizer) {
|
||||
super.resize(resizer);
|
||||
exists = resizer.resize(exists);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slotA, int slotB) {
|
||||
long a = result[slotA];
|
||||
long b = result[slotB];
|
||||
boolean ea = a != 0 || exists.get(slotA);
|
||||
boolean eb = b != 0 || exists.get(slotB);
|
||||
|
||||
if (ea != eb) {
|
||||
if (ea) return 1; // a exists and b doesn't TODO: we need context to be able to sort missing last! SOLR-10618
|
||||
if (eb) return -1; // b exists and a is missing
|
||||
}
|
||||
|
||||
return Long.compare(a, b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
super.reset();
|
||||
exists.clear(0, exists.length());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class DateFuncAcc extends LongFuncSlotAcc {
|
||||
private static final long MISSING = Long.MIN_VALUE;
|
||||
public DateFuncAcc(ValueSource values, FacetContext fcontext, int numSlots) {
|
||||
super(values, fcontext, numSlots, MISSING);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void collect(int doc, int slotNum) throws IOException {
|
||||
long val = values.longVal(doc);
|
||||
if (val == 0 && !values.exists(doc)) return; // depend on fact that non existing values return 0 for func query
|
||||
|
||||
long currVal = result[slotNum];
|
||||
if (Long.compare(val, currVal) * minmax < 0 || currVal == MISSING) {
|
||||
result[slotNum] = val;
|
||||
}
|
||||
}
|
||||
|
||||
// let compare be the default for now (since we can't yet correctly handle sortMissingLast)
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) {
|
||||
return result[slot] == MISSING ? null : new Date(result[slot]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
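Taken together, the MinMaxAgg changes above mean a facet request such as the following (sketched as a Java string in the style of the tests later in this commit; field names are illustrative) now returns an integral min for an int/long field, a Date for a date field, and omits the value rather than returning NaN for buckets with no values:

    // passed as the json.facet request parameter; num_i is a single-valued int
    // field, date_dt a single-valued date field
    String jsonFacet = "{ cats: { type: terms, field: cat_s, " +
        "facet: { lo: 'min(num_i)', first: 'min(date_dt)' } } }";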
|
@ -16,14 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.search.facet;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Array;
|
||||
|
@ -32,6 +24,16 @@ import java.util.Arrays;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
||||
/**
|
||||
* Accumulates statistics separated by a slot number.
|
||||
* There is a separate statistic per slot. The slot is usually an ordinal into a set of values, e.g. tracking a count
|
||||
|
@ -140,6 +142,38 @@ public abstract class SlotAcc implements Closeable {
|
|||
return values;
|
||||
}
|
||||
|
||||
public long[] resize(long[] old, long defaultValue) {
|
||||
long[] values = new long[getNewSize()];
|
||||
if (defaultValue != 0) {
|
||||
Arrays.fill(values, 0, values.length, defaultValue);
|
||||
}
|
||||
for (int i = 0; i < old.length; i++) {
|
||||
long val = old[i];
|
||||
if (val != defaultValue) {
|
||||
int newSlot = getNewSlot(i);
|
||||
if (newSlot >= 0) {
|
||||
values[newSlot] = val;
|
||||
}
|
||||
}
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
||||
    public FixedBitSet resize(FixedBitSet old) {
      FixedBitSet values = new FixedBitSet(getNewSize());
      int oldSize = old.length();

      // Walk the set bits of the old bitset (not the freshly allocated one,
      // which is empty) and map each slot to its new position.
      for (int oldSlot = 0;;) {
        oldSlot = old.nextSetBit(oldSlot);
        if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
        int newSlot = getNewSlot(oldSlot);
        if (newSlot >= 0) {
          values.set(newSlot);
        }
        if (++oldSlot >= oldSize) break;
      }

      return values;
    }
||||
|
||||
public <T> T[] resize(T[] old, T defaultValue) {
|
||||
T[] values = (T[]) Array.newInstance(old.getClass().getComponentType(), getNewSize());
|
||||
if (defaultValue != null) {
|
||||
|
@ -222,6 +256,40 @@ abstract class DoubleFuncSlotAcc extends FuncSlotAcc {
|
|||
}
|
||||
}
|
||||
|
||||
abstract class LongFuncSlotAcc extends FuncSlotAcc {
|
||||
long[] result;
|
||||
long initialValue;
|
||||
|
||||
public LongFuncSlotAcc(ValueSource values, FacetContext fcontext, int numSlots, long initialValue) {
|
||||
super(values, fcontext, numSlots);
|
||||
this.initialValue = initialValue;
|
||||
result = new long[numSlots];
|
||||
if (initialValue != 0) {
|
||||
reset();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(int slotA, int slotB) {
|
||||
return Long.compare(result[slotA], result[slotB]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getValue(int slot) {
|
||||
return result[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
Arrays.fill(result, initialValue);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void resize(Resizer resizer) {
|
||||
result = resizer.resize(result, initialValue);
|
||||
}
|
||||
}
|
||||
|
||||
abstract class IntSlotAcc extends SlotAcc {
|
||||
int[] result; // use LongArray32
|
||||
int initialValue;
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
||||
/** Placeholder value source.
|
||||
* @lucene.internal */
|
||||
public class FieldNameValueSource extends ValueSource {
|
||||
private String fieldName;
|
||||
|
||||
public FieldNameValueSource(String fieldName) {
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
public String getFieldName() {
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
|
||||
throw new UnsupportedOperationException("FieldNameValueSource should not be directly used: " + this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return o instanceof FieldNameValueSource && fieldName.equals(((FieldNameValueSource)o).getFieldName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return fieldName.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "FIELDNAME(" + fieldName + ")";
|
||||
}
|
||||
}
|
|
@ -35,7 +35,7 @@ import static org.apache.solr.update.processor.FieldValueMutatingUpdateProcessor
|
|||
* <p>For example, with the configuration listed below any documents
|
||||
* containing String values (such as "<code>abcdef</code>" or
|
||||
* "<code>xyz</code>") in a field declared in the schema using
|
||||
* <code>TrieIntField</code> or <code>TrieLongField</code>
|
||||
* <code>IntPointField</code> or <code>LongPointField</code>
|
||||
* would have those Strings replaced with the length of those fields as an
|
||||
* Integer
|
||||
* (ie: <code>6</code> and <code>3</code> respectively)
|
||||
|
@ -43,8 +43,8 @@ import static org.apache.solr.update.processor.FieldValueMutatingUpdateProcessor
|
|||
* <pre class="prettyprint">
|
||||
* <processor class="solr.FieldLengthUpdateProcessorFactory">
|
||||
* <arr name="typeClass">
|
||||
* <str>solr.TrieIntField</str>
|
||||
* <str>solr.TrieLongField</str>
|
||||
* <str>solr.IntPointField</str>
|
||||
* <str>solr.LongPointField</str>
|
||||
* </arr>
|
||||
* </processor></pre>
|
||||
*/
|
||||
|
|
|
@ -79,7 +79,7 @@ import static org.apache.solr.update.processor.FieldMutatingUpdateProcessor.SELE
|
|||
* In the ExampleFieldMutatingUpdateProcessorFactory configured below,
|
||||
* fields will be mutated if the name starts with "foo" <i>or</i> "bar";
|
||||
* <b>unless</b> the field name contains the substring "SKIP" <i>or</i>
|
||||
* the fieldType is (or subclasses) TrieDateField. Meaning a field named
|
||||
* the fieldType is (or subclasses) DatePointField. Meaning a field named
|
||||
* "foo_SKIP" is guaranteed not to be selected, but a field named "bar_smith"
|
||||
* that uses StrField will be selected.
|
||||
* </p>
|
||||
|
@ -92,7 +92,7 @@ import static org.apache.solr.update.processor.FieldMutatingUpdateProcessor.SELE
|
|||
* <str name="fieldRegex">.*SKIP.*</str>
|
||||
* </lst>
|
||||
* <lst name="exclude">
|
||||
* <str name="typeClass">solr.TrieDateField</str>
|
||||
* <str name="typeClass">solr.DatePointField</str>
|
||||
* </lst>
|
||||
* </processor></pre>
|
||||
*
|
||||
|
|
|
@ -47,8 +47,8 @@ import org.slf4j.LoggerFactory;
|
|||
* </p>
|
||||
* <p>
|
||||
* The default selection behavior is to mutate both those fields that don't match
|
||||
* a schema field, as well as those fields that match a schema field with a field
|
||||
* type that uses class solr.TrieDateField.
|
||||
* a schema field, as well as those fields that match a schema field with a date
|
||||
* field type.
|
||||
* </p>
|
||||
* <p>
|
||||
* If all values are parseable as dates (or are already Date), then the field will
|
||||
|
|
|
@ -38,8 +38,8 @@ import java.util.Locale;
|
|||
* </p>
|
||||
* <p>
|
||||
* The default selection behavior is to mutate both those fields that don't match
|
||||
* a schema field, as well as those fields that match a schema field with a field
|
||||
* type that uses class solr.TrieDoubleField.
|
||||
* a schema field, as well as those fields that match a schema field with a double
|
||||
* field type.
|
||||
* </p>
|
||||
* <p>
|
||||
* If all values are parseable as double (or are already Double), then the field
|
||||
|
|
|
@ -38,8 +38,8 @@ import java.util.Locale;
|
|||
* </p>
|
||||
* <p>
|
||||
* The default selection behavior is to mutate both those fields that don't match
|
||||
* a schema field, as well as those fields that match a schema field with a field
|
||||
* type that uses class solr.TrieFloatField.
|
||||
* a schema field, as well as those fields that match a schema field with a float
|
||||
* field type.
|
||||
* </p>
|
||||
* <p>
|
||||
* If all values are parseable as float (or are already Float), then the field
|
||||
|
|
|
@ -35,8 +35,8 @@ import java.util.Locale;
|
|||
* </p>
|
||||
* <p>
|
||||
* The default selection behavior is to mutate both those fields that don't match
|
||||
* a schema field, as well as those fields that match a schema field with a field
|
||||
* type that uses class solr.TrieIntField.
|
||||
* a schema field, as well as those fields that match a schema field with an int
|
||||
* field type.
|
||||
* </p>
|
||||
* <p>
|
||||
* If all values are parseable as int (or are already Integer), then the field
|
||||
|
|
|
@ -35,8 +35,8 @@ import java.util.Locale;
|
|||
* </p>
|
||||
* <p>
|
||||
* The default selection behavior is to mutate both those fields that don't match
|
||||
* a schema field, as well as those fields that match a schema field with a field
|
||||
* type that uses class solr.TrieLongField.
|
||||
* a schema field, as well as those fields that match a schema field with a long
|
||||
* field type.
|
||||
* </p>
|
||||
* <p>
|
||||
* If all values are parseable as long (or are already Long), then the field
|
||||
|
|
|
@ -869,7 +869,7 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 {
|
|||
|
||||
// testing everything from query level is hard because
|
||||
// time marches on ... and there is no easy way to reach into the
|
||||
// bowels of TrieDateField and muck with the definition of "now"
|
||||
// bowels of DatePointField and muck with the definition of "now"
|
||||
// ...
|
||||
// BUT: we can test that crazy combinations of "NOW" all work correctly,
|
||||
// assuming the test doesn't take too long to run...
|
||||
|
|
|
@ -30,10 +30,10 @@ public class TestDistributedMissingSort extends BaseDistributedSearchTestCase {
|
|||
schemaString = "schema-distributed-missing-sort.xml";
|
||||
}
|
||||
|
||||
String sint1_ml = "one_i1_ml"; // TrieIntField, sortMissingLast=true, multiValued=false
|
||||
String sint1_mf = "two_i1_mf"; // TrieIntField, sortMissingFirst=true, multiValued=false
|
||||
String long1_ml = "three_l1_ml"; // TrieLongField, sortMissingLast=true, multiValued=false
|
||||
String long1_mf = "four_l1_mf"; // TrieLongField, sortMissingFirst=true, multiValued=false
|
||||
String sint1_ml = "one_i1_ml"; // int field, sortMissingLast=true, multiValued=false
|
||||
String sint1_mf = "two_i1_mf"; // int field, sortMissingFirst=true, multiValued=false
|
||||
String long1_ml = "three_l1_ml"; // long field, sortMissingLast=true, multiValued=false
|
||||
String long1_mf = "four_l1_mf"; // long field, sortMissingFirst=true, multiValued=false
|
||||
String string1_ml = "five_s1_ml"; // StringField, sortMissingLast=true, multiValued=false
|
||||
String string1_mf = "six_s1_mf"; // StringField, sortMissingFirst=true, multiValued=false
|
||||
|
||||
|
|
|
@ -239,6 +239,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-11278")
|
||||
public void testBootstrapWithContinousIndexingOnSourceCluster() throws Exception {
|
||||
// start the target first so that we know its zkhost
|
||||
MiniSolrCloudCluster target = new MiniSolrCloudCluster(1, createTempDir("cdcr-target"), buildJettyConfig("/solr"));
|
||||
|
|
|
@ -781,7 +781,6 @@ public class TestLazyCores extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
@BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-10101")
|
||||
// Ensure that when a core is aged out of the transient cache, any uncommitted docs are preserved.
|
||||
// Note, this needs FS-based indexes to persist!
|
||||
// Cores 2, 3, 6, 7, 8, 9 are transient
|
||||
|
@ -814,7 +813,8 @@ public class TestLazyCores extends SolrTestCaseJ4 {
|
|||
openCores.clear();
|
||||
|
||||
// We still should have 6, 7, 8, 9 loaded, their reference counts have NOT dropped to zero
|
||||
checkInCores(cc, "collection6", "collection7", "collection8", "collection9");
|
||||
checkInCores(cc, "collection1", "collection5",
|
||||
"collection6", "collection7", "collection8", "collection9");
|
||||
|
||||
for (String coreName : coreList) {
|
||||
// The point of this test is to ensure that when cores are aged out and re-opened
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.apache.solr.common.cloud.Replica;
|
|||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
|
|
@ -230,15 +230,15 @@ public class CursorMarkTest extends SolrTestCaseJ4 {
|
|||
random().nextBytes(randBytes);
|
||||
val = new BytesRef(randBytes);
|
||||
} else if (fieldName.contains("int")) {
|
||||
val = random().nextInt(); // TrieIntField
|
||||
val = random().nextInt();
|
||||
} else if (fieldName.contains("long")) {
|
||||
val = random().nextLong(); // TrieLongField
|
||||
val = random().nextLong();
|
||||
} else if (fieldName.contains("float")) {
|
||||
val = random().nextFloat() * random().nextInt(); // TrieFloatField
|
||||
val = random().nextFloat() * random().nextInt();
|
||||
} else if (fieldName.contains("double")) {
|
||||
val = random().nextDouble() * random().nextInt(); // TrieDoubleField
|
||||
val = random().nextDouble() * random().nextInt();
|
||||
} else if (fieldName.contains("date")) {
|
||||
val = random().nextLong(); // TrieDateField
|
||||
val = random().nextLong();
|
||||
} else if (fieldName.startsWith("currency")) {
|
||||
val = random().nextDouble();
|
||||
} else if (fieldName.startsWith("uuid")) {
|
||||
|
|
|
@ -1190,6 +1190,27 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
public void testBoolQuery() throws Exception {
|
||||
assertQueryEquals("bool",
|
||||
"{!bool must='{!lucene}foo_s:a' must='{!lucene}foo_s:b'}",
|
||||
"{!bool must='{!lucene}foo_s:b' must='{!lucene}foo_s:a'}");
|
||||
assertQueryEquals("bool",
|
||||
"{!bool must_not='{!lucene}foo_s:a' should='{!lucene}foo_s:b' " +
|
||||
"must='{!lucene}foo_s:c' filter='{!lucene}foo_s:d' filter='{!lucene}foo_s:e'}",
|
||||
"{!bool must='{!lucene}foo_s:c' filter='{!lucene}foo_s:d' " +
|
||||
"must_not='{!lucene}foo_s:a' should='{!lucene}foo_s:b' filter='{!lucene}foo_s:e'}");
|
||||
try {
|
||||
assertQueryEquals
|
||||
("bool"
|
||||
, "{!bool must='{!lucene}foo_s:a'}"
|
||||
, "{!bool should='{!lucene}foo_s:a'}"
|
||||
);
|
||||
fail("queries should not have been equal");
|
||||
} catch(AssertionFailedError e) {
|
||||
assertTrue("queries were not equal, as expected", true);
|
||||
}
|
||||
}
|
||||
|
||||
// Override req to add df param
|
||||
public static SolrQueryRequest req(String... q) {
|
||||
return SolrTestCaseJ4.req(q, "df", "text");
|
||||
|
|
|
@ -81,7 +81,7 @@ public class TestSmileRequest extends SolrTestCaseJ4 {
|
|||
}
|
||||
};
|
||||
client.queryDefaults().set("shards", servers.getShards());
|
||||
TestJsonRequest.doJsonRequest(client);
|
||||
TestJsonRequest.doJsonRequest(client, true);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -278,7 +278,7 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
|||
q = qParser.getQuery();
|
||||
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
|
||||
|
||||
// large numeric filter query should use TermsQuery (for trie fields)
|
||||
// large numeric filter query should use TermsQuery
|
||||
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
|
||||
qParser.setIsFilter(true); // this may change in the future
|
||||
qParser.setParams(params);
|
||||
|
|
|
@ -467,28 +467,29 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
|
||||
|
||||
// single valued strings
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") );
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_s", "cat_s","cat_s", "where_s","where_s", "num_d","num_d", "num_i","num_i", "num_l","long_l", "super_s","super_s", "val_b","val_b", "date","date_dt", "sparse_s","sparse_s" ,"multi_ss","multi_ss") );
|
||||
|
||||
// multi-valued strings, long/float substitute for int/double
|
||||
doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
|
||||
doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_f", "num_i","num_l", "num_l","long_l", "num_is","num_ls", "num_fs", "num_ds", "super_s","super_ss", "val_b","val_b", "date","date_dt", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
|
||||
|
||||
// multi-valued strings, method=dv for terms facets
|
||||
doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
|
||||
doStatsTemplated(client, params(p, "terms_method", "method:dv,", "rows", "0", "noexist", "noexist_ss", "cat_s", "cat_ss", "where_s", "where_ss", "num_d", "num_f", "num_i", "num_l", "num_l","long_l","super_s", "super_ss", "val_b", "val_b", "date", "date_dt", "sparse_s", "sparse_ss", "multi_ss", "multi_ss"));
|
||||
|
||||
// single valued docvalues for strings, and single valued numeric doc values for numeric fields
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "num_is","num_lds", "num_l","long_ld", "num_fs","num_dds", "super_s","super_sd", "val_b","val_b", "date","date_dtd", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
|
||||
|
||||
// multi-valued docvalues
|
||||
FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
|
||||
|
||||
// multi-valued docvalues
|
||||
FacetFieldProcessorByArrayDV.unwrap_singleValued_multiDv = true;
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
|
||||
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "num_is","num_ids", "num_l","long_ld", "num_fs","num_fds", "super_s","super_sds", "val_b","val_b", "date","date_dtds", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
|
||||
}
|
||||
|
||||
public static void doStatsTemplated(Client client, ModifiableSolrParams p) throws Exception {
|
||||
p.set("Z_num_i", "Z_" + p.get("num_i") );
|
||||
p.set("Z_num_l", "Z_" + p.get("num_l") );
|
||||
p.set("sparse_num_d", "sparse_" + p.get("num_d") );
|
||||
if (p.get("num_is") == null) p.add("num_is","num_is");
|
||||
if (p.get("num_fs") == null) p.add("num_fs","num_fs");
|
||||
|
@ -528,6 +529,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
String num_is = m.expand("${num_is}");
|
||||
String num_fs = m.expand("${num_fs}");
|
||||
String Z_num_i = m.expand("${Z_num_i}");
|
||||
String Z_num_l = m.expand("${Z_num_l}");
|
||||
String val_b = m.expand("${val_b}");
|
||||
String date = m.expand("${date}");
|
||||
String super_s = m.expand("${super_s}");
|
||||
|
@ -553,13 +555,13 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
iclient.add(doc, null);
|
||||
iclient.add(doc, null);
|
||||
iclient.add(doc, null); // a couple of deleted docs
|
||||
iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0"), null);
|
||||
iclient.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", num_is,"3",num_is,"-1", num_fs,"3",num_fs,"-1.5", super_s,"superman", date,"2002-02-02T02:02:02Z", val_b, "false" , multi_ss,"a", multi_ss,"b" , Z_num_i, "0", Z_num_l,"0"), null);
|
||||
iclient.add(sdoc("id", "3"), null);
|
||||
iclient.commit();
|
||||
iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE), null);
|
||||
iclient.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", sparse_num_d,"-4",num_i, "3", num_is,"0",num_is,"3", num_fs,"0", num_fs,"3", super_s,"spiderman", date,"2003-03-03T03:03:03Z" , multi_ss, "b", Z_num_i, ""+Integer.MIN_VALUE, Z_num_l,Long.MIN_VALUE), null);
|
||||
iclient.add(sdoc("id", "5", cat_s, "B", where_s, "NJ", num_d, "11", num_i, "7", num_is,"0", num_fs,"0", super_s,"batman" , date,"2001-02-03T01:02:03Z" ,sparse_s,"two", multi_ss, "a"), null);
|
||||
iclient.commit();
|
||||
iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE), null);
|
||||
iclient.add(sdoc("id", "6", cat_s, "B", where_s, "NY", num_d, "-5", num_i, "-5", num_is,"-1", num_fs,"-1.5", super_s,"hulk" , date,"2002-03-01T03:02:01Z" , multi_ss, "b", multi_ss, "a", Z_num_i, ""+Integer.MAX_VALUE, Z_num_l,Long.MAX_VALUE), null);
|
||||
iclient.commit();
|
||||
client.commit();
|
||||
|
||||
|
@ -685,6 +687,35 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
", f2:{ 'buckets':[{ val:'B', count:3, n1:-2.0}, { val:'A', count:2, n1:6.0 }]} }"
|
||||
);
|
||||
|
||||
// facet on numbers to test resize from hashing (may need to be sorting by the metric to test that)
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" f1:{${terms} type:field, field:${num_is}, facet:{a:'min(${num_i})'}, sort:'a asc' }" +
|
||||
",f2:{${terms} type:field, field:${num_is}, facet:{a:'max(${num_i})'}, sort:'a desc' }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={count:6 " +
|
||||
",f1:{ buckets:[{val:-1,count:2,a:-5},{val:3,count:2,a:-5},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:0,count:2,a:3} ] } " +
|
||||
",f2:{ buckets:[{val:0,count:2,a:7},{val:3,count:2,a:3},{val:-5,count:1,a:2},{val:2,count:1,a:2},{val:-1,count:2,a:-5} ] } " +
|
||||
"}"
|
||||
);
|
||||
|
||||
|
||||
// Same thing for dates
|
||||
// test min/max of string field
|
||||
if (date.equals("date_dt") || date.equals("date_dtd")) { // supports only single valued currently...
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" f3:{${terms} type:field, field:${num_is}, facet:{a:'min(${date})'}, sort:'a desc' }" +
|
||||
",f4:{${terms} type:field, field:${num_is}, facet:{a:'max(${date})'}, sort:'a asc' }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={count:6 " +
|
||||
",f3:{ buckets:[{val:-1,count:2,a:'2002-02-02T02:02:02Z'},{val:3,count:2,a:'2002-02-02T02:02:02Z'},{val:0,count:2,a:'2001-02-03T01:02:03Z'},{val:-5,count:1,a:'2001-01-01T01:01:01Z'},{val:2,count:1,a:'2001-01-01T01:01:01Z'} ] } " +
|
||||
",f4:{ buckets:[{val:-5,count:1,a:'2001-01-01T01:01:01Z'},{val:2,count:1,a:'2001-01-01T01:01:01Z'},{val:-1,count:2,a:'2002-03-01T03:02:01Z'},{val:0,count:2,a:'2003-03-03T03:03:03Z'},{val:3,count:2,a:'2003-03-03T03:03:03Z'} ] } " +
|
||||
"}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
// percentiles 0,10,50,90,100
|
||||
|
@ -983,16 +1014,20 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
|
||||
// stats at top level
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', min1:'min(${num_d})', max1:'max(${num_d})'" +
|
||||
, "json.facet", "{ sum1:'sum(${num_d})', sumsq1:'sumsq(${num_d})', avg1:'avg(${num_d})', avg2:'avg(def(${num_d},0))', mind:'min(${num_d})', maxd:'max(${num_d})'" +
|
||||
", numwhere:'unique(${where_s})', unique_num_i:'unique(${num_i})', unique_num_d:'unique(${num_d})', unique_date:'unique(${date})'" +
|
||||
", where_hll:'hll(${where_s})', hll_num_i:'hll(${num_i})', hll_num_d:'hll(${num_d})', hll_date:'hll(${date})'" +
|
||||
", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})' }"
|
||||
", med:'percentile(${num_d},50)', perc:'percentile(${num_d},0,50.0,100)', variance:'variance(${num_d})', stddev:'stddev(${num_d})'" +
|
||||
", mini:'min(${num_i})', maxi:'max(${num_i})'" +
|
||||
" }"
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
"sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, min1:-9.0, max1:11.0" +
|
||||
"sum1:3.0, sumsq1:247.0, avg1:0.6, avg2:0.5, mind:-9.0, maxd:11.0" +
|
||||
", numwhere:2, unique_num_i:4, unique_num_d:5, unique_date:5" +
|
||||
", where_hll:2, hll_num_i:4, hll_num_d:5, hll_date:5" +
|
||||
", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639}"
|
||||
", med:2.0, perc:[-9.0,2.0,11.0], variance:49.04, stddev:7.002856560004639" +
|
||||
", mini:-5, maxi:7" +
|
||||
"}"
|
||||
);
|
||||
|
||||
// stats at top level, no matches
|
||||
|
@ -1019,8 +1054,8 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
",sum1:0.0," +
|
||||
" sumsq1:0.0," +
|
||||
" avg1:0.0," + // TODO: undesirable. omit?
|
||||
" min1:'NaN'," + // TODO: undesirable. omit?
|
||||
" max1:'NaN'," +
|
||||
// " min1:'NaN'," +
|
||||
// " max1:'NaN'," +
|
||||
" numwhere:0," +
|
||||
" unique_num_i:0," +
|
||||
" unique_num_d:0," +
|
||||
|
@ -1307,16 +1342,26 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
"}"
|
||||
);
|
||||
|
||||
// test 0, min/max int
|
||||
// test 0, min/max int/long
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{" +
|
||||
" u : 'unique(${Z_num_i})'" +
|
||||
" u : 'unique(${Z_num_i})'" +
|
||||
", u2 : 'unique(${Z_num_l})'" +
|
||||
", min1 : 'min(${Z_num_i})', max1 : 'max(${Z_num_i})'" +
|
||||
", min2 : 'min(${Z_num_l})', max2 : 'max(${Z_num_l})'" +
|
||||
", f1:{${terms} type:field, field:${Z_num_i} }" +
|
||||
", f2:{${terms} type:field, field:${Z_num_l} }" +
|
||||
"}"
|
||||
)
|
||||
, "facets=={count:6 " +
|
||||
",u:3" +
|
||||
",u2:3" +
|
||||
",min1:" + Integer.MIN_VALUE +
|
||||
",max1:" + Integer.MAX_VALUE +
|
||||
",min2:" + Long.MIN_VALUE +
|
||||
",max2:" + Long.MAX_VALUE +
|
||||
",f1:{ buckets:[{val:" + Integer.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Integer.MAX_VALUE+",count:1}]} " +
|
||||
",f2:{ buckets:[{val:" + Long.MIN_VALUE + ",count:1},{val:0,count:1},{val:" + Long.MAX_VALUE+",count:1}]} " +
|
||||
"}"
|
||||
);
|
||||
|
||||
|
@ -1394,11 +1439,12 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
// test acc reuse (i.e. reset() method). This is normally used for stats that are not calculated in the first phase,
|
||||
// currently non-sorting stats.
|
||||
client.testJQ(params(p, "q", "*:*"
|
||||
, "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}"
|
||||
, "json.facet", "{f1:{type:terms, field:'${cat_s}', facet:{h:'hll(${where_s})' , u:'unique(${where_s})', mind:'min(${num_d})', maxd:'max(${num_d})', mini:'min(${num_i})', maxi:'max(${num_i})'" +
|
||||
", sumd:'sum(${num_d})', avgd:'avg(${num_d})', variance:'variance(${num_d})', stddev:'stddev(${num_d})' } }}"
|
||||
)
|
||||
, "facets=={ 'count':6, " +
|
||||
"'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," +
|
||||
" {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } "
|
||||
"'f1':{ buckets:[{val:B, count:3, h:2, u:2, mind:-9.0, maxd:11.0, mini:-5, maxi:7, sumd:-3.0, avgd:-1.0, variance:74.66666666666667, stddev:8.640987597877148}," +
|
||||
" {val:A, count:2, h:2, u:2, mind:2.0, maxd:4.0, mini:2, maxi:3, sumd:6.0, avgd:3.0, variance:1.0, stddev:1.0}] } } "
|
||||
|
||||
);
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ public class TestJsonRequest extends SolrTestCaseHS {
|
|||
|
||||
@Test
|
||||
public void testLocalJsonRequest() throws Exception {
|
||||
doJsonRequest(Client.localClient);
|
||||
doJsonRequest(Client.localClient, false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -62,11 +62,10 @@ public class TestJsonRequest extends SolrTestCaseHS {
|
|||
initServers();
|
||||
Client client = servers.getClient( random().nextInt() );
|
||||
client.queryDefaults().set( "shards", servers.getShards() );
|
||||
doJsonRequest(client);
|
||||
doJsonRequest(client, true);
|
||||
}
|
||||
|
||||
|
||||
public static void doJsonRequest(Client client) throws Exception {
|
||||
public static void doJsonRequest(Client client, boolean isDistrib) throws Exception {
|
||||
client.deleteByQuery("*:*", null);
|
||||
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY"), null);
|
||||
client.add(sdoc("id", "2", "cat_s", "B", "where_s", "NJ"), null);
|
||||
|
@ -217,6 +216,178 @@ public class TestJsonRequest extends SolrTestCaseHS {
|
|||
, "debug/json=={query:'cat_s:A', filter:'where_s:NY'}"
|
||||
);
|
||||
|
||||
// test query dsl
|
||||
client.testJQ( params("json", "{'query':'{!lucene}id:1'}")
|
||||
, "response/numFound==1"
|
||||
);
|
||||
|
||||
client.testJQ( params("json", "{" +
|
||||
" 'query': {" +
|
||||
" 'bool' : {" +
|
||||
" 'should' : [" +
|
||||
" {'lucene' : {'query' : 'id:1'}}," +
|
||||
" 'id:2'" +
|
||||
" ]" +
|
||||
" }" +
|
||||
" }" +
|
||||
"}")
|
||||
, "response/numFound==2"
|
||||
);
|
||||
|
||||
client.testJQ( params("json", "{" +
|
||||
" 'query': {" +
|
||||
" 'bool' : {" +
|
||||
" 'should' : [" +
|
||||
" 'id:1'," +
|
||||
" 'id:2'" +
|
||||
" ]" +
|
||||
" }" +
|
||||
" }" +
|
||||
"}")
|
||||
, "response/numFound==2"
|
||||
);
|
||||
|
||||
client.testJQ( params("json", "{ " +
|
||||
" query : {" +
|
||||
" boost : {" +
|
||||
" query : {" +
|
||||
" lucene : { " +
|
||||
" df : cat_s, " +
|
||||
" query : A " +
|
||||
" }" +
|
||||
" }, " +
|
||||
" b : 1.5 " +
|
||||
" } " +
|
||||
" } " +
|
||||
"}")
|
||||
, "response/numFound==2"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{ " +
|
||||
" query : {" +
|
||||
" bool : {" +
|
||||
" must : {" +
|
||||
" lucene : {" +
|
||||
" q.op : AND," +
|
||||
" df : cat_s," +
|
||||
" query : A" +
|
||||
" }" +
|
||||
" }" +
|
||||
" must_not : {lucene : {query:'id: 1'}}" +
|
||||
" }" +
|
||||
" }" +
|
||||
"}")
|
||||
, "response/numFound==1"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{ " +
|
||||
" query : {" +
|
||||
" bool : {" +
|
||||
" must : {" +
|
||||
" lucene : {" +
|
||||
" q.op : AND," +
|
||||
" df : cat_s," +
|
||||
" query : A" +
|
||||
" }" +
|
||||
" }" +
|
||||
" must_not : [{lucene : {query:'id: 1'}}]" +
|
||||
" }" +
|
||||
" }" +
|
||||
"}")
|
||||
, "response/numFound==1"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{ " +
|
||||
" query : {" +
|
||||
" bool : {" +
|
||||
" must : '{!lucene q.op=AND df=cat_s}A'" +
|
||||
" must_not : '{!lucene v=\\'id:1\\'}'" +
|
||||
" }" +
|
||||
" }" +
|
||||
"}")
|
||||
, "response/numFound==1"
|
||||
);
|
||||
|
||||
|
||||
client.testJQ( params("json","{" +
|
||||
" query : '*:*'," +
|
||||
" filter : {" +
|
||||
" collapse : {" +
|
||||
" field : cat_s" +
|
||||
" } " +
|
||||
" } " +
|
||||
"}")
|
||||
, isDistrib ? "" : "response/numFound==2"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{" +
|
||||
" query : {" +
|
||||
" edismax : {" +
|
||||
" query : 'A'," +
|
||||
" qf : 'cat_s'," +
|
||||
" bq : {" +
|
||||
" edismax : {" +
|
||||
" query : 'NJ'" +
|
||||
" qf : 'where_s'" +
|
||||
" }" +
|
||||
" }" +
|
||||
" }" +
|
||||
" }, " +
|
||||
" fields : id" +
|
||||
"}")
|
||||
, "response/numFound==2", isDistrib? "" : "response/docs==[{id:'4'},{id:'1'}]"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{" +
|
||||
" query : {" +
|
||||
" edismax : {" +
|
||||
" query : 'A'," +
|
||||
" qf : 'cat_s'," +
|
||||
" bq : {" +
|
||||
" edismax : {" +
|
||||
" query : 'NY'" +
|
||||
" qf : 'where_s'" +
|
||||
" }" +
|
||||
" }" +
|
||||
" }" +
|
||||
" }, " +
|
||||
" fields : id" +
|
||||
"}")
|
||||
, "response/numFound==2", isDistrib? "" : "response/docs==[{id:'1'},{id:'4'}]"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{" +
|
||||
" query : {" +
|
||||
" dismax : {" +
|
||||
" query : 'A NJ'" +
|
||||
" qf : 'cat_s^0.1 where_s^100'" +
|
||||
" } " +
|
||||
" }, " +
|
||||
" filter : '-id:2'," +
|
||||
" fields : id" +
|
||||
"}")
|
||||
, "response/numFound==3", isDistrib? "" : "response/docs==[{id:'4'},{id:'5'},{id:'1'}]"
|
||||
);
|
||||
|
||||
client.testJQ( params("json","{" +
|
||||
" query : {" +
|
||||
" dismax : {" +
|
||||
" query : 'A NJ'" +
|
||||
" qf : ['cat_s^100', 'where_s^0.1']" +
|
||||
" } " +
|
||||
" }, " +
|
||||
" filter : '-id:2'," +
|
||||
" fields : id" +
|
||||
"}")
|
||||
, "response/numFound==3", isDistrib? "" : "response/docs==[{id:'4'},{id:'1'},{id:'5'}]"
|
||||
);
|
||||
|
||||
try {
|
||||
client.testJQ(params("json", "{query:{'lucene':'id:1'}}"));
|
||||
fail();
|
||||
} catch (Exception e) {
|
||||
assertTrue(e.getMessage().contains("id:1"));
|
||||
}
|
||||
|
||||
try {
|
||||
// test failure on unknown parameter
|
||||
|
|
|
@ -152,12 +152,6 @@ public class SoftAutoCommitTest extends AbstractSolrTestCase {
|
|||
minHardCommitNanos + "ns",
|
||||
minHardCommitNanos < firstHardNanos);
|
||||
|
||||
final Long firstSearcherNanos = monitor.searcher.poll(5000, MILLISECONDS);
|
||||
assertNotNull("didn't get a single new searcher", firstSearcherNanos);
|
||||
for (int i = 0; i <= softCommitMaxDocs; i++) {
|
||||
assertQ("should find one", req("id:"+(8000 + i)) ,"//result[@numFound=1]" );
|
||||
}
|
||||
|
||||
// wait a bit, w/o other action we shouldn't see any new hard/soft commits
|
||||
assertNull("Got a hard commit we weren't expecting",
|
||||
monitor.hard.poll(1000, MILLISECONDS));
|
||||
|
|
|
@ -218,7 +218,7 @@
|
|||
<dynamicField name="*_ds" type="pdouble" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
||||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false" />
|
||||
|
||||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/>
|
||||
|
|
|
@ -154,7 +154,7 @@
|
|||
<dynamicField name="*_ds" type="pdouble" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
||||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false" />
|
||||
|
||||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/>
|
||||
|
|
|
@ -218,7 +218,7 @@
|
|||
<dynamicField name="*_ds" type="pdouble" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Type used to index the lat and lon components for the "location" FieldType -->
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false" />
|
||||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false" />
|
||||
|
||||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dts" type="pdate" indexed="true" stored="true" multiValued="true"/>
|
||||
|
|
|
@ -442,7 +442,7 @@
|
|||
<dynamicField name="*_descendent_path" type="descendent_path" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ancestor_path" type="ancestor_path" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_txt_en_split" type="text_en_splitting" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_coordinate" type="pdouble" indexed="true" stored="false"/>
|
||||
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
|
||||
<dynamicField name="*_txt_rev" type="text_general_rev" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_phon_en" type="phonetic_en" indexed="true" stored="true"/>
|
||||
|
@ -482,7 +482,7 @@
|
|||
<dynamicField name="*_point" type="point" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_srpt" type="location_rpt" indexed="true" stored="true"/>
|
||||
<dynamicField name="attr_*" type="text_general" multiValued="true" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_l_ns" type="long" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_l_ns" type="plong" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_s_ns" type="string" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dts" type="pdate" multiValued="true" indexed="true" stored="true"/>
|
||||
|
@ -493,10 +493,6 @@
|
|||
<dynamicField name="*_fs" type="pfloats" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ds" type="pdoubles" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ti" type="tint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tl" type="tlong" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_tf" type="tfloat" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_ws" type="text_ws" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_i" type="pint" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
|
||||
|
|
|
@ -27,7 +27,7 @@ curl http://localhost:8983/solr/films/schema -X POST -H 'Content-type:applicatio
|
|||
},
|
||||
"add-field" : {
|
||||
"name":"initial_release_date",
|
||||
"type":"tdate",
|
||||
"type":"pdate",
|
||||
"stored":true
|
||||
}
|
||||
}'
|
||||
|
@ -83,7 +83,7 @@ FAQ:
|
|||
Why override the schema of the _name_ and _initial_release_date_ fields?
|
||||
|
||||
Without overriding those field types, the _name_ field would have been guessed as a multi-valued string field type
|
||||
and _initial_release_date_ would have been guessed as a multi-valued tdate type. It makes more sense with this
|
||||
and _initial_release_date_ would have been guessed as a multi-valued pdate type. It makes more sense with this
|
||||
particular data set domain to have the movie name be a single valued general full-text searchable field,
|
||||
and for the release date also to be single valued.
|
||||
|
||||
|
@ -109,7 +109,7 @@ curl http://localhost:8983/solr/films/schema -X POST -H 'Content-type:applicatio
|
|||
},
|
||||
"add-field" : {
|
||||
"name":"initial_release_date",
|
||||
"type":"tdate",
|
||||
"type":"pdate",
|
||||
"stored":true
|
||||
}
|
||||
}'
|
||||
|
|
|
@@ -1 +1 @@
889fd6d061bb63b99dd5c6aba35a555ae863de52
889fd6d061bb63b99dd5c6aba35a555ae863de52

@@ -1 +0,0 @@
b8f91682cfeb8f9196aad56ace9c9a13330acef6

@@ -0,0 +1 @@
c3809c2c2bc135109a7e1e231463da303299b5dd

@@ -1 +0,0 @@
91f3284993b44dcb2f003b5f28617abba13971d2

@@ -0,0 +1 @@
053da66a10597283d48266d1f09d572f8608ae3f
@@ -18,26 +18,40 @@

This is the source for the Solr Reference Guide.

Raw content is stored in Asciidoc (`.adoc`) formated files in the `src/` directory.
Raw content is stored in Asciidoc (`.adoc`) formatted files in the `src/` directory.

== Prerequisites for Building
These files are processed with AsciiDoctor in 2 different ways:

* Via 'Jekyll' to build an HTML browsable version of the Ref Guide
** Prerequisites: `Ruby` and the following gems must be installed:
*** `jekyll`
*** `jekyll-asciidoc`
*** `pygments.rb`
* Via `asciidoctor-ant` to build the officially released PDF version of the Ref Guide
** Prerequisites: None (except for those required to use the Lucene/Solr build: Java, Ant)
* Via Jekyll to build an HTML browsable version of the Ref Guide.
** Prerequisites: `Ruby` (v2.1 or higher) and the following gems must be installed:
*** `jekyll`: v3.5, not v4.x. Use `gem install --force --version 3.5.0 jekyll` to force install of Jekyll 3.5.0.
*** `jekyll-asciidoc`: v2.1 or higher. Use `gem install jekyll-asciidoc` to install.
*** `pygments.rb`: v1.1.2 or higher. Use `gem install pygments.rb` to install.
* Via `asciidoctor-ant` to build the officially released PDF version of the Ref Guide.
** Prerequisites: None beyond those required to use the main Lucene/Solr build: Java, and Ant.

== Building the Guide
For details on building the ref guide, see `ant -p`.

There are currently four available targets:

* `ant default`: builds both the PDF and HTML versions of the Solr Ref Guide.
* `ant build-site`: builds only the HTML version.
* `ant build-pdf`: builds only the PDF version.
* `ant clean`: removes the `../build/solr-ref-guide` directory.

The output of all builds will be located in `../build/solr-ref-guide`.

== Key Directories
Key directories to be aware of:

* `src` - where all human edited `*.adoc` files realted to the Guide live, as well as various configuration, theme, and template files.
* `src` - where all human edited `*.adoc` files related to the Guide live, as well as various configuration, theme, and template files.
* `tools` - custom Java code for parsing metadata in our `src/*.adoc` files to produce some `_data/` files for site & pdf navigation purposes.
* `../build/solr-ref-guide/content` - a copy of the `src` dir generated by ant where:
** `*.template` files are processed to replace ant properties with their runtime values
** some `../build/solr-ref-guide/content/_data` files are generated by our java tools based on header attributes from each of the `*.adoc` files
* `../build/solr-ref-guide/html-site` - HTML generated version of the ref guide
* `../build/solr-ref-guide/apache-solr-ref-guide-X.Y.pdf` - PDF generated version of the ref guide

See the additional documentation in `src/metadocs` for more information about how to edit files, build for releases, or modifying any Jekyll or PDF templates.
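Pulling this README together, a first build might look like the sketch below. It assumes Ruby, Java, and Ant are already installed and that the commands are run from the ref guide source directory (e.g. `solr/solr-ref-guide`; the exact path is an assumption, not stated above):

[source,bash]
----
# HTML toolchain prerequisites (gem versions as noted above)
gem install --force --version 3.5.0 jekyll
gem install jekyll-asciidoc
gem install pygments.rb

# build both the HTML and PDF versions into ../build/solr-ref-guide
ant default
----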

@@ -44,7 +44,6 @@ CDCR is configured to replicate from collections in the Source cluster to collec

CDCR can be configured to replicate from one collection to a second collection _within the same cluster_. That is a specialized scenario not covered in this document.

[glossary]
== CDCR Glossary

Terms used in this document include:
@@ -44,16 +44,18 @@ If you have already indexed data into your Solr index, you will need to complete

DocValues are only available for specific field types. The types chosen determine the underlying Lucene docValue type that will be used. The available Solr field types are:

* `StrField` and `UUIDField`.
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the SORTED type.
** If the field is multi-valued, Lucene will use the SORTED_SET type.
* Any `Trie*` numeric fields, date fields and `EnumFieldType`.
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the NUMERIC type.
** If the field is multi-valued, Lucene will use the SORTED_SET type.
* Boolean fields
* Int|Long|Float|Double|Date PointField
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the NUMERIC type.
** If the field is multi-valued, Lucene will use the SORTED_NUMERIC type.
* `StrField` and `UUIDField`:
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the `SORTED` type.
** If the field is multi-valued, Lucene will use the `SORTED_SET` type.
* `BoolField`:
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the `SORTED` type.
** If the field is multi-valued, Lucene will use the `SORTED_BINARY` type.
* Any `*PointField` Numeric or Date fields, `EnumFieldType`, and `CurrencyFieldType`:
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the `NUMERIC` type.
** If the field is multi-valued, Lucene will use the `SORTED_NUMERIC` type.
* Any of the deprecated `Trie*` Numeric or Date fields, `EnumField` and `CurrencyField`:
** If the field is single-valued (i.e., multi-valued is false), Lucene will use the `NUMERIC` type.
** If the field is multi-valued, Lucene will use the `SORTED_SET` type.

These Lucene types are related to how the {lucene-javadocs}/core/org/apache/lucene/index/DocValuesType.html[values are sorted and stored].
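As an aside, enabling docValues for any of these types is just an attribute on the field (or field type) definition in the schema; the field and type names below are illustrative, not taken from the change above:

[source,xml]
----
<field name="manu_exact" type="string" indexed="false" stored="false" docValues="true"/>
----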

@@ -86,4 +88,4 @@ In cases where the query is returning _only_ docValues fields performance may im

When retrieving fields from their docValues form (using the <<exporting-result-sets.adoc#exporting-result-sets,/export handler>>, <<streaming-expressions.adoc#streaming-expressions,streaming expressions>> or if the field is requested in the `fl` parameter), two important differences between regular stored fields and docValues fields must be understood:

1. Order is _not_ preserved. For simply retrieving stored fields, the insertion order is the return order. For docValues, it is the _sorted_ order.
2. Multiple identical entries are collapsed into a single value. Thus if I insert values 4, 5, 2, 4, 1, my return will be 1, 2, 4, 5.
2. For field types using `SORTED_SET`, multiple identical entries are collapsed into a single value. Thus if I insert values 4, 5, 2, 4, 1, my return will be 1, 2, 4, 5.
@@ -62,7 +62,7 @@ The field type `class` determines most of the behavior of a field type, but opti

[source,xml]
----
<fieldType name="date" class="solr.TrieDateField"
<fieldType name="date" class="solr.DatePointField"
           sortMissingLast="true" omitNorms="true"/>
----
@ -26,33 +26,66 @@ The following table lists the field types that are available in Solr. The `org.a
|
|||
|===
|
||||
|Class |Description
|
||||
|BinaryField |Binary data.
|
||||
|BoolField |Contains either true or false. Values of "1", "t", or "T" in the first character are interpreted as true. Any other values in the first character are interpreted as false.
|
||||
|CollationField |Supports Unicode collation for sorting and range queries. ICUCollationField is a better choice if you can use ICU4J. See the section <<language-analysis.adoc#unicode-collation,Unicode Collation>>.
|
||||
|CurrencyField |Deprecated in favor of CurrencyFieldType.
|
||||
|CurrencyFieldType |Supports currencies and exchange rates. See the section <<working-with-currencies-and-exchange-rates.adoc#working-with-currencies-and-exchange-rates,Working with Currencies and Exchange Rates>>.
|
||||
|
||||
|BoolField |Contains either true or false. Values of `1`, `t`, or `T` in the first character are interpreted as `true`. Any other values in the first character are interpreted as `false`.
|
||||
|
||||
|CollationField |Supports Unicode collation for sorting and range queries. The ICUCollationField is a better choice if you can use ICU4J. See the section <<language-analysis.adoc#unicode-collation,Unicode Collation>> for more information.
|
||||
|
||||
|CurrencyField |*Deprecated*. Use CurrencyFieldType instead.
|
||||
|
||||
|CurrencyFieldType |Supports currencies and exchange rates. See the section <<working-with-currencies-and-exchange-rates.adoc#working-with-currencies-and-exchange-rates,Working with Currencies and Exchange Rates>> for more information.
|
||||
|
||||
|DateRangeField |Supports indexing date ranges, to include point in time date instances as well (single-millisecond durations). See the section <<working-with-dates.adoc#working-with-dates,Working with Dates>> for more detail on using this field type. Consider using this field type even if it's just for date instances, particularly when the queries typically fall on UTC year/month/day/hour, etc., boundaries.
|
||||
|ExternalFileField |Pulls values from a file on disk. See the section <<working-with-external-files-and-processes.adoc#working-with-external-files-and-processes,Working with External Files and Processes>>.
|
||||
|EnumField |Deprecated in favor of EnumFieldType
|
||||
|
||||
|DatePointField |Date field. Represents a point in time with millisecond precision, encoded using a "Dimensional Points" based data structure that allows for very efficient searches for specific values, or ranges of values. See the section <<working-with-dates.adoc#working-with-dates,Working with Dates>> for more details on the supported syntax. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|
||||
|DoublePointField |Double field (64-bit IEEE floating point). This class encodes double values using a "Dimensional Points" based data structure that allows for very efficient searches for specific values, or ranges of values. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|
||||
|ExternalFileField |Pulls values from a file on disk. See the section <<working-with-external-files-and-processes.adoc#working-with-external-files-and-processes,Working with External Files and Processes>> for more information.
|
||||
|
||||
|EnumField |*Deprecated*. Use EnumFieldType instead.
|
||||
|
||||
|EnumFieldType |Allows defining an enumerated set of values which may not be easily sorted by either alphabetic or numeric order (such as a list of severities, for example). This field type takes a configuration file, which lists the proper order of the field values. See the section <<working-with-enum-fields.adoc#working-with-enum-fields,Working with Enum Fields>> for more information.
|
||||
|ICUCollationField |Supports Unicode collation for sorting and range queries. See the section <<language-analysis.adoc#unicode-collation,Unicode Collation>>.
|
||||
|LatLonPointSpatialField |<<spatial-search.adoc#spatial-search,Spatial Search>>: a latitude/longitude coordinate pair; possibly multi-valued for multiple points. Usually it's specified as "lat,lon" order with a comma.
|
||||
|LatLonType |(deprecated) <<spatial-search.adoc#spatial-search,Spatial Search>>: a single-valued latitude/longitude coordinate pair. Usually it's specified as "lat,lon" order with a comma.
|
||||
|PointType |<<spatial-search.adoc#spatial-search,Spatial Search>>: A single-valued n-dimensional point. It's both for sorting spatial data that is _not_ lat-lon, and for some more rare use-cases. (NOTE: this is _not_ related to the "Point" based numeric fields)
|
||||
|PreAnalyzedField |Provides a way to send to Solr serialized token streams, optionally with independent stored values of a field, and have this information stored and indexed without any additional text processing. Configuration and usage of PreAnalyzedField is documented on the <<working-with-external-files-and-processes.adoc#the-preanalyzedfield-type,Working with External Files and Processes>> page.
|
||||
|
||||
|FloatPointField |Floating point field (32-bit IEEE floating point). This class encodes float values using a "Dimensional Points" based data structure that allows for very efficient searches for specific values, or ranges of values. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|
||||
|ICUCollationField |Supports Unicode collation for sorting and range queries. See the section <<language-analysis.adoc#unicode-collation,Unicode Collation>> for more information.
|
||||
|
||||
|IntPointField |Integer field (32-bit signed integer). This class encodes int values using a "Dimensional Points" based data structure that allows for very efficient searches for specific values, or ranges of values. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|
||||
|LatLonPointSpatialField |A latitude/longitude coordinate pair; possibly multi-valued for multiple points. Usually it's specified as "lat,lon" order with a comma. See the section <<spatial-search.adoc#spatial-search,Spatial Search>> for more information.
|
||||
|
||||
|LatLonType |*Deprecated*. Consider using the LatLonPointSpatialField instead. A single-valued latitude/longitude coordinate pair. Usually it's specified as "lat,lon" order with a comma. See the section <<spatial-search.adoc#spatial-search,Spatial Search>> for more information.
|
||||
|
||||
|LongPointField |Long field (64-bit signed integer). This class encodes long values using a "Dimensional Points" based data structure that allows for very efficient searches for specific values, or ranges of values. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|
||||
|PointType |A single-valued n-dimensional point. It's both for sorting spatial data that is _not_ lat-lon, and for some more rare use-cases. (NOTE: this is _not_ related to the "Point" based numeric fields). See <<spatial-search.adoc#spatial-search,Spatial Search>> for more information.
|
||||
|
||||
|PreAnalyzedField |Provides a way to send to Solr serialized token streams, optionally with independent stored values of a field, and have this information stored and indexed without any additional text processing.
|
||||
|
||||
Configuration and usage of PreAnalyzedField is documented in the section <<working-with-external-files-and-processes.adoc#the-preanalyzedfield-type,Working with External Files and Processes>>.
|
||||
|
||||
|RandomSortField |Does not contain a value. Queries that sort on this field type will return results in random order. Use a dynamic field to use this feature.
|
||||
|SpatialRecursivePrefixTreeFieldType |(RPT for short) <<spatial-search.adoc#spatial-search,Spatial Search>>: Accepts latitude comma longitude strings or other shapes in WKT format.
|
||||
|
||||
|SpatialRecursivePrefixTreeFieldType |(RPT for short) Accepts latitude comma longitude strings or other shapes in WKT format. See <<spatial-search.adoc#spatial-search,Spatial Search>> for more information.
|
||||
|
||||
|StrField |String (UTF-8 encoded string or Unicode). Strings are intended for small fields and are _not_ tokenized or analyzed in any way. They have a hard limit of slightly less than 32K.
|
||||
|
||||
|TextField |Text, usually multiple words or tokens.
|
||||
|TrieDateField |Date field. Represents a point in time with millisecond precision. See the section <<working-with-dates.adoc#working-with-dates,Working with Dates>>. `precisionStep="0"` minimizes index size; `precisionStep="8"` (the default) enables more efficient range queries. For single valued fields, use `docValues="true"` for efficient sorting.
|
||||
|TrieDoubleField |Double field (64-bit IEEE floating point). `precisionStep="0"` minimizes index size; `precisionStep="8"` (the default) enables more efficient range queries. For single valued fields, use `docValues="true"` for efficient sorting.
|
||||
|TrieFloatField |Floating point field (32-bit IEEE floating point) . `precisionStep="0"` enables efficient numeric sorting and minimizes index size; `precisionStep="8"` (the default) enables efficient range queries. Use `docValues="true"` for efficient sorting. For single valued fields, use `docValues="true"` for efficient sorting.
|
||||
|TrieIntField |Integer field (32-bit signed integer). `precisionStep="0"` enables efficient numeric sorting and minimizes index size; `precisionStep="8"` (the default) enables efficient range queries. For single valued fields, use `docValues="true"` for efficient sorting.
|
||||
|TrieLongField |Long field (64-bit signed integer). `precisionStep="0"` minimizes index size; `precisionStep="8"` (the default) enables more efficient range queries. For single valued fields, use `docValues="true"` for efficient sorting.
|
||||
|TrieField |If this field type is used, a "type" attribute must also be specified, valid values are: `integer`, `long`, `float`, `double`, `date`. Using this field is the same as using any of the Trie fields mentioned above
|
||||
|DatePointField |Date field. Represents a point in time with millisecond precision. See the section <<working-with-dates.adoc#working-with-dates,Working with Dates>>. This class functions similarly to TrieDateField, but using a "Dimensional Points" based data structure instead of indexed terms, and doesn't require configuration of a precision step. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|DoublePointField |Double field (64-bit IEEE floating point). This class functions similarly to TrieDoubleField, but using a "Dimensional Points" based data structure instead of indexed terms, and doesn't require configuration of a precision step. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|FloatPointField |Floating point field (32-bit IEEE floating point). This class functions similarly to TrieFloatField, but using a "Dimensional Points" based data structure instead of indexed terms, and doesn't require configuration of a precision step. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|IntPointField |Integer field (32-bit signed integer). This class functions similarly to TrieIntField, but using a "Dimensional Points" based data structure instead of indexed terms, and doesn't require configuration of a precision step. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|LongPointField |Long field (64-bit signed integer). This class functions similarly to TrieLongField, but using a "Dimensional Points" based data structure instead of indexed terms, and doesn't require configuration of a precision step. For single valued fields, `docValues="true"` must be used to enable sorting.
|
||||
|UUIDField |Universally Unique Identifier (UUID). Pass in a value of "NEW" and Solr will create a new UUID. *Note*: configuring a UUIDField instance with a default value of "NEW" is not advisable for most users when using SolrCloud (and not possible if the UUID value is configured as the unique key field) since the result will be that each replica of each document will get a unique UUID value. Using UUIDUpdateProcessorFactory to generate UUID values when documents are added is recommended instead.
|
||||
|
||||
|TrieDateField |*Deprecated*. Use DatePointField instead.
|
||||
|
||||
|TrieDoubleField |*Deprecated*. Use DoublePointField instead.
|
||||
|
||||
|TrieFloatField |*Deprecated*. Use FloatPointField instead.
|
||||
|
||||
|TrieIntField |*Deprecated*. Use IntPointField instead.
|
||||
|
||||
|TrieLongField |*Deprecated*. Use LongPointField instead.
|
||||
|
||||
|TrieField |*Deprecated*. This field takes a `type` parameter to define the specific class of Trie* field to use; Use an appropriate Point Field type instead.
|
||||
|
||||
|UUIDField |Universally Unique Identifier (UUID). Pass in a value of `NEW` and Solr will create a new UUID.
|
||||
|
||||
*Note*: configuring a UUIDField instance with a default value of `NEW` is not advisable for most users when using SolrCloud (and not possible if the UUID value is configured as the unique key field) since the result will be that each replica of each document will get a unique UUID value. Using UUIDUpdateProcessorFactory to generate UUID values when documents are added is recommended instead.
|
||||
|===
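As an illustration of the Trie-to-Point migration described in this table, a deprecated Trie declaration and a Point-based replacement might look like the following; the `name` values are hypothetical:

[source,xml]
----
<!-- deprecated Trie form -->
<fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" docValues="true"/>
<!-- Point-based replacement -->
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
----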

@@ -254,7 +254,7 @@ Use the `field(myfield,min)` <<field Function,syntax for selecting the minimum v

=== ms Function
Returns milliseconds of difference between its arguments. Dates are relative to the Unix or POSIX time epoch, midnight, January 1, 1970 UTC.

Arguments may be the name of an indexed `TrieDateField`, or date math based on a <<working-with-dates.adoc#working-with-dates,constant date or `NOW`>>.
Arguments may be the name of a `DatePointField`, `TrieDateField`, or date math based on a <<working-with-dates.adoc#working-with-dates,constant date or `NOW`>>.

* `ms()`: Equivalent to `ms(NOW)`, number of milliseconds since the epoch.
* `ms(a):` Returns the number of milliseconds since the epoch that the argument represents.
@@ -1,7 +1,7 @@
= How SolrCloud Works
:page-shortname: how-solrcloud-works
:page-permalink: how-solrcloud-works.html
:page-children: shards-and-indexing-data-in-solrcloud, distributed-requests, read-and-write-side-fault-tolerance
:page-children: shards-and-indexing-data-in-solrcloud, distributed-requests
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -23,7 +23,6 @@ The following sections cover provide general information about how various SolrC

* <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Shards and Indexing Data in SolrCloud>>
* <<distributed-requests.adoc#distributed-requests,Distributed Requests>>
* <<read-and-write-side-fault-tolerance.adoc#read-and-write-side-fault-tolerance,Read and Write Side Fault Tolerance>>

If you are already familiar with SolrCloud concepts and basic functionality, you can skip to the section covering <<solrcloud-configuration-and-parameters.adoc#solrcloud-configuration-and-parameters,SolrCloud Configuration and Parameters>>.
@@ -41,7 +41,8 @@ include::meta-docs/asciidoc-syntax.adoc[leveloffset=+2]
include::meta-docs/editing-tools.adoc[leveloffset=+2]

== Modifying the Output Formats
The Solr Reference Guide is published in two formats, HTML and PDF. Different tools are used for each.
The Solr Reference Guide is published in two formats: HTML and PDF. Different tools are used for each.

include::meta-docs/jekyll.adoc[leveloffset=+2]

include::meta-docs/pdf.adoc[leveloffset=+2]
@@ -1,7 +1,7 @@
= Apache Solr Reference Guide
:page-shortname: index
:page-permalink: index.html
:page-children: about-this-guide, solr-tutorial, getting-started, solr-control-script-reference, using-the-solr-administration-user-interface, documents-fields-and-schema-design, understanding-analyzers-tokenizers-and-filters, indexing-and-basic-data-operations, searching, the-well-configured-solr-instance, managing-solr, solrcloud, legacy-scaling-and-distribution, client-apis, major-changes-from-solr-5-to-solr-6, further-assistance, solr-glossary, errata, how-to-contribute
:page-children: about-this-guide, solr-tutorial, getting-started, solr-control-script-reference, using-the-solr-administration-user-interface, documents-fields-and-schema-design, understanding-analyzers-tokenizers-and-filters, indexing-and-basic-data-operations, searching, the-well-configured-solr-instance, managing-solr, solrcloud, legacy-scaling-and-distribution, client-apis, further-assistance, solr-glossary, errata, how-to-contribute
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -66,7 +66,7 @@ Defines how merging segments is done.

The default in Solr is to use a `TieredMergePolicy`, which merges segments of approximately equal size, subject to an allowed number of segments per tier.

Other policies available are the `LogByteSizeMergePolicy` and `LogDocMergePolicy`. For more information on these policies, please see {lucene-javadocs}/core/org/apache/lucene/index/MergePolicy.html[the MergePolicy javadocs].
Other policies available are the `LogByteSizeMergePolicy`, `LogDocMergePolicy`, and `UninvertDocValuesMergePolicy`. For more information on these policies, please see {lucene-javadocs}/core/org/apache/lucene/index/MergePolicy.html[the MergePolicy javadocs].

[source,xml]
----
@@ -0,0 +1,187 @@
= Major Changes in Solr 7
:page-shortname: major-changes-in-solr-7
:page-permalink: major-changes-in-solr-7.html
:page-tocclass: right
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

Solr 7 is a major new release of Solr which introduces new features and a number of other changes that may impact your existing installation.

== Upgrade Planning
There are major changes in Solr 7 to consider before starting to migrate your configurations and indexes. This page is designed to highlight the biggest changes - new features you may want to be aware of, but also changes in default behavior, and deprecated features that have been removed.

There are many hundreds of changes in Solr 7, however, so a thorough review of the <<solr-upgrade-notes.adoc#solr-upgrade-notes,Solr Upgrade Notes>> as well as the {solr-javadocs}/changes/Changes.html[CHANGES.txt] file in your Solr instance will help you plan your migration to Solr 7. This section attempts to highlight some of the major changes you should be aware of.

You should also consider all changes that have been made to Solr in any version you have not upgraded to already. For example, if you are currently using Solr 6.2, you should review changes made in all subsequent 6.x releases in addition to changes for 7.0.

Re-indexing your data is considered the best practice and you should try to do so if possible. However, if re-indexing is not feasible, keep in mind you can only upgrade one major version at a time. Thus, Solr 6.x indexes will be compatible with Solr 7 but Solr 5.x indexes will not be.

If you do not re-index now, keep in mind that you will need to either re-index your data or upgrade your indexes before you will be able to move to Solr 8 when it is released in the future. See the section <<indexupgrader-tool.adoc#indexupgrader-tool,IndexUpgrader Tool>> for more details on how to upgrade your indexes.

See also the section <<upgrading-a-solr-cluster.adoc#upgrading-a-solr-cluster,Upgrading a Solr Cluster>> for details on how to upgrade a SolrCloud cluster.

== New Features & Enhancements

=== Replication Modes
Until Solr 7, the SolrCloud model for replicas has been to allow any replica to become a leader when a leader is lost. This is highly effective for most users, providing reliable failover in case of issues in the cluster. However, it comes at a cost in large clusters because all replicas must be in sync at all times.

To provide additional flexibility, two new types of replicas have been added, named TLOG & PULL. These new types provide options to have replicas which only sync with the leader by copying index segments from the leader. The TLOG type has an additional benefit of maintaining a transaction log (the "tlog" of its name), which would allow it to recover and become a leader if necessary; the PULL type does not maintain a transaction log, so cannot become a leader.

As part of this change, the traditional type of replica is now named NRT. If you do not explicitly define a number of TLOG or PULL replicas, Solr defaults to creating NRT replicas. If this model is working for you, you will not have to change anything.

See the section <<shards-and-indexing-data-in-solrcloud.adoc#types-of-replicas,Types of Replicas>> for more details on the new replica modes, and how to define the replica type in your cluster.
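As a sketch of what that looks like in practice (the collection name and replica counts here are illustrative, not part of the change above), the Collections API accepts the new replica types at creation time:

[source,bash]
curl 'http://localhost:8983/solr/admin/collections?action=CREATE&name=mycollection&numShards=1&nrtReplicas=1&tlogReplicas=1&pullReplicas=1'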

=== Autoscaling
Solr autoscaling is a new suite of features in Solr to make managing a SolrCloud cluster easier and more automated.

At its core, Solr autoscaling provides users with a rule syntax to define preferences and policies for how to distribute nodes and shards in a cluster, with the goal of maintaining a balance in the cluster. As of Solr 7, Solr will take any policy or preference rules into account when determining where to place new shards and replicas created or moved with various Collections API commands.

See the section <<solrcloud-autoscaling.adoc#solrcloud-autoscaling,SolrCloud Autoscaling>> for details on the options available in 7.0. Expect more features to be released in subsequent 7.x releases in this area.

=== Other Features & Enhancements

* The Analytics Component has been refactored.

* There were several other new features released in earlier 6.x releases, which you may have missed:
** <<learning-to-rank.adoc#learning-to-rank,Learning to Rank>>
** <<highlighting.adoc#the-unified-highlighter,Unified Highlighter>>
** <<metrics-reporting.adoc#metrics-reporting,Metrics API>>. See also information about related deprecations in the section <<JMX Support and MBeans>> below.
** <<other-parsers.adoc#payload-query-parsers,Payload queries>>
** <<stream-evaluators.adoc#stream-evaluators,Streaming Evaluators>>
** <<v2-api.adoc#v2-api,/v2 API>>
** <<graph-traversal.adoc#graph-traversal,Graph streaming expressions>>

== Configuration and Default Changes

=== New Default ConfigSet
Several changes have been made to configSets that ship with Solr; not only their content but how Solr behaves in regard to them:

* The `data_driven_configset` and `basic_configset` have been removed, and replaced by the `_default` configset. The `sample_techproducts_configset` also remains, and is designed for use with the example documents shipped with Solr in the `example/exampledocs` directory.
* When creating a new collection, if you do not specify a configSet, the `_default` will be used.
** If you use SolrCloud, the `_default` configSet will be automatically uploaded to ZooKeeper.
** If you use standalone mode, the instanceDir will be created automatically, using the `_default` configSet as its basis.
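For example, with the `bin/solr` script (the collection name below is illustrative), a collection created without a `-d <configset>` option picks up `_default`:

[source,bash]
bin/solr create -c mycollection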

=== Schemaless Improvements

To improve the functionality of Schemaless Mode, Solr now behaves differently when it detects that data in an incoming field should have a text-based field type.

* Incoming fields will be indexed as `text_general` by default (you can change this). The name of the field will be the same as the field name defined in the document.
* A copy field rule will be inserted into your schema to copy the new `text_general` field to a new field with the name `<name>_str`. This field's type will be a `strings` field (to allow for multiple values). The first 256 characters of the text field will be inserted to the new `strings` field.

This behavior can be customized if you wish to remove the copy field rule, or to change the number of characters inserted to the string field, or the field type used. See the section <<schemaless-mode.adoc#schemaless-mode,Schemaless Mode>> for details.

TIP: Because copy field rules can slow indexing and increase index size, it's recommended you only use copy fields when you need to. If you do not need to sort or facet on a field, you should remove the automatically-generated copy field rule.
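If you decide to drop one of these auto-generated rules, the Schema API can do it; the field names in this sketch are hypothetical:

[source,bash]
curl http://host:8983/solr/mycollection/schema -X POST -H 'Content-type:application/json' -d '{"delete-copy-field":{"source":"myfield","dest":"myfield_str"}}'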
|
||||
|
||||
Automatic field creation can be disabled with the `update.autoCreateFields` property. To do this, you can use the Config API with a command such as:
|
||||
|
||||
[source,bash]
|
||||
curl http://host:8983/solr/mycollection/config -d '{"set-user-property": {"update.autoCreateFields":"false"}}'
|
||||
|
||||
=== Changes to Default Behaviors
|
||||
* JSON is now the default response format. If you rely on XML responses, you must now define `wt=xml` in your request. In addition, line indentation is enabled by default (`indent=on`).
|
||||
* The `sow` parameter (short for "Split on Whitespace") now defaults to `false`, which allows support for multi-word synonyms out of the box. This parameter is used with the eDismax and standard/"lucene" query parsers. If this parameter is not explicitly specified as `true`, query text will not be split on whitespace before analysis.
|
||||
* The `legacyCloud` parameter now defaults to `false`. If an entry for a replica does not exist in `state.json`, that replica will not get registered.
|
||||
+
|
||||
This may affect users who bring up replicas and they are automatically registered as a part of a shard. It is possible to fall back to the old behavior by setting the property `legacyCloud=true`, in the cluster properties using the following command:
|
||||
+
|
||||
`./server/scripts/cloud-scripts/zkcli.sh -zkhost 127.0.0.1:2181 -cmd clusterprop -name legacyCloud -val true`
|
||||
* The eDismax query parser parameter `lowercaseOperators` now defaults to `false` if the `luceneMatchVersion` in `solrconfig.xml` is 7.0.0 or above. Behavior for `luceneMatchVersion` lower than 7.0.0 is unchanged (so, `true`). This means that clients must sent boolean operators (such as AND, OR and NOT) in upper case in order to be recognized, or you must explicitly set this parameter to `true`.
|
||||
* The `handleSelect` parameter in `solrconfig.xml` now defaults to `false` if the `luceneMatchVersion` is 7.0.0 or above. This causes Solr to ignore the `qt` parameter if it is present in a request. If you have request handlers without a leading '/', you can set `handleSelect="true"` or consider migrating your configuration.
|
||||
+
|
||||
The `qt` parameter is still used as a SolrJ special parameter that specifies the request handler (tail URL path) to use.
|
||||
* The lucenePlusSort query parser (aka the "Old Lucene Query Parser") has been deprecated and is no longer implicitly defined. If you wish to continue using this parser until Solr 8 (when it will be removed), you must register it in your `solrconfig.xml`, as in: `<queryParser name="lucenePlusSort" class="solr.OldLuceneQParserPlugin"/>`.
|
||||
* The name of `TemplateUpdateRequestProcessorFactory` is changed to `template` from `Template` and the name of `AtomicUpdateProcessorFactory` is changed to `atomic` from `Atomic`
|
||||
** Also, `TemplateUpdateRequestProcessorFactory` now uses `{}` instead of `${}` for `template`.
|
||||
|
||||
|
||||
== Deprecations and Removed Features
|
||||
|
||||
=== Point Fields Are Default Numeric Types
|
||||
Solr has implemented \*PointField types across the board, to replace Trie*-based numeric fields. All Trie* fields are now considered deprecated, and will be removed in Solr 8.
|
||||
|
||||
If you are using Trie* fields in your schema, you should consider moving to PointFields as soon as feasible. Changing to the new PointField types will require you to re-index your data.
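For example, a point-based field type could be added with the <<schema-api.adoc#schema-api,Schema API>>; this is only a sketch, and the type name `pint_example` and collection name `gettingstarted` are illustrative:

[source,bash]
----
curl -X POST -H 'Content-type:application/json' --data-binary '{
  "add-field-type":{
     "name":"pint_example",
     "class":"solr.IntPointField",
     "docValues":true }
}' http://localhost:8983/solr/gettingstarted/schema
----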
|
||||
|
||||
=== Spatial Fields
|
||||
|
||||
The following spatial-related fields have been deprecated:
|
||||
|
||||
* `LatLonType`
|
||||
* `GeoHashField`
|
||||
* `SpatialVectorFieldType`
|
||||
* `SpatialTermQueryPrefixTreeFieldType`
|
||||
|
||||
Choose one of these field types instead:
|
||||
|
||||
* `LatLonPointSpatialField`
|
||||
* `SpatialRecursivePrefixTreeField`
|
||||
* `RptWithGeometrySpatialField`
|
||||
|
||||
See the section <<spatial-search.adoc#spatial-search,Spatial Search>> for more information.
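As a sketch of migrating, a replacement field type could be registered with the <<schema-api.adoc#schema-api,Schema API>>; the type name `location` and collection name `gettingstarted` are illustrative:

[source,bash]
----
curl -X POST -H 'Content-type:application/json' --data-binary '{
  "add-field-type":{
     "name":"location",
     "class":"solr.LatLonPointSpatialField",
     "docValues":true }
}' http://localhost:8983/solr/gettingstarted/schema
----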
|
||||
|
||||
=== JMX Support and MBeans
|
||||
* The `<jmx>` element in `solrconfig.xml` has been removed in favor of `<metrics><reporter>` elements defined in `solr.xml`.
|
||||
+
|
||||
Limited back-compatibility is offered by automatically adding a default instance of `SolrJmxReporter` if it's missing, AND when a local MBean server is found (which can be activated either via `ENABLE_REMOTE_JMX_OPTS` in `solr.in.sh` or via system properties, e.g., `-Dcom.sun.management.jmxremote`). This default instance exports all Solr metrics from all registries as hierarchical MBeans.
|
||||
+
|
||||
This behavior can also be disabled by specifying a `SolrJmxReporter` configuration with a boolean init argument `enabled` set to `false`. For more fine-grained control, users should explicitly specify at least one `SolrJmxReporter` configuration.
|
||||
+
|
||||
See also the section <<metrics-reporting.adoc#the-metrics-reporters-element,The <metrics><reporters> Element>>, which describes how to set up Metrics Reporters in `solr.xml`.
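+
A minimal `solr.xml` sketch (the reporter name `jmx` is illustrative) of defining such a reporter explicitly rather than relying on the automatic default:
+
[source,xml]
----
<metrics>
  <reporter name="jmx" class="org.apache.solr.metrics.reporters.SolrJmxReporter"/>
</metrics>
----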
|
||||
|
||||
* MBean names and attributes now follow the hierarchical names used in metrics. This is reflected also in `/admin/mbeans` and `/admin/plugins` output, and can be observed in the UI Plugins tab, because now all these APIs get their data from the metrics API. The old (mostly flat) JMX view has been removed.
|
||||
|
||||
=== SolrJ
|
||||
The following changes were made in SolrJ.
|
||||
|
||||
* `HttpClientInterceptorPlugin` is now `HttpClientBuilderPlugin` and must work with a `SolrHttpClientBuilder` rather than an `HttpClientConfigurer`.
|
||||
* `HttpClientUtil` now allows configuring `HttpClient` instances via `SolrHttpClientBuilder` rather than an `HttpClientConfigurer`. Use of the env variable `SOLR_AUTHENTICATION_CLIENT_CONFIGURER` no longer works; please use `SOLR_AUTHENTICATION_CLIENT_BUILDER` instead.
|
||||
* `SolrClient` implementations now use their own internal configuration for socket timeouts, connect timeouts, and allowing redirects rather than what is set as the default when building the `HttpClient` instance. Use the appropriate setters on the `SolrClient` instance.
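+
A minimal SolrJ sketch (the URL and timeout values are illustrative) of configuring these settings on the client builder itself:
+
[source,java]
----
import org.apache.solr.client.solrj.impl.HttpSolrClient;

// Timeouts are now configured on the SolrClient builder, not on the underlying HttpClient.
HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts")
    .withConnectionTimeout(10000)  // connect timeout, in milliseconds
    .withSocketTimeout(60000)      // socket (read) timeout, in milliseconds
    .build();
----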
|
||||
* `HttpSolrClient#setAllowCompression` has been removed and compression must be enabled as a constructor param.
|
||||
* `HttpSolrClient#setDefaultMaxConnectionsPerHost` and `HttpSolrClient#setMaxTotalConnections` have been removed. These now default very high and can only be changed via param when creating an HttpClient instance.
|
||||
|
||||
=== Other Deprecations and Removals
|
||||
* The `defaultOperator` parameter in the schema is no longer supported. Use the `q.op` parameter instead. This option had been deprecated for several releases. See the section <<the-standard-query-parser.adoc#standard-query-parser-parameters,Standard Query Parser Parameters>> for more information.
|
||||
* The `defaultSearchField` parameter in the schema is no longer supported. Use the `df` parameter instead. This option had been deprecated for several releases. See the section <<the-standard-query-parser.adoc#standard-query-parser-parameters,Standard Query Parser Parameters>> for more information.
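+
For example (collection and field names are illustrative), both this and the preceding `q.op` replacement can be supplied as plain request parameters:
+
[source,bash]
curl "http://localhost:8983/solr/techproducts/select?q=memory%20video&q.op=AND&df=name"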
|
||||
* The `mergePolicy`, `mergeFactor` and `maxMergeDocs` parameters have been removed and are no longer supported. You should define a `mergePolicyFactory` instead. See the section <<indexconfig-in-solrconfig.adoc#mergepolicyfactory,the mergePolicyFactory>> for more information.
|
||||
* The PostingsSolrHighlighter has been deprecated. It's recommended that you move to using the UnifiedHighlighter instead. See the section <<highlighting.adoc#the-unified-highlighter,Unified Highlighter>> for more information about this highlighter.
|
||||
* Index-time boosts have been removed from Lucene, and are no longer available from Solr. If any boosts are provided, they will be ignored by the indexing chain. As a replacement, index-time scoring factors should be indexed in a separate field and combined with the query score using a function query. See the section <<function-queries.adoc#function-queries,Function Queries>> for more information.
|
||||
* The `StandardRequestHandler` is deprecated. Use `SearchHandler` instead.
|
||||
* To improve parameter consistency in the Collections API, the parameter names `fromNode` for the MOVEREPLICA command and `source`, `target` for the REPLACENODE command have been deprecated and replaced with `sourceNode` and `targetNode`. The old names will continue to work for back-compatibility, but they will be removed in Solr 8.
|
||||
* The unused `valType` option has been removed from ExternalFileField; if you have this in your schema, you can safely remove it.
|
||||
|
||||
== Major Changes in Earlier 6.x Versions
|
||||
The following summary highlights significant changes released between Solr 6.0 and 6.6 that were listed in earlier versions of this Guide. Mentions of deprecations are likely superseded by removal in Solr 7, as noted in the above sections.
|
||||
|
||||
* The Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed.
|
||||
* JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any `mincount` greater than 1.
|
||||
* If you use historical dates, specifically on or before the year 1582, you should re-index for better date handling.
|
||||
* If you use the JSON Facet API (json.facet) with `method=stream`, you must now set `sort='index asc'` to get the streaming behavior; otherwise it won't stream. Reminder: `method` is a hint that doesn't change defaults of other parameters.
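+
A sketch (the facet and field names are illustrative) of a terms facet requesting the streaming method:
+
[source,json]
----
{
  "categories": {
    "type": "terms",
    "field": "cat",
    "method": "stream",
    "sort": "index asc"
  }
}
----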
|
||||
* If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use `mincount=0` or if you set the prefix, you will now get an error as these options are incompatible with numeric faceting.
|
||||
* Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before.
|
||||
* We are no longer backing up `solr.log` and `solr_gc.log` files in date-stamped copies forever. If you relied on the `solr_log_<date>` or `solr_gc_log_<date>` files being in the logs folder, that will no longer be the case. See the section <<configuring-logging.adoc#configuring-logging,Configuring Logging>> for details on how log rotation works as of Solr 6.3.
|
||||
* The create/deleteCollection methods on `MiniSolrCloudCluster` have been deprecated. Clients should instead use the `CollectionAdminRequest` API. In addition, `MiniSolrCloudCluster#uploadConfigDir(File, String)` has been deprecated in favour of `#uploadConfigSet(Path, String)`.
|
||||
* The `bin/solr.in.sh` (`bin/solr.in.cmd` on Windows) is now completely commented out by default. Previously, this wasn't so, which had the effect of masking existing environment variables.
|
||||
* The `\_version_` field is no longer indexed and is now defined with `indexed=false` by default, because the field has DocValues enabled.
|
||||
* The `/export` handler has been changed so it no longer returns zero (0) for numeric fields that are not in the original document. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document.
|
||||
* Metrics-related classes in `org.apache.solr.util.stats` have been removed in favor of the http://metrics.dropwizard.io/3.1.0/[Dropwizard metrics library]. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. As part of this, the following changes were made to the output of Overseer Status API:
|
||||
** The "totalTime" metric has been removed because it is no longer supported.
|
||||
** The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr.
|
||||
** The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
|
||||
* A new highlighter named UnifiedHighlighter has been added. You are encouraged to try out the UnifiedHighlighter by setting `hl.method=unified` and report feedback. It is more efficient and faster than the other highlighters, especially compared to the original Highlighter. See `HighlightParams.java` for a listing of highlight parameters annotated with which highlighters use them. `hl.useFastVectorHighlighter` is now considered deprecated in favor of `hl.method=fastVector`.
|
||||
* The <<query-settings-in-solrconfig.adoc#query-settings-in-solrconfig,`maxWarmingSearchers` parameter>> now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing `maxWarmingSearchers` entries from their `solrconfig.xml` files.
|
||||
* The <<other-parsers.adoc#complex-phrase-query-parser,Complex Phrase query parser>> now supports leading wildcards. Beware of its possible heaviness; users are encouraged to use ReversedWildcardFilter in index-time analysis.
|
||||
* The JMX metric "avgTimePerRequest" (and the corresponding metric in the metrics API for each handler) used to be a simple non-decaying average based on total cumulative time and the number of requests. The Codahale Metrics implementation applies exponential decay to this value, which heavily biases the average towards the last 5 minutes.
|
||||
* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to `facet` rather than `map_reduce`. There have also been changes to the SQL aggregate response and some SQL syntax changes. Consult the <<parallel-sql-interface.adoc#parallel-sql-interface,Parallel SQL Interface>> documentation for full details.
|
|
@ -478,7 +478,7 @@ q = {!join from=id to=manu_id_s}compName_s:Belkin
|
|||
fq = price:[* TO 12]
|
||||
----
|
||||
|
||||
The join operation is done on a term basis, so the "from" and "to" fields must use compatible field types. For example: joining between a `StrField` and a `TrieIntField` will not work, likewise joining between a `StrField` and a `TextField` that uses `LowerCaseFilterFactory` will only work for values that are already lower cased in the string field.
|
||||
The join operation is done on a term basis, so the "from" and "to" fields must use compatible field types. For example: joining between a `StrField` and an `IntPointField` will not work; likewise, joining between a `StrField` and a `TextField` that uses `LowerCaseFilterFactory` will only work for values that are already lower cased in the string field.
|
||||
|
||||
=== Join Parser Scoring
|
||||
|
||||
|
|
|
@ -46,9 +46,9 @@ Note that the `types` and `fields` sections are optional, meaning you are free t
|
|||
|
||||
== Choosing Appropriate Numeric Types
|
||||
|
||||
For general numeric needs, consider using one of the` IntPointField`, `LongPointField`, `FloatPointField`, or `DoublePointField` classes, depending on the specific values you expect. These "Dimensional Point" based numeric classes use specially encoded data structures to support efficient range queries regardless of the size of the ranges used. Enable <<docvalues.adoc#docvalues,DocValues>> on these fields as needed for sorting and/or faceting.
|
||||
For general numeric needs, consider using one of the `IntPointField`, `LongPointField`, `FloatPointField`, or `DoublePointField` classes, depending on the specific values you expect. These "Dimensional Point" based numeric classes use specially encoded data structures to support efficient range queries regardless of the size of the ranges used. Enable <<docvalues.adoc#docvalues,DocValues>> on these fields as needed for sorting and/or faceting.
|
||||
|
||||
Some Solr features may not yet work with "Dimensional Points", in which case you may want to consider the equivalent `TrieIntField`, `TrieLongField`, `TrieFloatField`, and `TrieDoubleField` classes. Configure a `precisionStep="0"` if you wish to minimize index size, but if you expect users to make frequent range queries on numeric types, use the default `precisionStep` (by not specifying it) or specify it as `precisionStep="8"` (which is the default). This offers faster speed for range queries at the expense of increasing index size.
|
||||
Some Solr features may not yet work with "Dimensional Points", in which case you may want to consider the equivalent `TrieIntField`, `TrieLongField`, `TrieFloatField`, and `TrieDoubleField` classes. These field types are deprecated and are likely to be removed in a future major Solr release, but they can still be used if necessary. Configure a `precisionStep="0"` if you wish to minimize index size, but if you expect users to make frequent range queries on numeric types, use the default `precisionStep` (by not specifying it) or specify it as `precisionStep="8"` (which is the default). This offers faster speed for range queries at the expense of increasing index size.
|
||||
|
||||
== Working With Text
|
||||
|
||||
|
|
|
@ -94,14 +94,14 @@ The `add-field` command adds a new field definition to your schema. If a field w
|
|||
|
||||
All of the properties available when defining a field with manual `schema.xml` edits can be passed via the API. These request attributes are described in detail in the section <<defining-fields.adoc#defining-fields,Defining Fields>>.
|
||||
|
||||
For example, to define a new stored field named "sell-by", of type "tdate", you would POST the following request:
|
||||
For example, to define a new stored field named "sell-by", of type "pdate", you would POST the following request:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST -H 'Content-type:application/json' --data-binary '{
|
||||
"add-field":{
|
||||
"name":"sell-by",
|
||||
"type":"tdate",
|
||||
"type":"pdate",
|
||||
"stored":true }
|
||||
}' http://localhost:8983/solr/gettingstarted/schema
|
||||
----
|
||||
|
@ -803,7 +803,7 @@ The sample output below has been truncated to show a few different field types f
|
|||
"sortMissingLast": true
|
||||
},
|
||||
{
|
||||
"class": "solr.TrieFloatField",
|
||||
"class": "solr.FloatPointField",
|
||||
"dynamicFields": [
|
||||
"*_fs",
|
||||
"*_f"
|
||||
|
@ -814,7 +814,6 @@ The sample output below has been truncated to show a few different field types f
|
|||
],
|
||||
"name": "float",
|
||||
"positionIncrementGap": "0",
|
||||
"precisionStep": "0"
|
||||
}]
|
||||
}
|
||||
----
|
||||
|
|
|
@ -22,7 +22,7 @@ Schemaless Mode is a set of Solr features that, when used together, allow users
|
|||
|
||||
These Solr features, all controlled via `solrconfig.xml`, are:
|
||||
|
||||
. Managed schema: Schema modifications are made at runtime through Solr APIs, which requires the use of `schemaFactory` that supports these changes - see <<schema-factory-definition-in-solrconfig.adoc#schema-factory-definition-in-solrconfig,Schema Factory Definition in SolrConfig>> for more details.
|
||||
. Managed schema: Schema modifications are made at runtime through Solr APIs, which requires the use of a `schemaFactory` that supports these changes. See the section <<schema-factory-definition-in-solrconfig.adoc#schema-factory-definition-in-solrconfig,Schema Factory Definition in SolrConfig>> for more details.
|
||||
. Field value class guessing: Previously unseen fields are run through a cascading set of value-based parsers, which guess the Java class of field values - parsers for Boolean, Integer, Long, Float, Double, and Date are currently available.
|
||||
. Automatic schema field addition, based on field value class(es): Previously unseen fields are added to the schema, based on field value Java classes, which are mapped to schema field types - see <<solr-field-types.adoc#solr-field-types,Solr Field Types>>.
|
||||
|
||||
|
@ -35,7 +35,7 @@ The three features of schemaless mode are pre-configured in the `_default` <<con
|
|||
bin/solr start -e schemaless
|
||||
----
|
||||
|
||||
This will launch a Solr server, and automatically create a collection (named "```gettingstarted```") that contains only three fields in the initial schema: `id`, `\_version_`, and `\_text_`.
|
||||
This will launch a single Solr server, and automatically create a collection (named "```gettingstarted```") that contains only three fields in the initial schema: `id`, `\_version_`, and `\_text_`.
|
||||
|
||||
You can use the `/schema/fields` <<schema-api.adoc#schema-api,Schema API>> to confirm this: `curl \http://localhost:8983/solr/gettingstarted/schema/fields` will output:
|
||||
|
||||
|
@ -84,19 +84,23 @@ You can configure the `ManagedIndexSchemaFactory` (and control the resource file
|
|||
</schemaFactory>
|
||||
----
|
||||
|
||||
=== Define an UpdateRequestProcessorChain
|
||||
=== Enable Field Class Guessing
|
||||
|
||||
The UpdateRequestProcessorChain allows Solr to guess field types, and you can define the default field type classes to use. To start, you should define it as follows (see the javadoc links below for update processor factory documentation):
|
||||
In Solr, an <<update-request-processors.adoc#update-request-processors,UpdateRequestProcessorChain>> defines a chain of plugins that are applied to documents before or while they are indexed.
|
||||
|
||||
The field guessing aspect of Solr's schemaless mode uses a specially-defined UpdateRequestProcessorChain that allows Solr to guess field types. You can also define the default field type classes to use.
|
||||
|
||||
To start, you should define it as follows (see the javadoc links below for update processor factory documentation):
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/>
|
||||
<updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/>
|
||||
<updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating">
|
||||
<updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating"> <!--1-->
|
||||
<str name="pattern">[^\w-\.]</str>
|
||||
<str name="replacement">_</str>
|
||||
</updateProcessor>
|
||||
<updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/>
|
||||
<updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/> <!--2-->
|
||||
<updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/>
|
||||
<updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/>
|
||||
<updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date">
|
||||
|
@ -120,11 +124,11 @@ The UpdateRequestProcessorChain allows Solr to guess field types, and you can de
|
|||
<str>yyyy-MM-dd</str>
|
||||
</arr>
|
||||
</updateProcessor>
|
||||
<updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields">
|
||||
<updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields"> <!--3-->
|
||||
<lst name="typeMapping">
|
||||
<str name="valueClass">java.lang.String</str>
|
||||
<str name="valueClass">java.lang.String</str> <!--4-->
|
||||
<str name="fieldType">text_general</str>
|
||||
<lst name="copyField">
|
||||
<lst name="copyField"> <!--5-->
|
||||
<str name="dest">*_str</str>
|
||||
<int name="maxChars">256</int>
|
||||
</lst>
|
||||
|
@ -140,7 +144,7 @@ The UpdateRequestProcessorChain allows Solr to guess field types, and you can de
|
|||
<str name="fieldType">pdates</str>
|
||||
</lst>
|
||||
<lst name="typeMapping">
|
||||
<str name="valueClass">java.lang.Long</str>
|
||||
<str name="valueClass">java.lang.Long</str> <!--6-->
|
||||
<str name="valueClass">java.lang.Integer</str>
|
||||
<str name="fieldType">plongs</str>
|
||||
</lst>
|
||||
|
@ -152,14 +156,26 @@ The UpdateRequestProcessorChain allows Solr to guess field types, and you can de
|
|||
|
||||
<!-- The update.autoCreateFields property can be turned to false to disable schemaless mode -->
|
||||
<updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}"
|
||||
processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields">
|
||||
processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields"> <!--7-->
|
||||
<processor class="solr.LogUpdateProcessorFactory"/>
|
||||
<processor class="solr.DistributedUpdateProcessorFactory"/>
|
||||
<processor class="solr.RunUpdateProcessorFactory"/>
|
||||
</updateRequestProcessorChain>
|
||||
----
|
||||
|
||||
Javadocs for update processor factories mentioned above:
|
||||
There are many things defined in this chain. Let's step through a few of them.
|
||||
|
||||
<1> First, we're using the `FieldNameMutatingUpdateProcessorFactory` to sanitize field names, replacing any character that is not a word character, hyphen, or period with an underscore. Note that this and every following `<updateProcessor>` element includes a `name`. These names will be used in the final chain definition at the end of this example.
|
||||
<2> Next we add several update request processors to parse different field types. Note the ParseDateFieldUpdateProcessorFactory includes a long list of possible date formats that would be parsed into valid Solr dates. If you have a custom date format, you could add it to this list (see the link to the Javadocs below to get information on how).
|
||||
<3> Once the fields have been parsed, we define the field types that will be assigned to those fields. You can modify any of these that you would like to change.
|
||||
<4> In this definition, if the parsing step decides the incoming data in a field is a string, we will put this into a field in Solr with the field type `text_general`. By default, this field type allows Solr to perform full-text queries on the field.
|
||||
<5> After we've added the `text_general` field, we have also defined a copy field rule that will copy all data from the new `text_general` field to a field with the same name suffixed with `_str`. This is done by Solr's dynamic fields feature. By defining the target of the copy field rule as a dynamic field in this way, you can control the field type used in your schema. The default selection allows Solr to facet, highlight, and sort on these fields.
|
||||
<6> This is another example of a mapping rule. In this case we define that when either of the `Long` or `Integer` field parsers identify a field, they should both map their fields to the `plongs` field type.
|
||||
<7> Finally, we add a chain definition that calls the list of plugins. These plugins are each called by the names we gave to them when we defined them. We can also add other processors to the chain, as shown here. Note we have also given the entire chain a `name` ("add-unknown-fields-to-the-schema"). We'll use this name in the next section to specify that our update request handler should use this chain definition.
|
||||
|
||||
CAUTION: This chain definition will cause a number of copy field rules to be created, producing string versions of the corresponding text fields. If your data causes you to end up with a lot of copy field rules, indexing may be slowed down noticeably, and your index size will be larger. To control for these issues, it's recommended that you review the copy field rules that are created, and remove any which you do not need for faceting, sorting, highlighting, etc.
|
||||
|
||||
If you're interested in more information about the classes used in this chain, here are links to the Javadocs for update processor factories mentioned above:
|
||||
|
||||
* {solr-javadocs}/solr-core/org/apache/solr/update/processor/UUIDUpdateProcessorFactory.html[UUIDUpdateProcessorFactory]
|
||||
* {solr-javadocs}/solr-core/org/apache/solr/update/processor/RemoveBlankFieldUpdateProcessorFactory.html[RemoveBlankFieldUpdateProcessorFactory]
|
||||
|
@ -170,9 +186,13 @@ Javadocs for update processor factories mentioned above:
|
|||
* {solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.html[ParseDateFieldUpdateProcessorFactory]
|
||||
* {solr-javadocs}/solr-core/org/apache/solr/update/processor/AddSchemaFieldsUpdateProcessorFactory.html[AddSchemaFieldsUpdateProcessorFactory]
|
||||
|
||||
=== Make the UpdateRequestProcessorChain the Default for the UpdateRequestHandler
|
||||
=== Set the Default UpdateRequestProcessorChain
|
||||
|
||||
Once the UpdateRequestProcessorChain has been defined, you must instruct your UpdateRequestHandlers to use it when working with index updates (i.e., adding, removing, replacing documents). There are two ways to do this. The update chain shown above has a `default=true` attribute which will use it for any update handler. An alternative, more explicit way is to use <<initparams-in-solrconfig.adoc#initparams-in-solrconfig,InitParams>> to set the defaults on all `/update` request handlers:
|
||||
Once the UpdateRequestProcessorChain has been defined, you must instruct your UpdateRequestHandlers to use it when working with index updates (i.e., adding, removing, replacing documents).
|
||||
|
||||
There are two ways to do this. The update chain shown above has a `default=true` attribute which will use it for any update handler.
|
||||
|
||||
An alternative, more explicit way is to use <<initparams-in-solrconfig.adoc#initparams-in-solrconfig,InitParams>> to set the defaults on all `/update` request handlers:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
|
@ -183,14 +203,18 @@ Once the UpdateRequestProcessorChain has been defined, you must instruct your Up
|
|||
</initParams>
|
||||
----
|
||||
|
||||
[IMPORTANT]
|
||||
====
|
||||
After each of these changes have been made, Solr should be restarted (or, you can reload the cores to load the new `solrconfig.xml` definitions).
|
||||
====
|
||||
IMPORTANT: After all of these changes have been made, Solr should be restarted or the cores reloaded.
|
||||
|
||||
=== Disabling Automatic Field Guessing
|
||||
|
||||
Automatic field creation can be disabled with the `update.autoCreateFields` property. To do this, you can use the <<config-api.adoc#config-api,Config API>> with a command such as:
|
||||
|
||||
[source,bash]
|
||||
curl http://host:8983/solr/mycollection/config -d '{"set-user-property": {"update.autoCreateFields":"false"}}'
|
||||
|
||||
== Examples of Indexed Documents
|
||||
|
||||
Once the schemaless mode has been enabled (whether you configured it manually or are using `_default`), documents that include fields that are not defined in your schema will be indexed, using the guessed field types which are automatically added to the schema.
|
||||
Once the schemaless mode has been enabled (whether you configured it manually or are using the `_default` configset), documents that include fields that are not defined in your schema will be indexed, using the guessed field types which are automatically added to the schema.
|
||||
|
||||
For example, adding a CSV document will cause unknown fields to be added, with fieldTypes based on values:
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
= Solr Upgrade Notes
|
||||
:page-shortname: solr-upgrade-notes
|
||||
:page-permalink: solr-upgrade-notes.html
|
||||
:page-children: major-changes-in-solr-7, major-changes-from-solr-5-to-solr-6
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
|
@ -22,60 +23,13 @@ The following notes describe changes to Solr in recent releases that you should
|
|||
|
||||
These notes are meant to highlight the biggest changes that may impact the largest number of implementations. It is not a comprehensive list of all changes to Solr in any release.
|
||||
|
||||
When planning your Solr upgrade, consider the customizations you have made to your system and review the {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] file found in your Solr package. That file includes all of the changes and updates that may effect your existing implementation. Detailed steps for upgrading a Solr cluster can be found in the appendix: <<upgrading-a-solr-cluster.adoc#upgrading-a-solr-cluster,Upgrading a Solr Cluster>>.
|
||||
When planning your Solr upgrade, consider the customizations you have made to your system and review the {solr-javadocs}/changes/Changes.html[`CHANGES.txt`] file found in your Solr package. That file includes all of the changes and updates that may affect your existing implementation.
|
||||
|
||||
== Upgrading from 6.5.x
|
||||
Detailed steps for upgrading a Solr cluster can be found in the section <<upgrading-a-solr-cluster.adoc#upgrading-a-solr-cluster,Upgrading a Solr Cluster>>.
|
||||
|
||||
If you are already using Solr 6.5, Solr 6.6 should not present any major problems.
|
||||
== Upgrading from Any 6.x Release
|
||||
|
||||
* Solr contribs map-reduce, morphlines-core and morphlines-cell have been removed.
|
||||
|
||||
* JSON Facet API now uses hyper-log-log for numBuckets cardinality calculation and calculates cardinality before filtering buckets by any mincount greater than 1.
|
||||
|
||||
* ZooKeeper dependency has been upgraded from 3.4.6 to 3.4.10.
|
||||
|
||||
== Upgrading from earlier 6.x versions
|
||||
|
||||
* If you use historical dates, specifically on or before the year 1582, you should re-index after upgrading to this version.
|
||||
* If you use the JSON Facet API (json.facet) with `method=stream`, you must now set `sort='index asc'` to get the streaming behavior; otherwise it won't stream. Reminder: "method" is a hint that doesn't change defaults of other parameters.
|
||||
* If you use the JSON Facet API (json.facet) to facet on a numeric field and if you use `mincount=0` or if you set the prefix, then you will now get an error as these options are incompatible with numeric faceting.
|
||||
* Solr's logging verbosity at the INFO level has been greatly reduced, and you may need to update the log configs to use the DEBUG level to see all the logging messages you used to see at INFO level before.
|
||||
* We are no longer backing up `solr.log` and `solr_gc.log` files in date-stamped copies forever. If you relied on the `solr_log_<date>` or `solr_gc_log_<date>` being in the logs folder that will no longer be the case. See the section <<configuring-logging.adoc#configuring-logging,Configuring Logging>> for details on how log rotation works as of Solr 6.3.
|
||||
* The create/deleteCollection methods on MiniSolrCloudCluster have been deprecated. Clients should instead use the CollectionAdminRequest API. In addition, `MiniSolrCloudCluster#uploadConfigDir(File, String)` has been deprecated in favour of `#uploadConfigSet(Path, String)`.
|
||||
* The http://solr.in[`bin/solr.in.sh`] (http://solr.in[`bin/solr.in.cmd`] on Windows) is now completely commented by default. Previously, this wasn't so, which had the effect of masking existing environment variables.
|
||||
* The `\_version_` field is no longer indexed and is now defined with `indexed=false` by default, because the field has DocValues enabled.
|
||||
* The `/export` handler has been changed so it no longer returns zero (0) for numeric fields that are not in the original document. One consequence of this change is that you must be aware that some tuples will not have values if there were none in the original document.
|
||||
* Metrics-related classes in `org.apache.solr.util.stats` have been removed in favor of the http://metrics.dropwizard.io/3.1.0/[Dropwizard metrics library]. Any custom plugins using these classes should be changed to use the equivalent classes from the metrics library. As part of this, the following changes were made to the output of Overseer Status API:
|
||||
** The "totalTime" metric has been removed because it is no longer supported.
|
||||
** The metrics "75thPctlRequestTime", "95thPctlRequestTime", "99thPctlRequestTime" and "999thPctlRequestTime" in Overseer Status API have been renamed to "75thPcRequestTime", "95thPcRequestTime" and so on for consistency with stats output in other parts of Solr.
|
||||
** The metrics "avgRequestsPerMinute", "5minRateRequestsPerMinute" and "15minRateRequestsPerMinute" have been replaced by corresponding per-second rates viz. "avgRequestsPerSecond", "5minRateRequestsPerSecond" and "15minRateRequestsPerSecond" for consistency with stats output in other parts of Solr.
|
||||
* A new highlighter named UnifiedHighlighter has been added. You are encouraged to try out the UnifiedHighlighter by setting `hl.method=unified` and report feedback. It might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the original Highlighter. That said, some options aren't supported yet. It will get more features in time, especially with your input. See HighlightParams.java for a listing of highlight parameters annotated with which highlighters use them. `hl.useFastVectorHighlighter` is now considered deprecated in lieu of `hl.method=fastVector`.
|
||||
* The <<query-settings-in-solrconfig.adoc#query-settings-in-solrconfig,`maxWarmingSearchers` parameter>> now defaults to 1, and more importantly commits will now block if this limit is exceeded instead of throwing an exception (a good thing). Consequently there is no longer a risk in overlapping commits. Nonetheless users should continue to avoid excessive committing. Users are advised to remove any pre-existing maxWarmingSearchers entries from their solrconfig.xml files.
|
||||
* The <<other-parsers.adoc#complex-phrase-query-parser,Complex Phrase query parser>> now supports leading wildcards. Beware of its possible heaviness, users are encouraged to use ReversedWildcardFilter in index time analysis.
|
||||
* The JMX metric "avgTimePerRequest" (and the corresponding metric in the metrics API for each handler) used to be a simple non-decaying average based on total cumulative time and the number of requests. New Codahale Metrics implementation applies exponential decay to this value, which heavily biases the average towards the last 5 minutes.
|
||||
* Index-time boosts are now deprecated. As a replacement, index-time scoring factors should be indexed in a separate field and combined with the query score using a function query. These boosts will be removed in Solr 7.0.
|
||||
* Parallel SQL now uses Apache Calcite as its SQL framework. As part of this change the default aggregation mode has been changed to facet rather than map_reduce. There have also been changes to the SQL aggregate response and some SQL syntax changes. Consult the <<parallel-sql-interface.adoc#parallel-sql-interface,Parallel SQL Interface>> documentation for full details.
|
||||
|
||||
== Upgrading from 5.5.x
|
||||
|
||||
* The deprecated `SolrServer` and subclasses have been removed, use <<using-solrj.adoc#using-solrj,`SolrClient`>> instead.
|
||||
* The deprecated `<nrtMode>` configuration in <<configuring-solrconfig-xml.adoc#configuring-solrconfig-xml,`solrconfig.xml`>> has been removed. Please remove it from `solrconfig.xml`.
|
||||
* `SolrClient.shutdown()` has been removed, use {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/SolrClient.html[`SolrClient.close()`] instead.
|
||||
* The deprecated `zkCredientialsProvider` element in `solrcloud` section of `solr.xml` is now removed. Use the correct spelling (<<zookeeper-access-control.adoc#zookeeper-access-control,`zkCredentialsProvider`>>) instead.
|
||||
* Internal/expert - `ResultContext` was significantly changed and expanded to allow for multiple full query results (`DocLists`) per Solr request. `TransformContext` was rendered redundant and was removed. See https://issues.apache.org/jira/browse/SOLR-7957[SOLR-7957] for details.
|
||||
* Several changes have been made regarding the "<<other-schema-elements.adoc#similarity,`Similarity`>>" used in Solr, in order to provide better default behavior for new users. There are 3 key impacts of these changes on existing users who upgrade:
|
||||
** `DefaultSimilarityFactory` has been removed. If you currently have `DefaultSimilarityFactory` explicitly referenced in your `schema.xml`, edit your config to use the functionally identical `ClassicSimilarityFactory`. See https://issues.apache.org/jira/browse/SOLR-8239[SOLR-8239] for more details.
|
||||
** The implicit default Similarity used when no `<similarity/>` is configured in `schema.xml` has been changed to `SchemaSimilarityFactory`. Users who wish to preserve back-compatible behavior should either explicitly configure `ClassicSimilarityFactory`, or ensure that the `luceneMatchVersion` for the collection is less then 6.0. See https://issues.apache.org/jira/browse/SOLR-8270[SOLR-8270] + http://SOLR-8271[SOLR-8271] for details.
|
||||
** `SchemaSimilarityFactory` has been modified to use `BM25Similarity` as the default for `fieldTypes` that do not explicitly declare a Similarity. The legacy behavior of using `ClassicSimilarity` as the default will occur if the `luceneMatchVersion` for the collection is less then 6.0, or the `'defaultSimFromFieldType'` configuration option may be used to specify any default of your choosing. See https://issues.apache.org/jira/browse/SOLR-8261[SOLR-8261] + https://issues.apache.org/jira/browse/SOLR-8329[SOLR-8329] for more details.
|
||||
* If your `solrconfig.xml` file doesn't explicitly mention the `schemaFactory` to use then Solr will choose the `ManagedIndexSchemaFactory` by default. Previously it would have chosen `ClassicIndexSchemaFactory`. This means that the Schema APIs (`/<collection>/schema`) are enabled and the schema is mutable. When Solr starts your `schema.xml` file will be renamed to `managed-schema`. If you want to retain the old behaviour then please ensure that the `solrconfig.xml` explicitly uses the `ClassicIndexSchemaFactory` or your `luceneMatchVersion` in the `solrconfig.xml` is less than 6.0. See the <<schema-factory-definition-in-solrconfig.adoc#schema-factory-definition-in-solrconfig,Schema Factory Definition in SolrConfig>> section for more details
|
||||
* `SolrIndexSearcher.QueryCommand` and `QueryResult` were moved to their own classes. If you reference them in your code, you should import them under o.a.s.search (or use your IDE's "Organize Imports").
|
||||
* The '<<request-parameters-api.adoc#request-parameters-api,`useParams`>>' attribute specified in request handler cannot be overridden from request params. See https://issues.apache.org/jira/browse/SOLR-8698[SOLR-8698] for more details.
|
||||
* When requesting stats in date fields, "sum" is now returned as a double value instead of a date. See https://issues.apache.org/jira/browse/SOLR-8671[SOLR-8671] for more details.
|
||||
* The deprecated GET methods for schema are now accessible through the <<schema-api.adoc#schema-api,bulk API>>. These methods now accept fewer request parameters, and output less information. See https://issues.apache.org/jira/browse/SOLR-8736[SOLR-8736] for more details. Some of the removed functionality will likely be restored in a future version of Solr - see https://issues.apache.org/jira/browse/SOLR-8992[SOLR-8992].
|
||||
* In the past, Solr guaranteed that retrieval of multi-valued fields would preserve the order of values. Because values may now be retrieved from column-stored fields (`docValues="true"`), in conjunction with the fact that <<docvalues.adoc#docvalues,DocValues>> do not currently preserve order, means that users should set <<defining-fields.adoc#defining-fields,`useDocValuesAsStored="false"`>> to prevent future optimizations from using the column-stored values over the row-stored values when fields have both `stored="true"` and `docValues="true"`.
|
||||
* <<working-with-dates.adoc#working-with-dates,Formatted date-times from Solr>> have some differences. If the year is more than 4 digits, there is a leading '+'. When there is a non-zero number of milliseconds, it is padded with zeros to 3 digits. Negative year (BC) dates are now possible. Parsing: It is now an error to supply a portion of the date out of its, range, like 67 seconds.
|
||||
* <<using-solrj.adoc#using-solrj,SolrJ>> no longer includes `DateUtil`. If for some reason you need to format or parse dates, simply use `Instant.format()` and `Instant.parse()`.
|
||||
* If you are using spatial4j, please upgrade to 0.6 and <<spatial-search.adoc#spatial-search,edit your `spatialContextFactory`>> to replace `com.spatial4j.core` with `org.locationtech.spatial4j` .
|
||||
The upgrade from Solr 6.x to Solr 7 introduces several *major* changes that you should be aware of before upgrading. Please do a thorough review of the section <<major-changes-in-solr-7.adoc#major-changes-in-solr-7,Major Changes in Solr 7>> before starting your upgrade.
|
||||
|
||||
== Upgrading from Older Versions of Solr
|
||||
|
||||
|
|
|
@ -61,7 +61,9 @@ See the section <<solrcloud-autoscaling-api.adoc#create-and-modify-cluster-prefe
|
|||
The following shows the default cluster preferences. This is applied automatically by Solr when no explicit cluster preferences have been set using the <<solrcloud-autoscaling-api.adoc#solrcloud-autoscaling-api,Autoscaling API>>.
|
||||
|
||||
[source,json]
|
||||
[{"minimize":"cores"}]
|
||||
[
|
||||
{"minimize":"cores"}
|
||||
]
|
||||
|
||||
==== Minimize Cores; Maximize Free Disk
|
||||
In this example, we want to minimize the number of Solr cores and, in case of a tie, maximize the amount of free disk space on each node.
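A sketch of the corresponding preference list, extending the default shown above (`freedisk` refers to the free disk space on a node):

[source,json]
----
[
  {"minimize": "cores"},
  {"maximize": "freedisk"}
]
----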
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
= SolrCloud Query Routing And Read Tolerance
|
||||
:page-shortname: solrcloud-query-routing-and-read-tolerance
|
||||
:page-permalink: solrcloud-query-routing-and-read-tolerance.html
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
SolrCloud is highly available and fault tolerant in reads and writes.
|
||||
|
||||
|
||||
== Read Side Fault Tolerance
|
||||
|
||||
In a SolrCloud cluster, each individual node load balances read requests across all the replicas in a collection. You still need a load balancer on the 'outside' that talks to the cluster, or you need a smart client that understands how to read and interact with Solr's metadata in ZooKeeper and only needs the ZooKeeper ensemble's address to start discovering the nodes it should send requests to. (Solr provides a smart Java SolrJ client called {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/impl/CloudSolrClient.html[CloudSolrClient].)
|
||||
|
||||
Even if some nodes in the cluster are offline or unreachable, a Solr node will be able to correctly respond to a search request as long as it can communicate with at least one replica of every shard, or one replica of every _relevant_ shard if the user limited the search via the `shards` or `\_route_` parameters. The more replicas there are of every shard, the more likely the Solr cluster will be able to return complete search results in the event of node failures.
|
||||
|
||||
=== zkConnected
|
||||
|
||||
A Solr node will return the results of a search request as long as it can communicate with at least one replica of every shard that it knows about, even if it can _not_ communicate with ZooKeeper at the time it receives the request. This is normally the preferred behavior from a fault tolerance standpoint, but it may result in stale or incorrect results if there have been major changes to the collection structure that the node has not been informed of via ZooKeeper (i.e., shards may have been added or removed, or split into sub-shards).
|
||||
|
||||
A `zkConnected` header is included in every search response indicating if the node that processed the request was connected with ZooKeeper at the time:
|
||||
|
||||
.Solr Response with zkConnected
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
"responseHeader": {
|
||||
"status": 0,
|
||||
"zkConnected": true,
|
||||
"QTime": 20,
|
||||
"params": {
|
||||
"q": "*:*"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"numFound": 107,
|
||||
"start": 0,
|
||||
"docs": [ "..." ]
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
=== shards.tolerant
|
||||
|
||||
In the event that one or more shards queried are completely unavailable, Solr's default behavior is to fail the request. However, there are many use-cases where partial results are acceptable, and so Solr provides a boolean `shards.tolerant` parameter (default `false`).
|
||||
|
||||
If `shards.tolerant=true` then partial results may be returned. If the returned response does not contain results from all the appropriate shards then the response header contains a special flag called `partialResults`.
|
||||
|
||||
The client can specify '<<distributed-search-with-index-sharding.adoc#distributed-search-with-index-sharding,`shards.info`>>' along with the `shards.tolerant` parameter to retrieve more fine-grained details.
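For example (host and collection name are illustrative), a request asking for partial results along with per-shard details:

[source,bash]
curl "http://localhost:8983/solr/techproducts/select?q=*:*&shards.tolerant=true&shards.info=true"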
|
||||
|
||||
Example response with `partialResults` flag set to 'true':
|
||||
|
||||
*Solr Response with partialResults*
|
||||
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
"responseHeader": {
|
||||
"status": 0,
|
||||
"zkConnected": true,
|
||||
"partialResults": true,
|
||||
"QTime": 20,
|
||||
"params": {
|
||||
"q": "*:*"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"numFound": 77,
|
||||
"start": 0,
|
||||
"docs": [ "..." ]
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
= Read and Write Side Fault Tolerance
|
||||
:page-shortname: read-and-write-side-fault-tolerance
|
||||
:page-permalink: read-and-write-side-fault-tolerance.html
|
||||
= SolrCloud Recoveries and Write Tolerance
|
||||
:page-shortname: solrcloud-recoveries-and-write-tolerance
|
||||
:page-permalink: solrcloud-recoveries-and-write-tolerance.html
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
|
@ -18,73 +18,7 @@
|
|||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
SolrCloud supports elasticity, high availability, and fault tolerance in reads and writes.
|
||||
|
||||
What this means, basically, is that when you have a large cluster, you can always make requests to the cluster: Reads will return results whenever possible, even if some nodes are down, and Writes will be acknowledged only if they are durable; i.e., you won't lose data.
|
||||
|
||||
== Read Side Fault Tolerance
|
||||
|
||||
In a SolrCloud cluster each individual node load balances read requests across all the replicas in collection. You still need a load balancer on the 'outside' that talks to the cluster, or you need a smart client which understands how to read and interact with Solr's metadata in ZooKeeper and only requests the ZooKeeper ensemble's address to start discovering to which nodes it should send requests. (Solr provides a smart Java SolrJ client called {solr-javadocs}/solr-solrj/org/apache/solr/client/solrj/impl/CloudSolrClient.html[CloudSolrClient].)
|
||||
|
||||
Even if some nodes in the cluster are offline or unreachable, a Solr node will be able to correctly respond to a search request as long as it can communicate with at least one replica of every shard, or one replica of every _relevant_ shard if the user limited the search via the `shards` or `\_route_` parameters. The more replicas there are of every shard, the more likely that the Solr cluster will be able to handle search results in the event of node failures.
|
||||
|
||||
=== zkConnected
|
||||
|
||||
A Solr node will return the results of a search request as long as it can communicate with at least one replica of every shard that it knows about, even if it can _not_ communicate with ZooKeeper at the time it receives the request. This is normally the preferred behavior from a fault tolerance standpoint, but may result in stale or incorrect results if there have been major changes to the collection structure that the node has not been informed of via ZooKeeper (i.e., shards may have been added or removed, or split into sub-shards)
|
||||
|
||||
A `zkConnected` header is included in every search response indicating if the node that processed the request was connected with ZooKeeper at the time:
|
||||
|
||||
.Solr Response with partialResults
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
"responseHeader": {
|
||||
"status": 0,
|
||||
"zkConnected": true,
|
||||
"QTime": 20,
|
||||
"params": {
|
||||
"q": "*:*"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"numFound": 107,
|
||||
"start": 0,
|
||||
"docs": [ "..." ]
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
=== shards.tolerant
|
||||
|
||||
In the event that one or more shards queried are completely unavailable, then Solr's default behavior is to fail the request. However, there are many use-cases where partial results are acceptable and so Solr provides a boolean `shards.tolerant` parameter (default `false`).
|
||||
|
||||
If `shards.tolerant=true` then partial results may be returned. If the returned response does not contain results from all the appropriate shards then the response header contains a special flag called `partialResults`.
|
||||
|
||||
The client can specify '<<distributed-search-with-index-sharding.adoc#distributed-search-with-index-sharding,`shards.info`>>' along with the `shards.tolerant` parameter to retrieve more fine-grained details.
|
||||
|
||||
Example response with `partialResults` flag set to 'true':
|
||||
|
||||
*Solr Response with partialResults*
|
||||
|
||||
[source,json]
|
||||
----
|
||||
{
|
||||
"responseHeader": {
|
||||
"status": 0,
|
||||
"zkConnected": true,
|
||||
"partialResults": true,
|
||||
"QTime": 20,
|
||||
"params": {
|
||||
"q": "*:*"
|
||||
}
|
||||
},
|
||||
"response": {
|
||||
"numFound": 77,
|
||||
"start": 0,
|
||||
"docs": [ "..." ]
|
||||
}
|
||||
}
|
||||
----
|
||||
SolrCloud is highly available and fault tolerant in reads and writes.
|
||||
|
||||
== Write Side Fault Tolerance
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
= SolrCloud Resilience
|
||||
:page-shortname: solrcloud-resilience
|
||||
:page-permalink: solrcloud-resilience.html
|
||||
:page-children: solrcloud-recoveries-and-write-tolerance, solrcloud-query-routing-and-read-tolerance
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
In this section, we'll cover how Solr handles reads and writes when not all the nodes in the cluster are healthy.
|
||||
|
||||
The following sections cover these topics:
|
||||
|
||||
* <<solrcloud-recoveries-and-write-tolerance.adoc#solrcloud-recoveries-and-write-tolerance,SolrCloud Recoveries and Write Tolerance>>
|
||||
* <<solrcloud-query-routing-and-read-tolerance.adoc#solrcloud-query-routing-and-read-tolerance,SolrCloud Query Routing And Read Tolerance>>
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
= SolrCloud
|
||||
:page-shortname: solrcloud
|
||||
:page-permalink: solrcloud.html
|
||||
:page-children: getting-started-with-solrcloud, how-solrcloud-works, solrcloud-configuration-and-parameters, rule-based-replica-placement, cross-data-center-replication-cdcr, solrcloud-autoscaling
|
||||
:page-children: getting-started-with-solrcloud, how-solrcloud-works, solrcloud-resilience, solrcloud-configuration-and-parameters, rule-based-replica-placement, cross-data-center-replication-cdcr, solrcloud-autoscaling
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
|
@ -33,7 +33,9 @@ In this section, we'll cover everything you need to know about using Solr in Sol
|
|||
* <<how-solrcloud-works.adoc#how-solrcloud-works,How SolrCloud Works>>
|
||||
** <<shards-and-indexing-data-in-solrcloud.adoc#shards-and-indexing-data-in-solrcloud,Shards and Indexing Data in SolrCloud>>
|
||||
** <<distributed-requests.adoc#distributed-requests,Distributed Requests>>
|
||||
** <<read-and-write-side-fault-tolerance.adoc#read-and-write-side-fault-tolerance,Read and Write Side Fault Tolerance>>
|
||||
* <<solrcloud-resilience.adoc#solrcloud-resilience,SolrCloud Resilience>>
|
||||
** <<solrcloud-recoveries-and-write-tolerance.adoc#solrcloud-recoveries-and-write-tolerance,SolrCloud Recoveries and Write Tolerance>>
|
||||
** <<solrcloud-query-routing-and-read-tolerance.adoc#solrcloud-query-routing-and-read-tolerance,SolrCloud Query Routing And Read Tolerance>>
|
||||
* <<solrcloud-configuration-and-parameters.adoc#solrcloud-configuration-and-parameters,SolrCloud Configuration and Parameters>>
|
||||
** <<setting-up-an-external-zookeeper-ensemble.adoc#setting-up-an-external-zookeeper-ensemble,Setting Up an External ZooKeeper Ensemble>>
|
||||
** <<using-zookeeper-to-manage-configuration-files.adoc#using-zookeeper-to-manage-configuration-files,Using ZooKeeper to Manage Configuration Files>>
|
||||
|
|
|
@ -354,9 +354,10 @@ The `BBoxField` field type indexes a single rectangle (bounding box) per documen
|
|||
[source,xml]
|
||||
----
|
||||
<field name="bbox" type="bbox" />
|
||||
|
||||
<fieldType name="bbox" class="solr.BBoxField"
|
||||
geo="true" units="kilometers" numberType="_bbox_coord" storeSubFields="false"/>
|
||||
<fieldType name="_bbox_coord" class="solr.TrieDoubleField" precisionStep="8" docValues="true" stored="false"/>
|
||||
geo="true" distanceUnits="kilometers" numberType="pdouble" />
|
||||
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
|
||||
----
|
||||
|
||||
BBoxField is actually based on four instances of another field type, referred to by `numberType`. It also uses a boolean to flag a dateline cross. Assuming you want to use the relevancy feature, `docValues` is required. Some of the attributes are in common with the RPT field, like `geo`, `units`, `worldBounds`, and `spatialContextFactory`, because they share some of the same spatial infrastructure.
@@ -22,10 +22,10 @@ The Extended DisMax (eDisMax) query parser is an improved version of the <<the-d
In addition to supporting all the DisMax query parser parameters, Extended DisMax (see the sample request after this list):
* supports the <<the-standard-query-parser.adoc#the-standard-query-parser,full Lucene query parser syntax>>.
* supports queries such as AND, OR, NOT, -, and +.
* optionally treats "and" and "or" as "AND" and "OR" in Lucene syntax mode.
* respects the 'magic field' names `\_val_` and `\_query_`. These are not real fields in the Schema, but if used they help do special things (like a function query in the case of `\_val_` or a nested query in the case of `\_query_`). If `\_val_` is used in a term or phrase query, the value is parsed as a function.
* supports the full Lucene query parser syntax with the same enhancements as <<the-standard-query-parser.adoc#the-standard-query-parser,Solr's standard query parser>>.
** supports queries such as AND, OR, NOT, -, and +.
** optionally treats "and" and "or" as "AND" and "OR" in Lucene syntax mode.
** respects the 'magic field' names `\_val_` and `\_query_`. These are not real fields in the Schema, but if used they help do special things (like a function query in the case of `\_val_` or a nested query in the case of `\_query_`). If `\_val_` is used in a term or phrase query, the value is parsed as a function.
* includes improved smart partial escaping in the case of syntax errors; fielded queries, +/-, and phrase queries are still supported in this mode.
* improves proximity boosting by using word shingles; you do not need the query to match all words in the document before proximity boosting is applied.
* includes advanced stopword handling: stopwords are not required in the mandatory part of the query but are still used in the proximity boosting part. If a query consists of all stopwords, such as "to be or not to be", then all words are required.
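
Combining several of the features above, an eDisMax request might look like the following sketch (the field names and query terms are illustrative only):

[source,text]
----
defType=edismax
qf=name^2 features
q=(ipod OR usb) AND NOT belkin
----

Here `q` uses full Lucene boolean syntax while `qf` supplies DisMax-style per-field boosts.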
@@ -218,16 +218,3 @@ _val_:"recip(rord(myfield),1,2,3)"
_query_:"{!dismax qf=myfield}how now brown cow"
----
Although not technically a syntax difference, note that if you use the Solr {solr-javadocs}/solr-core/org/apache/solr/schema/TrieDateField.html[`TrieDateField`] type, any queries on those fields (typically range queries) should use either the Complete ISO 8601 Date syntax that field supports, or the {solr-javadocs}/solr-core/org/apache/solr/util/DateMathParser.html[DateMath Syntax] to get relative dates. For example:
[source,text]
----
timestamp:[* TO NOW]
createdate:[1976-03-06T23:59:59.999Z TO *]
createdate:[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]
pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]
createdate:[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]
createdate:[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]
----
IMPORTANT: `TO` must be uppercase, or Solr will report a 'Range Group' error.
@@ -18,11 +18,11 @@
// specific language governing permissions and limitations
// under the License.
The https://wiki.apache.org/solr/QueryElevationComponent[Query Elevation Component] lets you configure the top results for a given query regardless of the normal Lucene scoring.
The Query Elevation Component lets you configure the top results for a given query regardless of the normal Lucene scoring.
This is sometimes called "sponsored search," "editorial boosting," or "best bets." This component matches the user query text to a configured map of top results. The text can be any string or non-string IDs, as long as it's indexed. Although this component will work with any QueryParser, it makes the most sense to use with <<the-dismax-query-parser.adoc#the-dismax-query-parser,DisMax>> or <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,eDisMax>>.
This is sometimes called "sponsored search", "editorial boosting", or "best bets." This component matches the user query text to a configured map of top results. The text can be any string or non-string IDs, as long as it's indexed. Although this component will work with any QueryParser, it makes the most sense to use with <<the-dismax-query-parser.adoc#the-dismax-query-parser,DisMax>> or <<the-extended-dismax-query-parser.adoc#the-extended-dismax-query-parser,eDisMax>>.
The https://wiki.apache.org/solr/QueryElevationComponent[Query Elevation Component] is supported by distributed searching.
The Query Elevation Component also supports distributed searching.
All of the sample configuration and queries used in this section assume you are running Solr's "```techproducts```" example:
@@ -71,9 +71,9 @@ Path to the file that defines query elevation. This file must exist in `<instanc
`forceElevation`::
By default, this component respects the requested `sort` parameter: if the request asks to sort by date, it will order the results by date. If `forceElevation=true`, results will first return the boosted docs, then order by date.
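
For instance, a request sketch like the following keeps the elevated documents on top even though a sort is requested (parameter values are illustrative):

[source,text]
----
q=ipod
sort=price asc
forceElevation=true
----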
=== elevate.xml
=== The elevate.xml File
Elevated query results are configured in an external XML file specified in the `config-file` argument. An `elevate.xml` file might look like this:
Elevated query results can be configured in an external XML file specified in the `config-file` argument. An `elevate.xml` file might look like this:
[source,xml]
----
@@ -93,6 +93,8 @@ Elevated query results are configured in an external XML file specified in the `
In this example, the query "foo bar" would first return documents 1, 2 and 3, then whatever normally appears for the same query. For the query "ipod", it would first return "MA147LL/A", and would make sure that "IW-02" is not in the result set.
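
As a sketch, an `elevate.xml` matching that description might contain something like this (the document IDs are taken from the example just described):

[source,xml]
----
<elevate>
  <query text="foo bar">
    <doc id="1" />
    <doc id="2" />
    <doc id="3" />
  </query>
  <query text="ipod">
    <!-- pin this document to the top for the query "ipod" -->
    <doc id="MA147LL/A" />
    <!-- keep this document out of the result set entirely -->
    <doc id="IW-02" exclude="true" />
  </query>
</elevate>
----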
If documents to be elevated are not defined in the `elevate.xml` file, they should be passed in at query time with the <<The elevateIds and excludeIds Parameters,`elevateIds` parameter>>.
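
For example, a request along these lines passes the IDs at query time (the `/elevate` handler and document IDs assume the techproducts example and are illustrative):

[source,text]
----
http://localhost:8983/solr/techproducts/elevate?q=ipod&df=text&elevateIds=IW-02,F8V7067-APL-KIT
----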
== Using the Query Elevation Component
=== The enableElevation Parameter
@@ -350,11 +350,13 @@ This can even be used to cache individual clauses of complex filter queries. In
=== Specifying Dates and Times
Queries against fields using the `TrieDateField` type (typically range queries) should use the <<working-with-dates.adoc#working-with-dates,appropriate date syntax>>:
Queries against date-based fields must use the <<working-with-dates.adoc#working-with-dates,appropriate date formatting>>. Queries for exact date values will require quoting or escaping since `:` is the parser syntax used to denote a field query:
* `timestamp:[* TO NOW]`
* `createdate:1976-03-06T23\:59\:59.999Z`
* `createdate:"1976-03-06T23:59:59.999Z"`
* `createdate:[1976-03-06T23:59:59.999Z TO *]`
* `createdate:[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]`
* `timestamp:[* TO NOW]`
* `pubdate:[NOW-1YEAR/DAY TO NOW/DAY+1DAY]`
* `createdate:[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]`
* `createdate:[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]`
@@ -142,7 +142,7 @@ However executing a processor only on the forwarding nodes is a great way of dis
.Custom update chain post-processors may never be invoked on a recovering replica
[WARNING]
====
While a replica is in <<read-and-write-side-fault-tolerance.adoc#write-side-fault-tolerance,recovery>>, inbound update requests are buffered to the transaction log. After recovery has completed successfully, those buffered update requests are replayed. As of this writing, however, custom update chain post-processors are never invoked for buffered update requests. See https://issues.apache.org/jira/browse/SOLR-8030[SOLR-8030]. To work around this problem until SOLR-8030 has been fixed, *avoid specifying post-processors in custom update chains*.
While a replica is in <<solrcloud-recoveries-and-write-tolerance.adoc#solrcloud-recoveries-and-write-tolerance,recovery>>, inbound update requests are buffered to the transaction log. After recovery has completed successfully, those buffered update requests are replayed. As of this writing, however, custom update chain post-processors are never invoked for buffered update requests. See https://issues.apache.org/jira/browse/SOLR-8030[SOLR-8030]. To work around this problem until SOLR-8030 has been fixed, *avoid specifying post-processors in custom update chains*.
====
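
As a rough sketch, a custom chain with no post-processors, i.e., nothing listed after `RunUpdateProcessorFactory` (the chain name here is made up for illustration), could look like:

[source,xml]
----
<updateRequestProcessorChain name="no-post-processors">
  <processor class="solr.LogUpdateProcessorFactory"/>
  <processor class="solr.DistributedUpdateProcessorFactory"/>
  <processor class="solr.RunUpdateProcessorFactory"/>
  <!-- avoid adding post-processors after RunUpdateProcessorFactory until SOLR-8030 is fixed -->
</updateRequestProcessorChain>
----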
=== Atomic Update Processor Factory
@@ -121,7 +121,7 @@ Realtime Get currently relies on the update log feature, which is enabled by def
</updateLog>
----
Three additional expert-level configuration settings affect indexing performance and how far a replica can fall behind on updates before it must enter into full recovery - see the section on <<read-and-write-side-fault-tolerance.adoc#write-side-fault-tolerance,write side fault tolerance>> for more information:
Three additional expert-level configuration settings affect indexing performance and how far a replica can fall behind on updates before it must enter into full recovery - see the section on <<solrcloud-recoveries-and-write-tolerance.adoc#solrcloud-recoveries-and-write-tolerance,write side fault tolerance>> for more information:
`numRecordsToKeep`::
The number of update records to keep per log. The default is `100`.
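
As a sketch, an `<updateLog>` section with these expert-level settings tuned might look like the following (the values are illustrative, not recommendations):

[source,xml]
----
<updateLog>
  <str name="dir">${solr.ulog.dir:}</str>
  <!-- keep more update records per log than the default of 100 -->
  <int name="numRecordsToKeep">500</int>
  <!-- another of the expert-level settings referred to above -->
  <int name="maxNumLogsToKeep">20</int>
</updateLog>
----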