Merge remote-tracking branch 'origin/master'

Noble Paul 2016-09-13 17:19:33 +05:30
commit 652065e14e
90 changed files with 3463 additions and 2126 deletions

View File

@ -7,6 +7,8 @@
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />

View File

@ -61,6 +61,11 @@
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${module-path}/src/test-files</directory>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>

View File

@ -86,16 +86,24 @@ def check_url_list(lst):
if mirror_contains_file(url):
p('.')
else:
p('X')
p('\nFAIL: ' + url + '\n' if args.details else 'X')
ret.append(url)
return ret
parser = argparse.ArgumentParser(description='Checks that all Lucene mirrors contain a copy of a release')
parser.add_argument('-version', '-v', help='Lucene version to check', required=True)
parser.add_argument('-interval', '-i', help='seconds to wait to query again pending mirrors', type=int, default=300)
desc = 'Periodically checks that all Lucene/Solr mirrors contain either a copy of a release or a specified path'
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('-version', '-v', help='Lucene/Solr version to check')
parser.add_argument('-path', '-p', help='instead of a versioned release, check for some/explicit/path')
parser.add_argument('-interval', '-i', help='seconds to wait before re-querying mirrors', type=int, default=300)
parser.add_argument('-details', '-d', help='print missing mirror URLs', action='store_true', default=False)
args = parser.parse_args()
if (args.version is None and args.path is None) \
or (args.version is not None and args.path is not None):
p('You must specify either -version or -path but not both!\n')
sys.exit(1)
try:
conn = http.HTTPConnection('www.apache.org')
conn.request('GET', '/mirrors/')
@ -105,9 +113,9 @@ except Exception as e:
p('Unable to fetch the Apache mirrors list!\n')
sys.exit(1)
apache_path = 'lucene/java/{}/changes/Changes.html'.format(args.version);
maven_url = 'http://repo1.maven.org/maven2/' \
'org/apache/lucene/lucene-core/{0}/lucene-core-{0}.pom.asc'.format(args.version)
mirror_path = args.path if args.path is not None else 'lucene/java/{}/changes/Changes.html'.format(args.version)
maven_url = None if args.version is None else 'http://repo1.maven.org/maven2/' \
'org/apache/lucene/lucene-core/{0}/lucene-core-{0}.pom.asc'.format(args.version)
maven_available = False
pending_mirrors = []
@ -119,18 +127,19 @@ for match in re.finditer('<TR>(.*?)</TR>', str(html), re.MULTILINE | re.IGNORECA
match = re.search('<A\s+HREF\s*=\s*"([^"]+)"\s*>', row, re.MULTILINE | re.IGNORECASE)
if match:
pending_mirrors.append(match.group(1) + apache_path)
pending_mirrors.append(match.group(1) + mirror_path)
total_mirrors = len(pending_mirrors)
label = args.version if args.version is not None else args.path
while True:
p('\n' + str(datetime.datetime.now()))
p('\n{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
p('\nPolling {} Apache Mirrors'.format(len(pending_mirrors)))
if not maven_available:
if maven_url is not None and not maven_available:
p(' and Maven Central')
p('...\n')
if not maven_available:
if maven_url is not None and not maven_available:
maven_available = mirror_contains_file(maven_url)
start = time.time()
@ -140,14 +149,14 @@ while True:
available_mirrors = total_mirrors - len(pending_mirrors)
p('\n\n{} is{}downloadable from Maven Central\n'.format(args.version, maven_available and ' ' or ' not '))
p('{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'.format(args.version, available_mirrors,
total_mirrors,
available_mirrors * 100 / total_mirrors))
if maven_url is not None:
p('\n\n{} is{}downloadable from Maven Central'.format(label, ' ' if maven_available else ' not '))
p('\n{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'
.format(label, available_mirrors, total_mirrors, available_mirrors * 100 / total_mirrors))
if len(pending_mirrors) == 0:
break
if remaining > 0:
p('Sleeping for {} seconds...\n'.format(remaining))
p('Sleeping for {:d} seconds...\n'.format(int(remaining + 0.5)))
time.sleep(remaining)

View File

@ -36,7 +36,36 @@ Other
======================= Lucene 6.3.0 =======================
(No changes)
API Changes
* LUCENE-7436: MinHashFilter's constructor, and some of its default
settings, should be public. (Doug Turnbull via Mike McCandless)
New Features
Bug Fixes
* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
term. (Thomas Kappler via David Smiley)
* LUCENE-7440: Document id skipping (PostingsEnum.advance) could throw an
ArrayIndexOutOfBoundsException exception on large index segments (>1.8B docs)
with large skips. (yonik)
* LUCENE-7442: MinHashFilter's ctor should validate its args.
(Cao Manh Dat via Steve Rowe)
* LUCENE-7318: Fix backwards compatibility issues around StandardAnalyzer
and its components, introduced with Lucene 6.2.0. The moved classes
were restored in their original packages: LowercaseFilter and StopFilter,
as well as several utility classes. (Uwe Schindler, Mike McCandless)
Improvements
Optimizations
Other
======================= Lucene 6.2.0 =======================
@ -632,6 +661,9 @@ Other
* LUCENE-7095: Add point values support to the numeric field query time join.
(Martijn van Groningen, Mike McCandless)
======================= Lucene 5.5.3 =======================
(No Changes)
======================= Lucene 5.5.2 =======================
Bug Fixes

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.TokenStream;
/**
* Normalizes token text to lower case.
* <p>
* This class moved to Lucene Core, but a reference in the {@code analysis/common} module
* is preserved for documentation purposes and consistency with filter factory.
* @see org.apache.lucene.analysis.LowerCaseFilter
* @see LowerCaseFilterFactory
*/
public final class LowerCaseFilter extends org.apache.lucene.analysis.LowerCaseFilter {
/**
* Create a new LowerCaseFilter, that normalizes token text to lower case.
*
* @param in TokenStream to filter
*/
public LowerCaseFilter(TokenStream in) {
super(in);
}
}

View File

@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
/**
* Removes stop words from a token stream.
* <p>
* This class moved to Lucene Core, but a reference in the {@code analysis/common} module
* is preserved for documentation purposes and consistency with filter factory.
* @see org.apache.lucene.analysis.StopFilter
* @see StopFilterFactory
*/
public final class StopFilter extends org.apache.lucene.analysis.StopFilter {
/**
* Constructs a filter which removes words from the input TokenStream that are
* named in the Set.
*
* @param in
* Input stream
* @param stopWords
* A {@link CharArraySet} representing the stopwords.
* @see #makeStopSet(java.lang.String...)
*/
public StopFilter(TokenStream in, CharArraySet stopWords) {
super(in, stopWords);
}
}
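The two shims above delegate to the classes that moved to Lucene Core. As a minimal consumption sketch (not part of this patch; the demo class name, the WhitespaceTokenizer input, and the sample text are illustrative assumptions), the relocated core filters can be chained directly:

import java.io.StringReader;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CoreFilterShimDemo {
  public static void main(String[] args) throws Exception {
    // The stop-set helper now lives on the core StopFilter:
    CharArraySet stopWords = StopFilter.makeStopSet("the", "a");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("The Quick Fox"));
    // Lowercase first, then remove stop words, using the relocated core classes:
    TokenStream stream = new StopFilter(new LowerCaseFilter(tokenizer), stopWords);
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // prints "quick" then "fox"
    }
    stream.end();
    stream.close();
  }
}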

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Map;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WordlistLoader; // jdocs
import org.apache.lucene.analysis.util.ResourceLoader;

View File

@ -49,11 +49,11 @@ public class MinHashFilter extends TokenFilter {
private static final LongPair[] cachedIntHashes = new LongPair[HASH_CACHE_SIZE];
static final int DEFAULT_HASH_COUNT = 1;
public static final int DEFAULT_HASH_COUNT = 1;
static final int DEFAULT_HASH_SET_SIZE = 1;
public static final int DEFAULT_HASH_SET_SIZE = 1;
static final int DEFAULT_BUCKET_COUNT = 512;
public static final int DEFAULT_BUCKET_COUNT = 512;
static final String MIN_HASH_TYPE = "MIN_HASH";
@ -112,8 +112,17 @@ public class MinHashFilter extends TokenFilter {
* @param hashSetSize the number of min hashes to keep
* @param withRotation whether or not to rotate hashes while incrementing tokens
*/
MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
public MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
super(input);
if (hashCount <= 0) {
throw new IllegalArgumentException("hashCount must be greater than zero");
}
if (bucketCount <= 0) {
throw new IllegalArgumentException("bucketCount must be greater than zero");
}
if (hashSetSize <= 0) {
throw new IllegalArgumentException("hashSetSize must be greater than zero");
}
this.hashCount = hashCount;
this.bucketCount = bucketCount;
this.hashSetSize = hashSetSize;
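With the constructor and its default constants now public (LUCENE-7436) and argument validation added (LUCENE-7442), the filter can be built directly. A hedged sketch (the demo class name is hypothetical; it assumes MinHashFilter lives in org.apache.lucene.analysis.minhash and accepts any TokenStream as input):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.minhash.MinHashFilter;

public class MinHashFilterDemo {
  public static void main(String[] args) {
    TokenStream input = new WhitespaceTokenizer();
    // Construct with the constants made public by this change (never consumed here):
    TokenStream minHashed = new MinHashFilter(input,
        MinHashFilter.DEFAULT_HASH_COUNT,
        MinHashFilter.DEFAULT_BUCKET_COUNT,
        MinHashFilter.DEFAULT_HASH_SET_SIZE,
        true /* withRotation */);
    System.out.println(minHashed.getClass().getSimpleName());

    // The new checks reject non-positive arguments up front:
    try {
      new MinHashFilter(input, 0, MinHashFilter.DEFAULT_BUCKET_COUNT,
          MinHashFilter.DEFAULT_HASH_SET_SIZE, true);
    } catch (IllegalArgumentException expected) {
      System.out.println(expected.getMessage()); // "hashCount must be greater than zero"
    }
  }
}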

View File

@ -46,5 +46,9 @@
and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
</li>
</ul>
<p>
This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
The factories for those components (e.g., used in Solr) are still part of this module.
</body>
</html>

View File

@ -106,7 +106,9 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
SnowballFilter.class, // this is called SnowballPorterFilterFactory
PatternKeywordMarkerFilter.class,
SetKeywordMarkerFilter.class,
UnicodeWhitespaceTokenizer.class // a supported option via WhitespaceTokenizerFactory
UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
org.apache.lucene.analysis.StopFilter.class, // class from core, but StopFilterFactory creates one from this module
org.apache.lucene.analysis.LowerCaseFilter.class // class from core, but LowerCaseFilterFactory creates one from this module
);
}

View File

@ -166,7 +166,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
// also randomly pick it:
ValidatingTokenFilter.class,
// TODO: needs to be a tokenizer, doesnt handle graph inputs properly (a shingle or similar following will then cause pain)
WordDelimiterFilter.class)) {
WordDelimiterFilter.class,
// clones of core's filters:
org.apache.lucene.analysis.core.StopFilter.class,
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
for (Constructor<?> ctor : c.getConstructors()) {
brokenConstructors.put(ctor, ALWAYS);
}

View File

@ -357,7 +357,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
"5.5.1-cfs",
"5.5.1-nocfs",
"5.5.2-cfs",
"5.5.2-nocfs"
"5.5.2-nocfs",
"5.5.3-cfs",
"5.5.3-nocfs"
};
// TODO: on 6.0.0 release, gen the single segment indices and add here:

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.CharacterUtils;
/**
* Normalizes token text to lower case.
*/
public final class LowerCaseFilter extends TokenFilter {
public class LowerCaseFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**

View File

@ -28,7 +28,7 @@ import org.apache.lucene.analysis.CharArraySet;
/**
* Removes stop words from a token stream.
*/
public final class StopFilter extends FilteringTokenFilter {
public class StopFilter extends FilteringTokenFilter {
private final CharArraySet stopWords;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

View File

@ -63,7 +63,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
/** skipInterval of each level. */
private int skipInterval[];
/** Number of docs skipped per level. */
/** Number of docs skipped per level.
* It's possible for some values to overflow a signed int, but this has been accounted for.
*/
private int[] numSkipped;
/** Doc id of current skip entry per level. */
@ -150,8 +152,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
setLastSkipData(level);
numSkipped[level] += skipInterval[level];
if (numSkipped[level] > docCount) {
// numSkipped may overflow a signed int, so compare as unsigned.
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
// this skip list is exhausted
skipDoc[level] = Integer.MAX_VALUE;
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
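The unsigned comparison matters because numSkipped accumulates skipInterval amounts and, on very large segments, can wrap past Integer.MAX_VALUE; a signed comparison would then see a negative value and never detect that the skip list is exhausted. A small standalone illustration (demo class name and values are made up):

public class UnsignedCompareDemo {
  public static void main(String[] args) {
    int docCount = 2_000_000_000;        // ~2 billion docs in one segment
    int numSkipped = Integer.MAX_VALUE;  // skips accumulated so far
    numSkipped += 1_000;                 // wraps around to a negative int

    // Signed comparison is fooled by the overflow:
    System.out.println(numSkipped > docCount);                             // false
    // Comparing the same bits as unsigned gives the intended answer:
    System.out.println(Integer.compareUnsigned(numSkipped, docCount) > 0); // true
  }
}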

View File

@ -429,12 +429,10 @@ public abstract class LogMergePolicy extends MergePolicy {
private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
SegmentCommitInfo info;
float level;
int index;
public SegmentInfoAndLevel(SegmentCommitInfo info, float level, int index) {
public SegmentInfoAndLevel(SegmentCommitInfo info, float level) {
this.info = info;
this.level = level;
this.index = index;
}
// Sorts largest to smallest
@ -475,7 +473,7 @@ public abstract class LogMergePolicy extends MergePolicy {
size = 1;
}
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm);
levels.add(infoLevel);
if (verbose(writer)) {

View File

@ -31,7 +31,7 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
* though you can explicitly choose classic Levenshtein by passing <code>false</code>
* to the <code>transpositions</code> parameter.
*
* <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
* <p>This query uses {@link MultiTermQuery.TopTermsBlendedFreqScoringRewrite}
as the default rewrite method, so terms will be collected and scored according to their
* edit distance. Only the top terms are used for building the {@link BooleanQuery}.
* It is not recommended to change the rewrite mode for fuzzy queries.
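For reference, a minimal usage sketch of the query documented above (not part of this change; the demo class, field name, and searcher are assumed). Matching terms are scored by edit distance, and only the top terms are rewritten into the BooleanQuery:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;

public class FuzzyQueryDemo {
  // Finds documents whose "body" terms are within 2 edits of "lucene".
  static TopDocs fuzzySearch(IndexSearcher searcher) throws Exception {
    FuzzyQuery query = new FuzzyQuery(new Term("body", "lucene"),
        2 /* maxEdits */, 0 /* prefixLength */);
    return searcher.search(query, 10);
  }
}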

View File

@ -17,12 +17,7 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
@ -35,10 +30,12 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import java.io.IOException;
import java.util.Arrays;
/** Subclass of TermsEnum for enumerating all terms that are similar
* to the specified filter term.
*
@ -46,38 +43,46 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
* {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
*/
public class FuzzyTermsEnum extends TermsEnum {
public final class FuzzyTermsEnum extends TermsEnum {
// NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum:
private TermsEnum actualEnum;
private BoostAttribute actualBoostAtt;
private final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
// We use this to communicate the score (boost) of the current matched term we are on back to
// MultiTermQuery.TopTermsBlendedFreqScoringRewrite that is collecting the best (default 50) matched terms:
private final BoostAttribute boostAtt;
// MultiTermQuery.TopTermsBlendedFreqScoringRewrite tells us the worst boost still in its queue using this att,
// which we use to know when we can reduce the automaton from ed=2 to ed=1, or ed=0 if only a single top term is collected:
private final MaxNonCompetitiveBoostAttribute maxBoostAtt;
// We use this to share the pre-built (once for the query) Levenshtein automata across segments:
private final LevenshteinAutomataAttribute dfaAtt;
private float bottom;
private BytesRef bottomTerm;
protected final float minSimilarity;
protected final float scale_factor;
protected final int termLength;
protected int maxEdits;
protected final boolean raw;
private final CompiledAutomaton automata[];
protected final Terms terms;
private final Term term;
protected final int termText[];
protected final int realPrefixLength;
private final boolean transpositions;
private BytesRef queuedBottom;
final int termLength;
// Maximum number of edits we will accept. This is either 2 or 1 (or, degenerately, 0) passed by the user originally,
// but as we collect terms, we can lower this (e.g. from 2 to 1) if we detect that the term queue is full, and all
// collected terms are ed=1:
private int maxEdits;
final Terms terms;
final Term term;
final int termText[];
final int realPrefixLength;
// True (the default, in FuzzyQuery) if a transposition should count as a single edit:
final boolean transpositions;
/**
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
* <code>minSimilarity</code>.
* length <code>prefixLength</code> with <code>term</code> and which have at most {@code maxEdits} edits.
* <p>
* After calling the constructor the enumeration is already pointing to the first
* valid term if such a term exists.
@ -87,105 +92,88 @@ public class FuzzyTermsEnum extends TermsEnum {
* that contains information about competitive boosts during rewrite. It is also used
* to cache DFAs between segment transitions.
* @param term Pattern term.
* @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
* representing edit distance. Passing a fraction is deprecated.
* @param maxEdits Maximum edit distance.
* @param prefixLength Length of required common prefix. Default value is 0.
* @throws IOException if there is a low-level IO error
*/
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
throw new IllegalArgumentException("fractional edit distances are not allowed");
if (minSimilarity < 0.0f)
throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
if(prefixLength < 0)
final int maxEdits, final int prefixLength, boolean transpositions) throws IOException {
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException("max edits must be 0.." + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got: " + maxEdits);
}
if (prefixLength < 0) {
throw new IllegalArgumentException("prefixLength cannot be less than 0");
}
this.maxEdits = maxEdits;
this.terms = terms;
this.term = term;
// convert the string into a utf32 int[] representation for fast comparisons
final String utf16 = term.text();
this.termText = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
termText[j++] = cp = utf16.codePointAt(i);
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
termText[j++] = cp = utf16.codePointAt(i);
}
this.termLength = termText.length;
this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);
this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
// NOTE: boostAtt must be pulled from attributes(), not from atts! This is because TopTermsRewrite looks for boostAtt from this TermsEnum's
// private attributes() and not the global atts passed to us from MultiTermQuery:
this.boostAtt = attributes().addAttribute(BoostAttribute.class);
//The prefix could be longer than the word.
//It's kind of silly though. It means we must match the entire word.
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
// if minSimilarity >= 1, we treat it as number of edits
if (minSimilarity >= 1f) {
this.minSimilarity = 0; // just driven by number of edits
maxEdits = (int) minSimilarity;
raw = true;
} else {
this.minSimilarity = minSimilarity;
// calculate the maximum k edits for this similarity
maxEdits = initialMaxDistance(this.minSimilarity, termLength);
raw = false;
}
if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new UnsupportedOperationException("with transpositions enabled, distances > "
+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
}
this.transpositions = transpositions;
this.scale_factor = 1.0f / (1.0f - this.minSimilarity);
this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
CompiledAutomaton[] prevAutomata = dfaAtt.automata();
if (prevAutomata == null) {
prevAutomata = new CompiledAutomaton[maxEdits+1];
LevenshteinAutomata builder =
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
for (int i = 0; i <= maxEdits; i++) {
Automaton a = builder.toAutomaton(i, prefix);
prevAutomata[i] = new CompiledAutomaton(a, true, false);
}
// first segment computes the automata, and we share with subsequent segments via this Attribute:
dfaAtt.setAutomata(prevAutomata);
}
this.automata = prevAutomata;
bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
bottomTerm = maxBoostAtt.getCompetitiveTerm();
bottomChanged(null, true);
bottomChanged(null);
}
/**
* return an automata-based enum for matching up to editDistance from
* lastTerm, if possible
*/
protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
throws IOException {
final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
if (editDistance < runAutomata.size()) {
//System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
final CompiledAutomaton compiled = runAutomata.get(editDistance);
return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())),
runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException {
assert editDistance < automata.length;
final CompiledAutomaton compiled = automata[editDistance];
BytesRef initialSeekTerm;
if (lastTerm == null) {
// This is the first enum we are pulling:
initialSeekTerm = null;
} else {
return null;
// We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2):
initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder());
}
return terms.intersect(compiled, initialSeekTerm);
}
/** initialize levenshtein DFAs up to maxDistance, if possible */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
//System.out.println("cached automata size: " + runAutomata.size());
if (runAutomata.size() <= maxDistance &&
maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
LevenshteinAutomata builder =
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
for (int i = runAutomata.size(); i <= maxDistance; i++) {
Automaton a = builder.toAutomaton(i, prefix);
//System.out.println("compute automaton n=" + i);
runAutomata.add(new CompiledAutomaton(a, true, false));
}
}
return runAutomata;
}
/** swap in a new actual enum to proxy to */
protected void setEnum(TermsEnum actualEnum) {
this.actualEnum = actualEnum;
this.actualBoostAtt = actualEnum.attributes().addAttribute(BoostAttribute.class);
}
/**
* fired when the max non-competitive boost has changed. this is the hook to
* swap in a smarter actualEnum
* swap in a smarter actualEnum.
*/
private void bottomChanged(BytesRef lastTerm, boolean init)
throws IOException {
private void bottomChanged(BytesRef lastTerm) throws IOException {
int oldMaxEdits = maxEdits;
// true if the last term encountered is lexicographically equal or after the bottom term in the PQ
@ -193,49 +181,73 @@ public class FuzzyTermsEnum extends TermsEnum {
// as long as the max non-competitive boost is >= the max boost
// for some edit distance, keep dropping the max edit distance.
while (maxEdits > 0 && (termAfter ? bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits)))
while (maxEdits > 0) {
float maxBoost = 1.0f - ((float) maxEdits / (float) termLength);
if (bottom < maxBoost || (bottom == maxBoost && termAfter == false)) {
break;
}
maxEdits--;
}
// TODO: this opto could be improved, e.g. if the worst term in the queue is zzzz with ed=2, then, really, on the next segment, we
// should only be looking for ed=1 terms up until zzzz, then ed=2. Tricky :)
if (oldMaxEdits != maxEdits || init) { // the maximum n has changed
maxEditDistanceChanged(lastTerm, maxEdits, init);
if (oldMaxEdits != maxEdits || lastTerm == null) {
// This is a very powerful optimization: the maximum edit distance has changed. This happens because we collect only the top scoring
// N (= 50, by default) terms, and if e.g. maxEdits=2, and the queue is now full of matching terms, and we notice that the worst entry
// in that queue is ed=1, then we can switch the automata here to ed=1 which is a big speedup.
actualEnum = getAutomatonEnum(maxEdits, lastTerm);
}
}
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
throws IOException {
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
// instead of assert, we do a hard check in case someone uses our enum directly
// assert newEnum != null;
if (newEnum == null) {
assert maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
throw new IllegalArgumentException("maxEdits cannot be > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE");
}
setEnum(newEnum);
}
// for some raw min similarity and input term length, the maximum # of edits
private int initialMaxDistance(float minimumSimilarity, int termLen) {
return (int) ((1D-minimumSimilarity) * termLen);
}
// for some number of edits, the maximum possible scaled boost
private float calculateMaxBoost(int nEdits) {
final float similarity = 1.0f - ((float) nEdits / (float) (termLength));
return (similarity - minSimilarity) * scale_factor;
}
private BytesRef queuedBottom = null;
@Override
public BytesRef next() throws IOException {
if (queuedBottom != null) {
bottomChanged(queuedBottom, false);
bottomChanged(queuedBottom);
queuedBottom = null;
}
BytesRef term = actualEnum.next();
boostAtt.setBoost(actualBoostAtt.getBoost());
BytesRef term;
// while loop because we skip short terms even if they are within the specified edit distance (see the NOTE in FuzzyQuery class javadocs)
while (true) {
term = actualEnum.next();
if (term == null) {
// end
break;
}
int ed = maxEdits;
// we know the outer DFA always matches.
// now compute exact edit distance
while (ed > 0) {
if (matches(term, ed - 1)) {
ed--;
} else {
break;
}
}
if (ed == 0) { // exact match
boostAtt.setBoost(1.0F);
break;
} else {
final int codePointCount = UnicodeUtil.codePointCount(term);
int minTermLength = Math.min(codePointCount, termLength);
// only accept a matching term if it's longer than the edit distance:
if (minTermLength > ed) {
float similarity = 1.0f - (float) ed / (float) minTermLength;
boostAtt.setBoost(similarity);
break;
}
}
}
final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
if (term != null && (bottom != this.bottom || bottomTerm != this.bottomTerm)) {
@ -243,11 +255,18 @@ public class FuzzyTermsEnum extends TermsEnum {
this.bottomTerm = bottomTerm;
// clone the term before potentially doing something with it
// this is a rare but wonderful occurrence anyway
// We must delay bottomChanged until the next next() call otherwise we mess up docFreq(), etc., for the current term:
queuedBottom = BytesRef.deepCopyOf(term);
}
return term;
}
/** returns true if term is within k edits of the query term */
private boolean matches(BytesRef termIn, int k) {
return k == 0 ? termIn.equals(term.bytes()) : automata[k].runAutomaton.run(termIn.bytes, termIn.offset, termIn.length);
}
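To make the boost rule above concrete: a candidate matching at edit distance ed is boosted by 1 - ed / min(|candidate|, |query term|), measured in code points, and candidates no longer than ed are skipped. A standalone arithmetic sketch (hypothetical helper, not the enum itself):

public class FuzzyBoostDemo {
  // Mirrors the scoring in next(): exact matches get 1.0, otherwise
  // 1 - ed / min(candidateLength, queryTermLength); candidates whose
  // shorter length is <= ed are rejected.
  static float boost(int ed, int candidateLength, int queryTermLength) {
    if (ed == 0) {
      return 1.0f;
    }
    int minTermLength = Math.min(candidateLength, queryTermLength);
    if (minTermLength <= ed) {
      return Float.NaN; // the enum would skip this term
    }
    return 1.0f - (float) ed / (float) minTermLength;
  }

  public static void main(String[] args) {
    System.out.println(boost(1, 6, 6)); // e.g. "lucane" vs "lucene" -> ~0.833
    System.out.println(boost(2, 6, 6)); // two edits               -> ~0.667
    System.out.println(boost(2, 2, 6)); // too short to accept     -> NaN
  }
}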
// proxy all other enum calls to the actual enum
@Override
@ -300,109 +319,43 @@ public class FuzzyTermsEnum extends TermsEnum {
return actualEnum.term();
}
/**
* Implement fuzzy enumeration with Terms.intersect.
* <p>
* This is the fastest method as opposed to LinearFuzzyTermsEnum:
* as enumeration is logarithmic to the number of terms (instead of linear)
* and comparison is linear to length of the term (rather than quadratic)
*/
private class AutomatonFuzzyTermsEnum extends FilteredTermsEnum {
private final ByteRunAutomaton matchers[];
private final BytesRef termRef;
private final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) {
super(tenum, false);
this.matchers = new ByteRunAutomaton[compiled.length];
for (int i = 0; i < compiled.length; i++)
this.matchers[i] = compiled[i].runAutomaton;
termRef = new BytesRef(term.text());
}
/** finds the smallest Lev(n) DFA that accepts the term. */
@Override
protected AcceptStatus accept(BytesRef term) {
//System.out.println("AFTE.accept term=" + term);
int ed = matchers.length - 1;
// we are wrapping either an intersect() TermsEnum or an AutomatonTermsENum,
// so we know the outer DFA always matches.
// now compute exact edit distance
while (ed > 0) {
if (matches(term, ed - 1)) {
ed--;
} else {
break;
}
}
//System.out.println("CHECK term=" + term.utf8ToString() + " ed=" + ed);
// scale to a boost and return (if similarity > minSimilarity)
if (ed == 0) { // exact match
boostAtt.setBoost(1.0F);
//System.out.println(" yes");
return AcceptStatus.YES;
} else {
final int codePointCount = UnicodeUtil.codePointCount(term);
final float similarity = 1.0f - ((float) ed / (float)
(Math.min(codePointCount, termLength)));
if (similarity > minSimilarity) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
//System.out.println(" yes");
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
}
}
/** returns true if term is within k edits of the query term */
final boolean matches(BytesRef term, int k) {
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
}
}
/** @lucene.internal */
public float getMinSimilarity() {
return minSimilarity;
}
/** @lucene.internal */
public float getScaleFactor() {
return scale_factor;
}
/**
* reuses compiled automata across different segments,
* because they are independent of the index
* @lucene.internal */
public static interface LevenshteinAutomataAttribute extends Attribute {
public List<CompiledAutomaton> automata();
public CompiledAutomaton[] automata();
public void setAutomata(CompiledAutomaton[] automata);
}
/**
* Stores compiled automata as a list (indexed by edit distance)
* @lucene.internal */
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
private final List<CompiledAutomaton> automata = new ArrayList<>();
private CompiledAutomaton[] automata;
@Override
public List<CompiledAutomaton> automata() {
public CompiledAutomaton[] automata() {
return automata;
}
@Override
public void setAutomata(CompiledAutomaton[] automata) {
this.automata = automata;
}
@Override
public void clear() {
automata.clear();
automata = null;
}
@Override
public int hashCode() {
return automata.hashCode();
if (automata == null) {
return 0;
} else {
return automata.hashCode();
}
}
@Override
@ -411,15 +364,17 @@ public class FuzzyTermsEnum extends TermsEnum {
return true;
if (!(other instanceof LevenshteinAutomataAttributeImpl))
return false;
return automata.equals(((LevenshteinAutomataAttributeImpl) other).automata);
return Arrays.equals(automata, ((LevenshteinAutomataAttributeImpl) other).automata);
}
@Override
public void copyTo(AttributeImpl target) {
final List<CompiledAutomaton> targetAutomata =
((LevenshteinAutomataAttribute) target).automata();
targetAutomata.clear();
targetAutomata.addAll(automata);
public void copyTo(AttributeImpl _target) {
LevenshteinAutomataAttribute target = (LevenshteinAutomataAttribute) _target;
if (automata == null) {
target.setAutomata(null);
} else {
target.setAutomata(automata);
}
}
@Override

View File

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.store.BaseDirectoryWrapper;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Monster;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits;
@SuppressCodecs({"SimpleText", "Memory", "Direct"})
@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
@Monster("Takes ~30min")
@SuppressSysoutChecks(bugUrl = "Stuff gets printed")
public class Test2BDocs extends LuceneTestCase {
// indexes Integer.MAX_VALUE docs with indexed field(s)
public void test2BDocs() throws Exception {
BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BDocs"));
if (dir instanceof MockDirectoryWrapper) {
((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
}
IndexWriter w = new IndexWriter(dir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
.setRAMBufferSizeMB(256.0)
.setMergeScheduler(new ConcurrentMergeScheduler())
.setMergePolicy(newLogMergePolicy(false, 10))
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
.setCodec(TestUtil.getDefaultCodec()));
Document doc = new Document();
Field field = new Field("f1", "a", StringField.TYPE_NOT_STORED);
doc.add(field);
for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
w.addDocument(doc);
if (i % (10*1000*1000) == 0) {
System.out.println("indexed: " + i);
System.out.flush();
}
}
w.forceMerge(1);
w.close();
System.out.println("verifying...");
System.out.flush();
DirectoryReader r = DirectoryReader.open(dir);
BytesRef term = new BytesRef(1);
term.bytes[0] = (byte)'a';
term.length = 1;
long skips = 0;
Random rnd = random();
long start = System.nanoTime();
for (LeafReaderContext context : r.leaves()) {
LeafReader reader = context.reader();
int lim = context.reader().maxDoc();
Terms terms = reader.fields().terms("f1");
for (int i=0; i<10000; i++) {
TermsEnum te = terms.iterator();
assertTrue( te.seekExact(term) );
PostingsEnum docs = te.postings(null);
// skip randomly through the term
for (int target = -1;;)
{
int maxSkipSize = lim - target + 1;
// do a smaller skip half of the time
if (rnd.nextBoolean()) {
maxSkipSize = Math.min(256, maxSkipSize);
}
int newTarget = target + rnd.nextInt(maxSkipSize) + 1;
if (newTarget >= lim) {
if (target+1 >= lim) break; // we already skipped to end, so break.
newTarget = lim-1; // skip to end
}
target = newTarget;
int res = docs.advance(target);
if (res == PostingsEnum.NO_MORE_DOCS) break;
assertTrue( res >= target );
skips++;
target = res;
}
}
}
r.close();
dir.close();
long end = System.nanoTime();
System.out.println("Skip count=" + skips + " seconds=" + TimeUnit.NANOSECONDS.toSeconds(end-start));
assert skips > 0;
}
}

View File

@ -18,11 +18,13 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -36,7 +38,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
@ -66,19 +67,38 @@ public class TestBoolean2 extends LuceneTestCase {
private static Directory dir2;
private static int mulFactor;
private static Directory copyOf(Directory dir) throws IOException {
Directory copy = newFSDirectory(createTempDir());
for(String name : dir.listAll()) {
if (name.startsWith("extra")) {
continue;
}
copy.copyFrom(dir, name, name, IOContext.DEFAULT);
copy.sync(Collections.singleton(name));
}
return copy;
}
@BeforeClass
public static void beforeClass() throws Exception {
// in some runs, test immediate adjacency of matches - in others, force a full bucket gap between docs
NUM_FILLER_DOCS = random().nextBoolean() ? 0 : BooleanScorer.SIZE;
PRE_FILLER_DOCS = TestUtil.nextInt(random(), 0, (NUM_FILLER_DOCS / 2));
if (VERBOSE) {
System.out.println("TEST: NUM_FILLER_DOCS=" + NUM_FILLER_DOCS + " PRE_FILLER_DOCS=" + PRE_FILLER_DOCS);
}
if (NUM_FILLER_DOCS * PRE_FILLER_DOCS > 100000) {
directory = newFSDirectory(createTempDir());
} else {
directory = newDirectory();
}
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// randomized codecs are sometimes too costly for this test:
iwc.setCodec(Codec.forName("Lucene62"));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, iwc);
// we'll make a ton of docs, disable store/norms/vectors
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setOmitNorms(true);
@ -118,8 +138,10 @@ public class TestBoolean2 extends LuceneTestCase {
singleSegmentDirectory.sync(Collections.singleton(fileName));
}
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// we need docID order to be preserved:
// randomized codecs are sometimes too costly for this test:
iwc.setCodec(Codec.forName("Lucene62"));
iwc.setMergePolicy(newLogMergePolicy());
try (IndexWriter w = new IndexWriter(singleSegmentDirectory, iwc)) {
w.forceMerge(1, true);
@ -129,7 +151,7 @@ public class TestBoolean2 extends LuceneTestCase {
singleSegmentSearcher.setSimilarity(searcher.getSimilarity(true));
// Make big index
dir2 = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(directory));
dir2 = copyOf(directory);
// First multiply small test index:
mulFactor = 1;
@ -141,9 +163,14 @@ public class TestBoolean2 extends LuceneTestCase {
if (VERBOSE) {
System.out.println("\nTEST: cycle...");
}
final Directory copy = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir2));
RandomIndexWriter w = new RandomIndexWriter(random(), dir2);
final Directory copy = copyOf(dir2);
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
// randomized codecs are sometimes too costly for this test:
iwc.setCodec(Codec.forName("Lucene62"));
RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
w.addIndexes(copy);
copy.close();
docCount = w.maxDoc();
w.close();
mulFactor *= 2;

View File

@ -18,13 +18,19 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
@ -32,7 +38,10 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/**
@ -489,4 +498,210 @@ public class TestFuzzyQuery extends LuceneTestCase {
doc.add(newTextField("field", text, Field.Store.YES));
writer.addDocument(doc);
}
private String randomSimpleString(int digits) {
int termLength = TestUtil.nextInt(random(), 1, 8);
char[] chars = new char[termLength];
for(int i=0;i<termLength;i++) {
chars[i] = (char) ('a' + random().nextInt(digits));
}
return new String(chars);
}
@SuppressWarnings({"unchecked","rawtypes"})
public void testRandom() throws Exception {
int numTerms = atLeast(100);
int digits = TestUtil.nextInt(random(), 2, 3);
Set<String> terms = new HashSet<>();
while (terms.size() < numTerms) {
terms.add(randomSimpleString(digits));
}
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
for(String term : terms) {
Document doc = new Document();
doc.add(new StringField("field", term, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader r = w.getReader();
//System.out.println("TEST: reader=" + r);
IndexSearcher s = newSearcher(r);
int iters = atLeast(1000);
for(int iter=0;iter<iters;iter++) {
String queryTerm = randomSimpleString(digits);
int prefixLength = random().nextInt(queryTerm.length());
String queryPrefix = queryTerm.substring(0, prefixLength);
// we don't look at scores here:
List<TermAndScore>[] expected = new List[3];
for(int ed=0;ed<3;ed++) {
expected[ed] = new ArrayList<TermAndScore>();
}
for(String term : terms) {
if (term.startsWith(queryPrefix) == false) {
continue;
}
int ed = getDistance(term, queryTerm);
if (Math.min(queryTerm.length(), term.length()) > ed) {
float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
while (ed < 3) {
expected[ed].add(new TermAndScore(term, score));
ed++;
}
}
}
for(int ed=0;ed<3;ed++) {
Collections.sort(expected[ed]);
int queueSize = TestUtil.nextInt(random(), 1, terms.size());
/*
System.out.println("\nTEST: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength);
for(TermAndScore ent : expected[ed]) {
System.out.println(" " + ent);
}
*/
FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), ed, prefixLength, queueSize, true);
TopDocs hits = s.search(query, terms.size());
Set<String> actual = new HashSet<>();
for(ScoreDoc hit : hits.scoreDocs) {
Document doc = s.doc(hit.doc);
actual.add(doc.get("field"));
//System.out.println(" actual: " + doc.get("field") + " score=" + hit.score);
}
Set<String> expectedTop = new HashSet<>();
int limit = Math.min(queueSize, expected[ed].size());
for(int i=0;i<limit;i++) {
expectedTop.add(expected[ed].get(i).term);
}
if (actual.equals(expectedTop) == false) {
StringBuilder sb = new StringBuilder();
sb.append("FAILED: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength + "\n");
boolean first = true;
for(String term : actual) {
if (expectedTop.contains(term) == false) {
if (first) {
sb.append(" these matched but shouldn't:\n");
first = false;
}
sb.append(" " + term + "\n");
}
}
first = true;
for(String term : expectedTop) {
if (actual.contains(term) == false) {
if (first) {
sb.append(" these did not match but should:\n");
first = false;
}
sb.append(" " + term + "\n");
}
}
throw new AssertionError(sb.toString());
}
}
}
IOUtils.close(r, w, dir);
}
private static class TermAndScore implements Comparable<TermAndScore> {
final String term;
final float score;
public TermAndScore(String term, float score) {
this.term = term;
this.score = score;
}
@Override
public int compareTo(TermAndScore other) {
// higher score sorts first, and if scores are tied, lower term sorts first
if (score > other.score) {
return -1;
} else if (score < other.score) {
return 1;
} else {
return term.compareTo(other.term);
}
}
@Override
public String toString() {
return term + " score=" + score;
}
}
// Poached from LuceneLevenshteinDistance.java (from suggest module): it supports transpositions (treats them as ed=1, not ed=2)
private static int getDistance(String target, String other) {
IntsRef targetPoints;
IntsRef otherPoints;
int n;
int d[][]; // cost array
// NOTE: if we cared, we could use 3*m space instead of m*n space, similar to
// what LevenshteinDistance does, except cycling thru a ring of three
// horizontal cost arrays... but this comparator is never actually used by
// DirectSpellChecker, it's only used for merging results from multiple shards
// in "distributed spellcheck", and it's inefficient in other ways too...
// cheaper to do this up front once
targetPoints = toIntsRef(target);
otherPoints = toIntsRef(other);
n = targetPoints.length;
final int m = otherPoints.length;
d = new int[n+1][m+1];
if (n == 0 || m == 0) {
if (n == m) {
return 0;
}
else {
return Math.max(n, m);
}
}
// indexes into strings s and t
int i; // iterates through s
int j; // iterates through t
int t_j; // jth character of t
int cost; // cost
for (i = 0; i<=n; i++) {
d[i][0] = i;
}
for (j = 0; j<=m; j++) {
d[0][j] = j;
}
for (j = 1; j<=m; j++) {
t_j = otherPoints.ints[j-1];
for (i=1; i<=n; i++) {
cost = targetPoints.ints[i-1]==t_j ? 0 : 1;
// minimum of cell to the left+1, to the top+1, diagonally left and up +cost
d[i][j] = Math.min(Math.min(d[i-1][j]+1, d[i][j-1]+1), d[i-1][j-1]+cost);
// transposition
if (i > 1 && j > 1 && targetPoints.ints[i-1] == otherPoints.ints[j-2] && targetPoints.ints[i-2] == otherPoints.ints[j-1]) {
d[i][j] = Math.min(d[i][j], d[i-2][j-2] + cost);
}
}
}
return d[n][m];
}
private static IntsRef toIntsRef(String s) {
IntsRef ref = new IntsRef(s.length()); // worst case
int utf16Len = s.length();
for (int i = 0, cp = 0; i < utf16Len; i += Character.charCount(cp)) {
cp = ref.ints[ref.length++] = Character.codePointAt(s, i);
}
return ref;
}
}

View File

@ -539,7 +539,9 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
public void testConcurrentIndexCloseSearchAndRefresh() throws Exception {
final Directory dir = newFSDirectory(createTempDir());
AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
final MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(IndexWriter.MAX_TERM_LENGTH);
writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
AtomicReference<SearcherManager> mgrRef = new AtomicReference<>();
mgrRef.set(new SearcherManager(writerRef.get(), null));
@ -561,7 +563,7 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
} else {
w.rollback();
}
writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
}
}
docs.close();

View File

@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
Term[] phraseQueryTerms = phraseQuery.getTerms();
if (phraseQueryTerms.length == 1) {
extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
}
else {
} else {
SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
for (int i = 0; i < phraseQueryTerms.length; i++) {
clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
// this query is TermContext sensitive.
extractWeightedTerms(terms, query, boost);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), boost, terms);
for (Query clause : ((DisjunctionMaxQuery) query)) {
extract(clause, boost, terms);
}
} else if (query instanceof ToParentBlockJoinQuery) {
extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
++distinctPositions;
}
for (int j = 0; j < termArray.length; ++j) {
disjuncts.add(new SpanTermQuery(termArray[j]));
for (Term aTermArray : termArray) {
disjuncts.add(new SpanTermQuery(aTermArray));
}
}
int positionGaps = 0;
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List<SpanQuery> disjuncts = disjunctLists[i];
for (List<SpanQuery> disjuncts : disjunctLists) {
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
}
}
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
if (clauses.length == 1) {
extractWeightedSpanTerms(terms, clauses[0], boost);
} else {
final int slop = mpq.getSlop();
final boolean inorder = (slop == 0);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
extractWeightedSpanTerms(terms, sp, boost);
}
}
} else if (query instanceof MatchAllDocsQuery) {
//nothing

View File

@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start();
}
@Test
public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
final String fieldName = "substring";
final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
.add(new Term(fieldName, "uchu")).build();
assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
}
private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
throws IOException, InvalidTokenOffsetsException {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new NGramTokenizer(4, 4));
}
};
final String fieldName = "substring";
final List<BytesRef> list = new ArrayList<>();
list.add(new BytesRef("uchu"));
final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
highlighter.setTextFragmenter(new SimpleFragmenter(100));
final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
assertEquals("B<b>uchu</b>ng",fragment);
final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
assertEquals(expected, fragment);
}
public void testUnRewrittenQuery() throws Exception {

View File

@ -21,8 +21,8 @@ import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.sandbox.queries.SlowFuzzyQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@ -33,7 +33,7 @@ import org.w3c.dom.NodeList;
public class FuzzyLikeThisQueryBuilder implements QueryBuilder {
private static final int DEFAULT_MAX_NUM_TERMS = 50;
private static final float DEFAULT_MIN_SIMILARITY = SlowFuzzyQuery.defaultMinSimilarity;
private static final float DEFAULT_MIN_SIMILARITY = FuzzyQuery.defaultMinSimilarity;
private static final int DEFAULT_PREFIX_LENGTH = 1;
private static final boolean DEFAULT_IGNORE_TF = false;

View File

@ -38,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@ -46,6 +47,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/**
* Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms.
@ -64,62 +66,62 @@ import org.apache.lucene.util.PriorityQueue;
*/
public class FuzzyLikeThisQuery extends Query
{
// TODO: generalize this query (at least it should not reuse this static sim!
// a better way might be to convert this into multitermquery rewrite methods.
// the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq)
// provided to TermQuery, so that the general idea is agnostic to any scoring system...
static TFIDFSimilarity sim=new ClassicSimilarity();
ArrayList<FieldVals> fieldVals=new ArrayList<>();
Analyzer analyzer;
// TODO: generalize this query (at least it should not reuse this static sim!
// a better way might be to convert this into multitermquery rewrite methods.
// the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq)
// provided to TermQuery, so that the general idea is agnostic to any scoring system...
static TFIDFSimilarity sim=new ClassicSimilarity();
ArrayList<FieldVals> fieldVals=new ArrayList<>();
Analyzer analyzer;
int MAX_VARIANTS_PER_TERM=50;
boolean ignoreTF=false;
private int maxNumTerms;
int MAX_VARIANTS_PER_TERM=50;
boolean ignoreTF=false;
private int maxNumTerms;
@Override
public int hashCode() {
int prime = 31;
int result = classHash();
result = prime * result + Objects.hashCode(analyzer);
result = prime * result + Objects.hashCode(fieldVals);
result = prime * result + (ignoreTF ? 1231 : 1237);
result = prime * result + maxNumTerms;
return result;
}
@Override
public int hashCode() {
int prime = 31;
int result = classHash();
result = prime * result + Objects.hashCode(analyzer);
result = prime * result + Objects.hashCode(fieldVals);
result = prime * result + (ignoreTF ? 1231 : 1237);
result = prime * result + maxNumTerms;
return result;
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) &&
equalsTo(getClass().cast(other));
}
private boolean equalsTo(FuzzyLikeThisQuery other) {
return Objects.equals(analyzer, other.analyzer) &&
Objects.equals(fieldVals, other.fieldVals) &&
ignoreTF == other.ignoreTF &&
maxNumTerms == other.maxNumTerms;
}
private boolean equalsTo(FuzzyLikeThisQuery other) {
return Objects.equals(analyzer, other.analyzer) &&
Objects.equals(fieldVals, other.fieldVals) &&
ignoreTF == other.ignoreTF &&
maxNumTerms == other.maxNumTerms;
}
/**
*
* @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery
*/
public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer)
{
this.analyzer=analyzer;
this.maxNumTerms = maxNumTerms;
}
/**
*
* @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery
*/
public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer)
{
this.analyzer=analyzer;
this.maxNumTerms = maxNumTerms;
}
class FieldVals
{
String queryString;
String fieldName;
float minSimilarity;
int prefixLength;
public FieldVals(String name, float similarity, int length, String queryString)
class FieldVals
{
String queryString;
String fieldName;
int maxEdits;
int prefixLength;
public FieldVals(String name, int maxEdits, int length, String queryString)
{
fieldName = name;
minSimilarity = similarity;
this.maxEdits = maxEdits;
prefixLength = length;
this.queryString = queryString;
}
@ -129,11 +131,11 @@ public class FuzzyLikeThisQuery extends Query
final int prime = 31;
int result = 1;
result = prime * result
+ ((fieldName == null) ? 0 : fieldName.hashCode());
result = prime * result + Float.floatToIntBits(minSimilarity);
+ ((fieldName == null) ? 0 : fieldName.hashCode());
result = prime * result + maxEdits;
result = prime * result + prefixLength;
result = prime * result
+ ((queryString == null) ? 0 : queryString.hashCode());
+ ((queryString == null) ? 0 : queryString.hashCode());
return result;
}
@ -151,9 +153,9 @@ public class FuzzyLikeThisQuery extends Query
return false;
} else if (!fieldName.equals(other.fieldName))
return false;
if (Float.floatToIntBits(minSimilarity) != Float
.floatToIntBits(other.minSimilarity))
if (maxEdits != other.maxEdits) {
return false;
}
if (prefixLength != other.prefixLength)
return false;
if (queryString == null) {
@ -166,18 +168,22 @@ public class FuzzyLikeThisQuery extends Query
}
}
/**
* Adds user input for "fuzzification"
* @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
* @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum)
* @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
*/
public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
{
fieldVals.add(new FieldVals(fieldName,minSimilarity,prefixLength,queryString));
/**
* Adds user input for "fuzzification"
* @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
* @param minSimilarity The minimum similarity of the term variants; must be 0, 1 or 2 (see FuzzyTermsEnum)
* @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
*/
public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
{
int maxEdits = (int) minSimilarity;
if (maxEdits != minSimilarity || maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
throw new IllegalArgumentException("minSimilarity must be an integer value between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got " + minSimilarity);
}
fieldVals.add(new FieldVals(fieldName,maxEdits,prefixLength,queryString));
}
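To make the new contract concrete, a hedged usage sketch (the analyzer, field name "name", term "smith" and reader are illustrative; only the 0..2 edit-distance rule comes from the validation above):

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;

// Sketch: after this change the third argument is an integer edit distance (0, 1 or 2);
// fractional values such as 0.3f now trigger the IllegalArgumentException above.
class FuzzyLikeThisSketch {
  static Query build(IndexReader reader) throws IOException {
    Analyzer analyzer = new StandardAnalyzer();
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer); // keep at most 10 term clauses
    flt.addTerms("smith", "name", 2, 1);                           // maxEdits=2, 1-char required prefix
    return flt.rewrite(reader);                                    // expands to boosted SHOULD clauses
  }
}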
private void addTerms(IndexReader reader, FieldVals f, ScoreTermQueue q) throws IOException {
@ -202,7 +208,7 @@ public class FuzzyLikeThisQuery extends Query
AttributeSource atts = new AttributeSource();
MaxNonCompetitiveBoostAttribute maxBoostAtt =
atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
FuzzyTermsEnum fe = new FuzzyTermsEnum(terms, atts, startTerm, f.maxEdits, f.prefixLength, true);
//store the df so all variants use same idf
int df = reader.docFreq(startTerm);
int numVariants = 0;
@ -225,9 +231,9 @@ public class FuzzyLikeThisQuery extends Query
if (numVariants > 0) {
int avgDf = totalVariantDocFreqs / numVariants;
if (df == 0)//no direct match we can use as df for all variants
{
df = avgDf; //use avg df of all variants
}
{
df = avgDf; //use avg df of all variants
}
// take the top variants (scored by edit distance) and reset the score
// to include an IDF factor then add to the global queue for ranking
@ -267,105 +273,105 @@ public class FuzzyLikeThisQuery extends Query
}
@Override
public Query rewrite(IndexReader reader) throws IOException
{
ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
//load up the list of possible terms
for (FieldVals f : fieldVals) {
addTerms(reader, f, q);
}
BooleanQuery.Builder bq = new BooleanQuery.Builder();
//create BooleanQueries to hold the variants for each token/field pair and ensure it
// has no coord factor
//Step 1: sort the termqueries by term/field
HashMap<Term,ArrayList<ScoreTerm>> variantQueries=new HashMap<>();
int size = q.size();
for(int i = 0; i < size; i++)
{
ScoreTerm st = q.pop();
ArrayList<ScoreTerm> l= variantQueries.get(st.fuzziedSourceTerm);
if(l==null)
{
l=new ArrayList<>();
variantQueries.put(st.fuzziedSourceTerm,l);
}
l.add(st);
}
//Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext();)
{
ArrayList<ScoreTerm> variants = iter.next();
if(variants.size()==1)
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
Query tq = newTermQuery(reader, st.term);
// set the boost to a mix of IDF and score
bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
}
else
{
BooleanQuery.Builder termVariants=new BooleanQuery.Builder();
for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2
.hasNext();)
{
ScoreTerm st = iterator2.next();
// found a match
Query tq = newTermQuery(reader, st.term);
// set the boost using the ScoreTerm's score
termVariants.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
}
bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
}
}
//TODO possible alternative step 3 - organize above booleans into a new layer of field-based
// booleans with a minimum-should-match of NumFields-1?
return bq.build();
public Query rewrite(IndexReader reader) throws IOException
{
ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
//load up the list of possible terms
for (FieldVals f : fieldVals) {
addTerms(reader, f, q);
}
//Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
// term variants) then is reset with IDF for use in ranking against all other
// terms/fields
private static class ScoreTerm{
public Term term;
public float score;
Term fuzziedSourceTerm;
public ScoreTerm(Term term, float score, Term fuzziedSourceTerm){
this.term = term;
this.score = score;
this.fuzziedSourceTerm=fuzziedSourceTerm;
}
BooleanQuery.Builder bq = new BooleanQuery.Builder();
//create BooleanQueries to hold the variants for each token/field pair and ensure it
// has no coord factor
//Step 1: sort the termqueries by term/field
HashMap<Term,ArrayList<ScoreTerm>> variantQueries=new HashMap<>();
int size = q.size();
for(int i = 0; i < size; i++)
{
ScoreTerm st = q.pop();
ArrayList<ScoreTerm> l= variantQueries.get(st.fuzziedSourceTerm);
if(l==null)
{
l=new ArrayList<>();
variantQueries.put(st.fuzziedSourceTerm,l);
}
l.add(st);
}
private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
public ScoreTermQueue(int size){
super(size);
}
//Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext();)
{
ArrayList<ScoreTerm> variants = iter.next();
if(variants.size()==1)
{
//optimize where only one selected variant
ScoreTerm st= variants.get(0);
Query tq = newTermQuery(reader, st.term);
// set the boost to a mix of IDF and score
bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
}
else
{
BooleanQuery.Builder termVariants=new BooleanQuery.Builder();
for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2
.hasNext();)
{
ScoreTerm st = iterator2.next();
// found a match
Query tq = newTermQuery(reader, st.term);
// set the boost using the ScoreTerm's score
termVariants.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
}
bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
}
}
//TODO possible alternative step 3 - organize above booleans into a new layer of field-based
// booleans with a minimum-should-match of NumFields-1?
return bq.build();
}
//Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
// term variants) then is reset with IDF for use in ranking against all other
// terms/fields
private static class ScoreTerm{
public Term term;
public float score;
Term fuzziedSourceTerm;
/* (non-Javadoc)
* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
*/
@Override
protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
if (termA.score== termB.score)
return termA.term.compareTo(termB.term) > 0;
else
return termA.score < termB.score;
}
}
public ScoreTerm(Term term, float score, Term fuzziedSourceTerm){
this.term = term;
this.score = score;
this.fuzziedSourceTerm=fuzziedSourceTerm;
}
}
private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
public ScoreTermQueue(int size){
super(size);
}
/* (non-Javadoc)
* @see org.apache.lucene.search.Query#toString(java.lang.String)
* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
*/
@Override
public String toString(String field)
{
return null;
protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
if (termA.score== termB.score)
return termA.term.compareTo(termB.term) > 0;
else
return termA.score < termB.score;
}
}
/* (non-Javadoc)
* @see org.apache.lucene.search.Query#toString(java.lang.String)
*/
@Override
public String toString(String field)
{
return null;
}
public boolean isIgnoreTF()

View File

@ -1,201 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.queries;
import java.io.IOException;
import org.apache.lucene.index.SingleTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanQuery; // javadocs
import org.apache.lucene.search.FuzzyQuery; // javadocs
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
/** Implements the classic fuzzy search query. The similarity measurement
* is based on the Levenshtein (edit distance) algorithm.
* <p>
* Note that, unlike {@link FuzzyQuery}, this query will silently allow
* for a (possibly huge) number of edit distances in comparisons, and may
* be extremely slow (comparing every term in the index).
*
* @deprecated Use {@link FuzzyQuery} instead.
*/
@Deprecated
public class SlowFuzzyQuery extends MultiTermQuery {
public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
public final static int defaultPrefixLength = 0;
public final static int defaultMaxExpansions = 50;
private float minimumSimilarity;
private int prefixLength;
private boolean termLongEnough = false;
protected Term term;
/**
* Create a new SlowFuzzyQuery that will match terms with a similarity
* of at least <code>minimumSimilarity</code> to <code>term</code>.
* If a <code>prefixLength</code> &gt; 0 is specified, a common prefix
* of that length is also required.
*
* @param term the term to search for
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
* between the query term and the matching terms. For example, for a
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
* as the query term is considered similar to the query term if the edit distance
* between both terms is less than <code>length(term)*0.5</code>
* <p>
* Alternatively, if <code>minimumSimilarity</code> is &gt;= 1f, it is interpreted
* as a pure Levenshtein edit distance. For example, a value of <code>2f</code>
* will match all terms within an edit distance of <code>2</code> from the
* query term. Edit distances specified in this way may not be fractional.
*
* @param prefixLength length of common (non-fuzzy) prefix
* @param maxExpansions the maximum number of terms to match. If this number is
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
* then the maxClauseCount will be used instead.
* @throws IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0
* or if prefixLength &lt; 0
*/
public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
int maxExpansions) {
super(term.field());
this.term = term;
if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity)
throw new IllegalArgumentException("fractional edit distances are not allowed");
if (minimumSimilarity < 0.0f)
throw new IllegalArgumentException("minimumSimilarity < 0");
if (prefixLength < 0)
throw new IllegalArgumentException("prefixLength < 0");
if (maxExpansions < 0)
throw new IllegalArgumentException("maxExpansions < 0");
setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
String text = term.text();
int len = text.codePointCount(0, text.length());
if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) {
this.termLongEnough = true;
}
this.minimumSimilarity = minimumSimilarity;
this.prefixLength = prefixLength;
}
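A hedged helper that makes the two interpretations above concrete (a hypothetical method, not part of SlowFuzzyQuery): for a 7-character term and minimumSimilarity = 0.5 the edit distance must stay below 7 * 0.5 = 3.5, i.e. at most 3 edits, while any value >= 1 is read directly as the edit-distance budget.

// Hypothetical illustration of the constructor javadoc above; not part of the class.
class MinSimilaritySketch {
  static int impliedMaxEdits(int termLength, float minimumSimilarity) {
    if (minimumSimilarity >= 1f) {
      return (int) minimumSimilarity;                      // already a raw edit distance
    }
    double bound = termLength * (1.0 - minimumSimilarity); // distance must stay strictly below this
    return Math.max((int) Math.ceil(bound) - 1, 0);        // e.g. 7 chars at 0.5 -> up to 3 edits
  }
}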
/**
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)}.
*/
public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
}
/**
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)}.
*/
public SlowFuzzyQuery(Term term, float minimumSimilarity) {
this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
}
/**
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)}.
*/
public SlowFuzzyQuery(Term term) {
this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);
}
/**
* Returns the minimum similarity that is required for this query to match.
* @return float value between 0.0 and 1.0
*/
public float getMinSimilarity() {
return minimumSimilarity;
}
/**
* Returns the non-fuzzy prefix length. This is the number of characters at the start
* of a term that must be identical (not fuzzy) to the query term if the query
* is to match that term.
*/
public int getPrefixLength() {
return prefixLength;
}
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
if (!termLongEnough) { // can only match if it's exact
return new SingleTermsEnum(terms.iterator(), term.bytes());
}
return new SlowFuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
}
/**
* Returns the pattern term.
*/
public Term getTerm() {
return term;
}
@Override
public String toString(String field) {
final StringBuilder buffer = new StringBuilder();
if (!term.field().equals(field)) {
buffer.append(term.field());
buffer.append(":");
}
buffer.append(term.text());
buffer.append('~');
buffer.append(Float.toString(minimumSimilarity));
return buffer.toString();
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + Float.floatToIntBits(minimumSimilarity);
result = prime * result + prefixLength;
result = prime * result + ((term == null) ? 0 : term.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (!super.equals(obj))
return false;
if (getClass() != obj.getClass())
return false;
SlowFuzzyQuery other = (SlowFuzzyQuery) obj;
if (Float.floatToIntBits(minimumSimilarity) != Float
.floatToIntBits(other.minimumSimilarity))
return false;
if (prefixLength != other.prefixLength)
return false;
if (term == null) {
if (other.term != null)
return false;
} else if (!term.equals(other.term))
return false;
return true;
}
}

View File

@ -1,263 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.queries;
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
/** Potentially slow fuzzy TermsEnum for enumerating all terms that are similar
* to the specified filter term.
* <p> If the minSimilarity or maxEdits is greater than the Automaton's
* allowable range, this backs off to the classic (brute force)
* fuzzy terms enum method by calling FuzzyTermsEnum's getAutomatonEnum.
* </p>
* <p>Term enumerations are always ordered by
* {@link BytesRef#compareTo}. Each term in the enumeration is
* greater than all that precede it.</p>
*
* @deprecated Use {@link FuzzyTermsEnum} instead.
*/
@Deprecated
public final class SlowFuzzyTermsEnum extends FuzzyTermsEnum {
public SlowFuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
float minSimilarity, int prefixLength) throws IOException {
super(terms, atts, term, minSimilarity, prefixLength, false);
}
@Override
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
throws IOException {
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
if (newEnum != null) {
setEnum(newEnum);
} else if (init) {
setEnum(new LinearFuzzyTermsEnum());
}
}
/**
* Implement fuzzy enumeration with linear brute force.
*/
private class LinearFuzzyTermsEnum extends FilteredTermsEnum {
/* Allows us to save the time required to create a new array
* every time similarity is called.
*/
private int[] d;
private int[] p;
// this is the text, minus the prefix
private final int[] text;
private final BoostAttribute boostAtt =
attributes().addAttribute(BoostAttribute.class);
/**
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity &gt;
* <code>minSimilarity</code>.
* <p>
* After calling the constructor the enumeration is already pointing to the first
* valid term if such a term exists.
*
* @throws IOException If there is a low-level I/O error.
*/
public LinearFuzzyTermsEnum() throws IOException {
super(terms.iterator());
this.text = new int[termLength - realPrefixLength];
System.arraycopy(termText, realPrefixLength, text, 0, text.length);
final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
prefixBytesRef = new BytesRef(prefix);
this.d = new int[this.text.length + 1];
this.p = new int[this.text.length + 1];
setInitialSeekTerm(prefixBytesRef);
}
private final BytesRef prefixBytesRef;
// used for unicode conversion from BytesRef byte[] to int[]
private final IntsRefBuilder utf32 = new IntsRefBuilder();
/**
* <p>The termCompare method in FuzzyTermEnum uses Levenshtein distance to
* calculate the distance between the given term and the comparing term.
* </p>
* <p>If the minSimilarity is &gt;= 1.0, this uses the maxEdits as the comparison.
* Otherwise, this method uses the following logic to calculate similarity.
* <pre>
* similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
* </pre>
* where distance is the Levenshtein distance for the two words.
* </p>
*
*/
@Override
protected final AcceptStatus accept(BytesRef term) {
if (StringHelper.startsWith(term, prefixBytesRef)) {
utf32.copyUTF8Bytes(term);
final int distance = calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);
//Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
if (distance == Integer.MIN_VALUE){
return AcceptStatus.NO;
}
//no need to calc similarity, if raw is true and distance > maxEdits
if (raw == true && distance > maxEdits){
return AcceptStatus.NO;
}
final float similarity = calcSimilarity(distance, (utf32.length() - realPrefixLength), text.length);
//if raw is true, then distance must also be <= maxEdits by now
//given the previous if statement
if (raw == true ||
(raw == false && similarity > minSimilarity)) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
return AcceptStatus.YES;
} else {
return AcceptStatus.NO;
}
} else {
return AcceptStatus.END;
}
}
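A worked instance of the formula quoted in the javadoc above, with illustrative numbers (a 1-character prefix, 5 remaining query characters, 6 remaining target characters, 2 edits):

// Worked example of: similarity = 1 - distance / (prefixLength + min(textlen, targetlen))
class SimilaritySketch {
  public static void main(String[] args) {
    int prefixLength = 1, textLen = 5, targetLen = 6, distance = 2;
    float similarity = 1f - (float) distance / (prefixLength + Math.min(textLen, targetLen));
    System.out.println(similarity); // 0.6666667 -- two edits on a short term still clear a 0.5 threshold
  }
}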
/******************************
* Compute Levenshtein distance
******************************/
/**
* <p>calcDistance returns the Levenshtein distance between the query term
* and the target term.</p>
*
* <p>Embedded within this algorithm is a fail-fast Levenshtein distance
* algorithm. The fail-fast algorithm differs from the standard Levenshtein
* distance algorithm in that it is aborted if it is discovered that the
* minimum distance between the words is greater than some threshold.
* <p>Levenshtein distance (also known as edit distance) is a measure of similarity
* between two strings where the distance is measured as the number of character
* deletions, insertions or substitutions required to transform one string to
* the other string.
* @param target the target word or phrase
* @param offset the offset at which to start the comparison
* @param length the length of what's left of the string to compare
* @return the number of edits or Integer.MIN_VALUE if the edit distance is
* greater than maxDistance.
*/
private final int calcDistance(final int[] target, int offset, int length) {
final int m = length;
final int n = text.length;
if (n == 0) {
//we don't have anything to compare. That means if we just add
//the letters for m we get the new word
return m;
}
if (m == 0) {
return n;
}
final int maxDistance = calculateMaxDistance(m);
if (maxDistance < Math.abs(m-n)) {
//just adding the characters of m to n or vice-versa results in
//too many edits
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
//given this optimal circumstance, the edit distance cannot be less than 5,
//which is 8-3, or more precisely Math.abs(3-8).
//if our maximum edit distance is 4, then we can discard this word
//without looking at it.
return Integer.MIN_VALUE;
}
// init matrix d
for (int i = 0; i <=n; ++i) {
p[i] = i;
}
// start computing edit distance
for (int j = 1; j<=m; ++j) { // iterates through target
int bestPossibleEditDistance = m;
final int t_j = target[offset+j-1]; // jth character of t
d[0] = j;
for (int i=1; i<=n; ++i) { // iterates through text
// minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
if (t_j != text[i-1]) {
d[i] = Math.min(Math.min(d[i-1], p[i]), p[i-1]) + 1;
} else {
d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]);
}
bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i]);
}
//After calculating row i, the best possible edit distance
//can be found by finding the smallest value in a given column.
//If the bestPossibleEditDistance is greater than the max distance, abort.
if (j > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater
//the closest the target can be to the text is just too far away.
//this target is leaving the party early.
return Integer.MIN_VALUE;
}
// copy current distance counts to 'previous row' distance counts: swap p and d
int _d[] = p;
p = d;
d = _d;
}
// our last action in the above loop was to switch d and p, so p now
// actually has the most recent cost counts
return p[n];
}
private float calcSimilarity(int edits, int m, int n){
// this will return less than 0.0 when the edit distance is
// greater than the number of characters in the shorter word.
// but this was the formula that was previously used in FuzzyTermEnum,
// so it has not been changed (even though minimumSimilarity must be
// greater than 0.0)
return 1.0f - ((float)edits / (float) (realPrefixLength + Math.min(n, m)));
}
/**
* The max distance is the maximum Levenshtein distance for the text
* compared to some other value that results in a score that is
* better than the minimum similarity.
* @param m the length of the "other value"
* @return the maximum levenshtein distance that we care about
*/
private int calculateMaxDistance(int m) {
return raw ? maxEdits : Math.min(maxEdits,
(int)((1-minSimilarity) * (Math.min(text.length, m) + realPrefixLength)));
}
}
}

View File

@ -77,7 +77,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
//Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
public void testClosestEditDistanceMatchComesFirst() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("smith", "name", 0.3f, 1);
flt.addTerms("smith", "name", 2, 1);
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@ -94,7 +94,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
//Test multiple input words are having variants produced
public void testMultiWord() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("jonathin smoth", "name", 0.3f, 1);
flt.addTerms("jonathin smoth", "name", 2, 1);
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@ -110,8 +110,8 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
// LUCENE-4809
public void testNonExistingField() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("jonathin smoth", "name", 0.3f, 1);
flt.addTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
flt.addTerms("jonathin smoth", "name", 2, 1);
flt.addTerms("jonathin smoth", "this field does not exist", 2, 1);
// don't fail here just because the field doesn't exist
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
@ -129,7 +129,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
//Test bug found when first query word does not match anything
public void testNoMatchFirstWordBug() throws Throwable {
FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
flt.addTerms("fernando smith", "name", 0.3f, 1);
flt.addTerms("fernando smith", "name", 2, 1);
Query q = flt.rewrite(searcher.getIndexReader());
HashSet<Term> queryTerms = new HashSet<>();
searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@ -144,9 +144,9 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
public void testFuzzyLikeThisQueryEquals() {
Analyzer analyzer = new MockAnalyzer(random());
FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
fltq1.addTerms("javi", "subject", 0.5f, 2);
fltq1.addTerms("javi", "subject", 2, 2);
FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
fltq2.addTerms("javi", "subject", 0.5f, 2);
fltq2.addTerms("javi", "subject", 2, 2);
assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
fltq2);
}

View File

@ -1,487 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.queries;
import java.util.List;
import java.util.Arrays;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
* Tests {@link SlowFuzzyQuery}.
*
*/
public class TestSlowFuzzyQuery extends LuceneTestCase {
public void testFuzziness() throws Exception {
//every test with SlowFuzzyQuery.defaultMinSimilarity
//is exercising the Automaton, not the brute force linear method
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("aaaaa", writer);
addDoc("aaaab", writer);
addDoc("aaabb", writer);
addDoc("aabbb", writer);
addDoc("abbbb", writer);
addDoc("bbbbb", writer);
addDoc("ddddd", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
// same with prefix
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(2, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
// test scoring
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals("3 documents should match", 3, hits.length);
List<String> order = Arrays.asList("bbbbb","abbbb","aabbb");
for (int i = 0; i < hits.length; i++) {
final String term = searcher.doc(hits[i].doc).get("field");
//System.out.println(hits[i].score);
assertEquals(order.get(i), term);
}
// test pq size by supplying maxExpansions=2
// This query would normally return 3 documents, because 3 terms match (see above):
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals("only 2 documents should match", 2, hits.length);
order = Arrays.asList("bbbbb","abbbb");
for (int i = 0; i < hits.length; i++) {
final String term = searcher.doc(hits[i].doc).get("field");
//System.out.println(hits[i].score);
assertEquals(order.get(i), term);
}
// not similar enough:
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// query identical to a word in the index:
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
// default allows for up to two edits:
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
// query similar to a word in the index:
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
// now with prefix
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(2, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
// now with prefix
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// different field = no match:
query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
reader.close();
directory.close();
}
public void testFuzzinessLong2() throws Exception {
//Lucene-5033
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("abcdef", writer);
addDoc("segment", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
SlowFuzzyQuery query;
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
reader.close();
directory.close();
}
public void testFuzzinessLong() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("aaaaaaa", writer);
addDoc("segment", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
SlowFuzzyQuery query;
// not similar enough:
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
// in testDefaultFuzziness so a bigger difference is allowed:
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
// now with prefix
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// no match, more than half of the characters is wrong:
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// now with prefix
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// "student" and "stellent" are indeed similar to "segment" by default:
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
// now with prefix
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// "student" doesn't match anymore thanks to increased minimum similarity:
query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
expectThrows(IllegalArgumentException.class, () -> {
new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
});
expectThrows(IllegalArgumentException.class, () -> {
new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
});
reader.close();
directory.close();
}
/**
* MultiTermQuery provides (via attribute) information about which values
* must be competitive to enter the priority queue.
*
* SlowFuzzyQuery optimizes itself around this information; if the attribute
* is not implemented correctly, there will be problems!
*/
public void testTieBreaker() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("a123456", writer);
addDoc("c123456", writer);
addDoc("d123456", writer);
addDoc("e123456", writer);
Directory directory2 = newDirectory();
RandomIndexWriter writer2 = new RandomIndexWriter(random(), directory2);
addDoc("a123456", writer2);
addDoc("b123456", writer2);
addDoc("b123456", writer2);
addDoc("b123456", writer2);
addDoc("c123456", writer2);
addDoc("f123456", writer2);
IndexReader ir1 = writer.getReader();
IndexReader ir2 = writer2.getReader();
MultiReader mr = new MultiReader(ir1, ir2);
IndexSearcher searcher = newSearcher(mr);
SlowFuzzyQuery fq = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
TopDocs docs = searcher.search(fq, 2);
assertEquals(5, docs.totalHits); // 5 docs, from the a and b's
mr.close();
ir1.close();
ir2.close();
writer.close();
writer2.close();
directory.close();
directory2.close();
}
public void testTokenLengthOpt() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("12345678911", writer);
addDoc("segment", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
Query query;
// term not over 10 chars, so optimization shortcuts
query = new SlowFuzzyQuery(new Term("field", "1234569"), 0.9f);
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// 10 chars, so no optimization
query = new SlowFuzzyQuery(new Term("field", "1234567891"), 0.9f);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
// over 10 chars, so no optimization
query = new SlowFuzzyQuery(new Term("field", "12345678911"), 0.9f);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(1, hits.length);
// over 10 chars, no match
query = new SlowFuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
hits = searcher.search(query, 1000).scoreDocs;
assertEquals(0, hits.length);
reader.close();
directory.close();
}
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
public void testBoostOnlyRewrite() throws Exception {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
addDoc("Lucene", writer);
addDoc("Lucene", writer);
addDoc("Lucenne", writer);
IndexReader reader = writer.getReader();
IndexSearcher searcher = newSearcher(reader);
writer.close();
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene"));
query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
assertEquals(3, hits.length);
// normally, 'Lucenne' would be the first result as IDF will skew the score.
assertEquals("Lucene", reader.document(hits[0].doc).get("field"));
assertEquals("Lucene", reader.document(hits[1].doc).get("field"));
assertEquals("Lucenne", reader.document(hits[2].doc).get("field"));
reader.close();
directory.close();
}
public void testGiga() throws Exception {
Directory index = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), index);
addDoc("Lucene in Action", w);
addDoc("Lucene for Dummies", w);
//addDoc("Giga", w);
addDoc("Giga byte", w);
addDoc("ManagingGigabytesManagingGigabyte", w);
addDoc("ManagingGigabytesManagingGigabytes", w);
addDoc("The Art of Computer Science", w);
addDoc("J. K. Rowling", w);
addDoc("JK Rowling", w);
addDoc("Joanne K Roling", w);
addDoc("Bruce Willis", w);
addDoc("Willis bruce", w);
addDoc("Brute willis", w);
addDoc("B. willis", w);
IndexReader r = w.getReader();
w.close();
Query q = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);
// 3. search
IndexSearcher searcher = newSearcher(r);
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("Giga byte", searcher.doc(hits[0].doc).get("field"));
r.close();
index.close();
}
public void testDistanceAsEditsSearching() throws Exception {
Directory index = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), index);
addDoc("foobar", w);
addDoc("test", w);
addDoc("working", w);
IndexReader reader = w.getReader();
IndexSearcher searcher = newSearcher(reader);
w.close();
SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(1, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
hits = searcher.search(q, 10).scoreDocs;
assertEquals(2, hits.length);
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
reader.close();
index.close();
}
private void addDoc(String text, RandomIndexWriter writer) throws IOException {
Document doc = new Document();
doc.add(newTextField("field", text, Field.Store.YES));
writer.addDocument(doc);
}
}

View File

@ -415,40 +415,42 @@ public class DirectSpellChecker {
BoostAttribute boostAtt =
e.attributes().addAttribute(BoostAttribute.class);
while ((candidateTerm = e.next()) != null) {
final float boost = boostAtt.getBoost();
// For FuzzyQuery, boost is the score:
float score = boostAtt.getBoost();
// ignore uncompetitive hits
if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
continue;
}
// ignore exact match of the same term
if (queryTerm.bytesEquals(candidateTerm))
if (queryTerm.bytesEquals(candidateTerm)) {
continue;
}
int df = e.docFreq();
// check docFreq if required
if (df <= docfreq)
if (df <= docfreq) {
continue;
}
final float score;
final String termAsString;
if (distance == INTERNAL_LEVENSHTEIN) {
// delay creating strings until the end
termAsString = null;
// undo FuzzyTermsEnum's scale factor for a real scaled lev score
score = boost / e.getScaleFactor() + e.getMinSimilarity();
} else {
spare.copyUTF8Bytes(candidateTerm);
termAsString = spare.toString();
score = distance.getDistance(term.text(), termAsString);
}
if (score < accuracy)
if (score < accuracy) {
continue;
}
// add new entry in PQ
st.term = BytesRef.deepCopyOf(candidateTerm);
st.boost = boost;
st.boost = score;
st.docfreq = df;
st.termAsString = termAsString;
st.score = score;
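The hunk above only keeps candidates whose score can still beat the worst entry of a bounded priority queue. A generic, hedged sketch of that pattern (names and types are illustrative, not DirectSpellChecker's API):

import java.util.PriorityQueue;

// Sketch of the "ignore uncompetitive hits" pattern: once numSug entries are queued,
// a new score is admitted only if it beats the current worst (the head of a min-heap).
class TopScoresSketch {
  static PriorityQueue<Float> collect(float[] scores, int numSug) {
    PriorityQueue<Float> queue = new PriorityQueue<>();    // min-heap: worst kept score at the head
    for (float score : scores) {
      if (queue.size() >= numSug && score <= queue.peek()) {
        continue;                                          // uncompetitive: cannot displace the worst
      }
      queue.add(score);
      if (queue.size() > numSug) {
        queue.poll();                                      // evict the worst, keep the numSug best
      }
    }
    return queue;
  }
}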

View File

@ -77,6 +77,15 @@ prefix, then you will now get an error as these options are incompatible with nu
New Features
----------------------
* SOLR-5725: facet.method=enum can bypass the exact counts calculation with facet.exists=true; it just returns 1 for
terms which exist in the result docset. (Alexey Kozhemiakin, Sebastian Koziel, Radoslaw Zielinski via Mikhail Khludnev)
* SOLR-9127: Excel workbook (.xlsx) response writer. Use 'wt=xlsx'. (Tony Moriarty, noble)
* SOLR-9469: JettySolrRunner now has the option of restarting using a different
port (Alan Woodward)
* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble)
Bug Fixes
----------------------
@ -103,12 +112,19 @@ Bug Fixes
* SOLR-9461: DELETENODE, REPLACENODE should pass down the 'async' param to subcommands (shalin, noble)
* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble)
* SOLR-9444: Fix path usage for cloud backup/restore. (Hrishikesh Gadre, Uwe Schindler, Varun Thacker)
* SOLR-9381: Snitch for freedisk uses '/' instead of 'coreRootDirectory' (Tim Owen, noble)
* SOLR-9488: Shard split can fail to write commit data on shutdown/restart causing replicas to recover
without replicating the index. This can cause data loss. (shalin)
* SOLR-9490: Fixed bugs in BoolField that caused it to erroneously return "false" for all docs depending
on usage (Colvin Cowie, Dan Fox, hossman)
* SOLR-9438: Shard split can be marked successful and sub-shard states switched to 'active' even when
one or more sub-shards replicas do not recover due to the leader crashing or restarting between the time
the replicas are created and before they can recover. This can cause data loss. (shalin)
Optimizations
----------------------
@ -135,6 +151,27 @@ Other Changes
* SOLR-9406: SolrSuggester should selectively register close hook (Gethin James, Joel Bernstein)
* SOLR-8961: Add a test module for solr-test-framework (Alan Woodward)
* SOLR-9474: MiniSolrCloudCluster will not reuse ports by default when
restarting its JettySolrRunners (Alan Woodward)
* SOLR-9498: Remove HDFS properties from DIH solrconfig.xml, as started in SOLR-6943 (Alexandre Rafalovitch)
* SOLR-9365: Reduce noise in solr logs during graceful shutdown. (Cao Manh Dat via shalin)
================== 6.2.1 ==================
Bug Fixes
----------------------
* SOLR-9494: Use of {!collapse} sometimes doesn't correctly return true for Collector.needsScores(), especially when the
query was cached. This can cause an exception when 'q' is a SpanQuery or potentially others. (David Smiley)
* SOLR-9408: Fix TreeMergeOutputFormat to add timestamp metadata to a commit. SolrCloud replication relies on this.
(Jessica Cheng Mallet via Varun Thacker)
================== 6.2.0 ==================
Versions of Major Components
@ -1168,6 +1205,23 @@ Other Changes
* SOLR-8904: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil. Obsolete methods were removed.
(David Smiley)
======================= 5.5.3 =======================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
Versions of Major Components
---------------------
Apache Tika 1.13
Carrot2 3.12.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
Jetty 9.3.8.v20160314
(No Changes)
======================= 5.5.2 =======================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -0,0 +1,414 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.extraction;
import java.io.CharArrayWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import org.apache.lucene.index.IndexableField;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.BasicResultContext;
import org.apache.solr.response.RawResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.TextResponseWriter;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.DocList;
import org.apache.solr.search.ReturnFields;
public class XLSXResponseWriter extends RawResponseWriter {
@Override
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
// throw away arraywriter just to satisfy super requirements; we're grabbing
// all writes before they go to it anyway
XLSXWriter w = new XLSXWriter(new CharArrayWriter(), req, rsp);
LinkedHashMap<String,String> reqNamesMap = new LinkedHashMap<>();
LinkedHashMap<String,Integer> reqWidthsMap = new LinkedHashMap<>();
Iterator<String> paramNamesIter = req.getParams().getParameterNamesIterator();
while (paramNamesIter.hasNext()) {
String nextParam = paramNamesIter.next();
if (nextParam.startsWith("colname.")) {
String field = nextParam.substring("colname.".length());
reqNamesMap.put(field, req.getParams().get(nextParam));
} else if (nextParam.startsWith("colwidth.")) {
String field = nextParam.substring("colwidth.".length());
reqWidthsMap.put(field, req.getParams().getInt(nextParam));
}
}
try {
w.writeResponse(out, reqNamesMap, reqWidthsMap);
} finally {
w.close();
}
}
@Override
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
}
}
class XLSXWriter extends TextResponseWriter {
SolrQueryRequest req;
SolrQueryResponse rsp;
class SerialWriteWorkbook {
SXSSFWorkbook swb;
Sheet sh;
XSSFCellStyle headerStyle;
int rowIndex;
Row curRow;
int cellIndex;
SerialWriteWorkbook() {
this.swb = new SXSSFWorkbook(100);
this.sh = this.swb.createSheet();
this.rowIndex = 0;
this.headerStyle = (XSSFCellStyle)swb.createCellStyle();
this.headerStyle.setFillBackgroundColor(IndexedColors.BLACK.getIndex());
//solid fill
this.headerStyle.setFillPattern((short)1);
Font headerFont = swb.createFont();
headerFont.setFontHeightInPoints((short)14);
headerFont.setBoldweight(Font.BOLDWEIGHT_BOLD);
headerFont.setColor(IndexedColors.WHITE.getIndex());
this.headerStyle.setFont(headerFont);
}
void addRow() {
curRow = sh.createRow(rowIndex++);
cellIndex = 0;
}
void setHeaderRow() {
curRow.setHeightInPoints((short)21);
}
//sets last created cell to have header style
void setHeaderCell() {
curRow.getCell(cellIndex - 1).setCellStyle(this.headerStyle);
}
//set the width of the most recently created column
void setColWidth(int charWidth) {
//width in poi is in units of 1/256th of a character width for some reason
this.sh.setColumnWidth(cellIndex - 1, 256*charWidth);
}
void writeCell(String value) {
Cell cell = curRow.createCell(cellIndex++);
cell.setCellValue(value);
}
void flush(OutputStream out) {
try {
swb.write(out);
} catch (IOException e) {
// the exception is swallowed here (captured only as a string) so that dispose()
// in the finally block can still release the workbook's temporary files
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
String stacktrace = sw.toString();
} finally {
swb.dispose();
}
}
}
private SerialWriteWorkbook wb = new SerialWriteWorkbook();
static class XLField {
String name;
SchemaField sf;
}
private Map<String,XLField> xlFields = new LinkedHashMap<String,XLField>();
public XLSXWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp){
super(writer, req, rsp);
this.req = req;
this.rsp = rsp;
}
public void writeResponse(OutputStream out, LinkedHashMap<String, String> colNamesMap,
LinkedHashMap<String, Integer> colWidthsMap) throws IOException {
SolrParams params = req.getParams();
Collection<String> fields = returnFields.getRequestedFieldNames();
Object responseObj = rsp.getValues().get("response");
boolean returnOnlyStored = false;
if (fields==null||returnFields.hasPatternMatching()) {
if (responseObj instanceof SolrDocumentList) {
// get the list of fields from the SolrDocumentList
if(fields==null) {
fields = new LinkedHashSet<String>();
}
for (SolrDocument sdoc: (SolrDocumentList)responseObj) {
fields.addAll(sdoc.getFieldNames());
}
} else {
// get the list of fields from the index
Iterable<String> all = req.getSearcher().getFieldNames();
if (fields == null) {
fields = Sets.newHashSet(all);
} else {
Iterables.addAll(fields, all);
}
}
if (returnFields.wantsScore()) {
fields.add("score");
} else {
fields.remove("score");
}
returnOnlyStored = true;
}
for (String field : fields) {
if (!returnFields.wantsField(field)) {
continue;
}
if (field.equals("score")) {
XLField xlField = new XLField();
xlField.name = "score";
xlFields.put("score", xlField);
continue;
}
SchemaField sf = schema.getFieldOrNull(field);
if (sf == null) {
FieldType ft = new StrField();
sf = new SchemaField(field, ft);
}
// Return only stored fields, unless an explicit field list is specified
if (returnOnlyStored && sf != null && !sf.stored()) {
continue;
}
XLField xlField = new XLField();
xlField.name = field;
xlField.sf = sf;
xlFields.put(field, xlField);
}
wb.addRow();
//write header
for (XLField xlField : xlFields.values()) {
String printName = xlField.name;
int colWidth = 14;
String niceName = colNamesMap.get(xlField.name);
if (niceName != null) {
printName = niceName;
}
Integer niceWidth = colWidthsMap.get(xlField.name);
if (niceWidth != null) {
colWidth = niceWidth.intValue();
}
writeStr(xlField.name, printName, false);
wb.setColWidth(colWidth);
wb.setHeaderCell();
}
wb.setHeaderRow();
wb.addRow();
if (responseObj instanceof ResultContext) {
writeDocuments(null, (ResultContext)responseObj );
}
else if (responseObj instanceof DocList) {
ResultContext ctx = new BasicResultContext((DocList)responseObj, returnFields, null, null, req);
writeDocuments(null, ctx );
} else if (responseObj instanceof SolrDocumentList) {
writeSolrDocumentList(null, (SolrDocumentList)responseObj, returnFields );
}
wb.flush(out);
wb = null;
}
@Override
public void close() throws IOException {
super.close();
}
@Override
public void writeNamedList(String name, NamedList val) throws IOException {
}
@Override
public void writeStartDocumentList(String name,
long start, int size, long numFound, Float maxScore) throws IOException
{
// nothing
}
@Override
public void writeEndDocumentList() throws IOException
{
// nothing
}
//NOTE: a document cannot currently contain another document
List tmpList;
@Override
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx ) throws IOException {
if (tmpList == null) {
tmpList = new ArrayList(1);
tmpList.add(null);
}
for (XLField xlField : xlFields.values()) {
Object val = doc.getFieldValue(xlField.name);
int nVals = val instanceof Collection ? ((Collection)val).size() : (val==null ? 0 : 1);
if (nVals == 0) {
writeNull(xlField.name);
continue;
}
if ((xlField.sf != null && xlField.sf.multiValued()) || nVals > 1) {
Collection values;
// normalize to a collection
if (val instanceof Collection) {
values = (Collection)val;
} else {
tmpList.set(0, val);
values = tmpList;
}
writeArray(xlField.name, values.iterator());
} else {
// normalize to first value
if (val instanceof Collection) {
Collection values = (Collection)val;
val = values.iterator().next();
}
writeVal(xlField.name, val);
}
}
wb.addRow();
}
@Override
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
wb.writeCell(val);
}
@Override
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
}
@Override
public void writeArray(String name, Iterator val) throws IOException {
StringBuffer output = new StringBuffer();
while (val.hasNext()) {
Object v = val.next();
if (v instanceof IndexableField) {
IndexableField f = (IndexableField)v;
if (v instanceof Date) {
output.append(((Date) v).toInstant().toString() + "; ");
} else {
output.append(f.stringValue() + "; ");
}
} else {
output.append(v.toString() + "; ");
}
}
if (output.length() > 0) {
output.deleteCharAt(output.length()-1);
output.deleteCharAt(output.length()-1);
}
writeStr(name, output.toString(), false);
}
@Override
public void writeNull(String name) throws IOException {
wb.writeCell("");
}
@Override
public void writeInt(String name, String val) throws IOException {
wb.writeCell(val);
}
@Override
public void writeLong(String name, String val) throws IOException {
wb.writeCell(val);
}
@Override
public void writeBool(String name, String val) throws IOException {
wb.writeCell(val);
}
@Override
public void writeFloat(String name, String val) throws IOException {
wb.writeCell(val);
}
@Override
public void writeDouble(String name, String val) throws IOException {
wb.writeCell(val);
}
@Override
public void writeDate(String name, Date val) throws IOException {
writeDate(name, val.toInstant().toString());
}
@Override
public void writeDate(String name, String val) throws IOException {
wb.writeCell(val);
}
}
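The writer above drives header labels and column widths from per-field request parameters (colname.&lt;field&gt; and colwidth.&lt;field&gt;), which write() collects before calling writeResponse(). Below is a minimal sketch, not part of the patch, of a request that exercises them; the field name "price" and the label "Unit Price" are purely illustrative.

import org.apache.solr.common.params.ModifiableSolrParams;

public class XlsxParamsSketch {
  public static void main(String[] args) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q", "*:*");
    params.set("wt", "xlsx");                  // route the response through XLSXResponseWriter
    params.set("fl", "id,price");
    params.set("colname.price", "Unit Price"); // header label override read in write()
    params.set("colwidth.price", "20");        // column width override, in characters
    System.out.println(params);                // prints the URL-encoded parameter string
  }
}

Sent with wt=xlsx (and the extraction contrib on the classpath), these parameters should yield a spreadsheet whose price column is titled "Unit Price" and is 20 characters wide.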

View File

@ -415,6 +415,7 @@
-->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
@ -422,6 +423,7 @@
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dt1" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>

View File

@ -0,0 +1,257 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.extraction;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.time.Instant;
import java.util.Date;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.RawResponseWriter;
import org.apache.solr.search.SolrReturnFields;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestXLSXResponseWriter extends SolrTestCaseJ4 {
private static XLSXResponseWriter writerXlsx;
@BeforeClass
public static void beforeClass() throws Exception {
System.setProperty("enable.update.log", "false");
initCore("solrconfig.xml","schema.xml",getFile("extraction/solr").getAbsolutePath());
createIndex();
//find a reference to the default response writer so we can redirect its output later
SolrCore testCore = h.getCore();
QueryResponseWriter writer = testCore.getQueryResponseWriter("xlsx");
if (writer instanceof XLSXResponseWriter) {
writerXlsx = (XLSXResponseWriter) testCore.getQueryResponseWriter("xlsx");
} else {
throw new Exception("XLSXResponseWriter not registered with solr core");
}
}
public static void createIndex() {
assertU(adoc("id","1", "foo_i","-1", "foo_s","hi", "foo_l","12345678987654321", "foo_b","false", "foo_f","1.414","foo_d","-1.0E300","foo_dt1","2000-01-02T03:04:05Z"));
assertU(adoc("id","2", "v_ss","hi", "v_ss","there", "v2_ss","nice", "v2_ss","output", "shouldbeunstored","foo"));
assertU(adoc("id","3", "shouldbeunstored","foo"));
assertU(adoc("id","4", "foo_s1","foo"));
assertU(commit());
}
@AfterClass
public static void cleanupWriter() throws Exception {
writerXlsx = null;
}
@Test
public void testStructuredDataViaBaseWriters() throws IOException, Exception {
SolrQueryResponse rsp = new SolrQueryResponse();
// Don't send a ContentStream back; this will fall back to the configured base writer.
// But abuse the CONTENT key to ensure the writer is also checking the type
rsp.add(RawResponseWriter.CONTENT, "test");
rsp.add("foo", "bar");
SolrQueryRequest r = req();
// check Content-Type
assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", writerXlsx.getContentType(r, rsp));
// test our basic types, and that fields come back in the requested order
XSSFSheet resultSheet = getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1"));
assertEquals("id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1\n1,hi,-1,12345678987654321,F,1.414,-1.0E300,2000-01-02T03:04:05Z\n"
, getStringFromSheet(resultSheet));
resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "fl","id,score,foo_s"));
// test retrieving score
assertEquals("id,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));
resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "colname.id", "I.D.", "colwidth.id", "10",
"fl","id,score,foo_s"));
// test override colname/width
assertEquals("I.D.,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));
// test colwidth (value returned is in 256ths of a character as per excel standard)
assertEquals(10*256, resultSheet.getColumnWidth(0));
resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,v_ss"));
// test multivalued
assertEquals("id,v_ss\n2,hi; there\n", getStringFromSheet(resultSheet));
// test retrieving fields from index
resultSheet = getWSResultForQuery(req("q","*:*", "wt","xslx", "fl","*,score"));
String result = getStringFromSheet(resultSheet);
for (String field : "id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss,score".split(",")) {
assertTrue(result.indexOf(field) >= 0);
}
// test null values
resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,foo_s,v_ss"));
assertEquals("id,foo_s,v_ss\n2,,hi; there\n", getStringFromSheet(resultSheet));
// now test SolrDocumentList
SolrDocument d = new SolrDocument();
SolrDocument d1 = d;
d.addField("id","1");
d.addField("foo_i",-1);
d.addField("foo_s","hi");
d.addField("foo_l","12345678987654321L");
d.addField("foo_b",false);
d.addField("foo_f",1.414f);
d.addField("foo_d",-1.0E300);
d.addField("foo_dt1", new Date(Instant.parse("2000-01-02T03:04:05Z").toEpochMilli()));
d.addField("score", "2.718");
d = new SolrDocument();
SolrDocument d2 = d;
d.addField("id","2");
d.addField("v_ss","hi");
d.addField("v_ss","there");
d.addField("v2_ss","nice");
d.addField("v2_ss","output");
d.addField("score", "89.83");
d.addField("shouldbeunstored","foo");
SolrDocumentList sdl = new SolrDocumentList();
sdl.add(d1);
sdl.add(d2);
SolrQueryRequest req = req("q","*:*");
rsp = new SolrQueryResponse();
rsp.addResponse(sdl);
rsp.setReturnFields( new SolrReturnFields("id,foo_s", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("id,foo_s\n1,hi\n2,\n", getStringFromSheet(resultSheet));
// try scores
rsp.setReturnFields( new SolrReturnFields("id,score,foo_s", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("id,score,foo_s\n1,2.718,hi\n2,89.83,\n", getStringFromSheet(resultSheet));
// get field values from docs... should be ordered and not include score unless requested
rsp.setReturnFields( new SolrReturnFields("*", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss\n" +
"1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z,,\n" +
"2,,,,,,,,hi; there,nice; output\n", getStringFromSheet(resultSheet));
// get field values and scores - just check that the scores are there... we don't guarantee where
rsp.setReturnFields( new SolrReturnFields("*,score", req) );
resultSheet = getWSResultForQuery(req, rsp);
String s = getStringFromSheet(resultSheet);
assertTrue(s.indexOf("score") >=0 && s.indexOf("2.718") > 0 && s.indexOf("89.83") > 0 );
// Test field globs
rsp.setReturnFields( new SolrReturnFields("id,foo*", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1\n" +
"1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z\n" +
"2,,,,,,,\n", getStringFromSheet(resultSheet));
rsp.setReturnFields( new SolrReturnFields("id,*_d*", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("id,foo_d,foo_dt1\n" +
"1,-1.0E300,2000-01-02T03:04:05Z\n" +
"2,,\n", getStringFromSheet(resultSheet));
// Test function queries
rsp.setReturnFields( new SolrReturnFields("sum(1,1),id,exists(foo_s1),div(9,1),foo_f", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("sum(1,1),id,exists(foo_s1),div(9,1),foo_f\n" +
",1,,,1.414\n" +
",2,,,\n", getStringFromSheet(resultSheet));
// Test transformers
rsp.setReturnFields( new SolrReturnFields("mydocid:[docid],[explain]", req) );
resultSheet = getWSResultForQuery(req, rsp);
assertEquals("mydocid,[explain]\n" +
",\n" +
",\n", getStringFromSheet(resultSheet));
req.close();
}
@Test
public void testPseudoFields() throws Exception {
// Use Pseudo Field
SolrQueryRequest req = req("q","id:1", "wt","xlsx", "fl","XXX:id,foo_s");
XSSFSheet resultSheet = getWSResultForQuery(req);
assertEquals("XXX,foo_s\n1,hi\n", getStringFromSheet(resultSheet));
String txt = getStringFromSheet(getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","XXX:id,YYY:[docid],FOO:foo_s")));
String[] lines = txt.split("\n");
assertEquals(2, lines.length);
assertEquals("XXX,YYY,FOO", lines[0] );
assertEquals("1,0,hi", lines[1] );
//assertions specific to multiple pseudofields functions like abs, div, exists, etc.. (SOLR-5423)
String funcText = getStringFromSheet(getWSResultForQuery(req("q","*", "wt","xlsx", "fl","XXX:id,YYY:exists(foo_s1)")));
String[] funcLines = funcText.split("\n");
assertEquals(5, funcLines.length);
assertEquals("XXX,YYY", funcLines[0] );
assertEquals("1,false", funcLines[1] );
assertEquals("3,false", funcLines[3] );
}
// returns first worksheet as XLSXResponseWriter only returns one sheet
private XSSFSheet getWSResultForQuery(SolrQueryRequest req) throws IOException, Exception {
SolrQueryResponse rsp = h.queryAndResponse("standard", req);
return getWSResultForQuery(req, rsp);
}
private XSSFSheet getWSResultForQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, Exception {
ByteArrayOutputStream xmlBout = new ByteArrayOutputStream();
writerXlsx.write(xmlBout, req, rsp);
XSSFWorkbook output = new XSSFWorkbook(new ByteArrayInputStream(xmlBout.toByteArray()));
XSSFSheet sheet = output.getSheetAt(0);
req.close();
output.close();
return sheet;
}
private String getStringFromSheet(XSSFSheet sheet) {
StringBuilder output = new StringBuilder();
for (Row row: sheet) {
for (Cell cell: row) {
output.append(cell.getStringCellValue());
output.append(",");
}
output.setLength(output.length() - 1);
output.append("\n");
}
return output.toString();
}
}

View File

@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@ -40,12 +41,11 @@ import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.misc.IndexMergeTool;
import org.apache.lucene.store.Directory;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
/**
* See {@link IndexMergeTool}.
*/
@ -151,7 +151,7 @@ public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable>
LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
timer = new RTimer();
if (maxSegments < Integer.MAX_VALUE) {
writer.forceMerge(maxSegments);
writer.forceMerge(maxSegments);
// TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
// see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
}
@ -161,6 +161,10 @@ public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable>
}
LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {}ms", maxSegments, timer.getTime());
// Set Solr's commit data so the created index is usable by SolrCloud. E.g. Currently SolrCloud relies on
// commitTimeMSec in the commit data to do replication.
SolrIndexWriter.setCommitData(writer);
timer = new RTimer();
LOG.info("Optimizing Solr: Closing index writer");
writer.close();

View File

@ -307,9 +307,24 @@ public class JettySolrRunner {
/**
* Start the Jetty server
*
* If the server has been started before, it will restart using the same port
*
* @throws Exception if an error occurs on startup
*/
public void start() throws Exception {
start(true);
}
/**
* Start the Jetty server
*
* @param reusePort when true, will start up on the same port as used by any
* previous runs of this JettySolrRunner. If false, will use
* the port specified by the server's JettyConfig.
*
* @throws Exception if an error occurs on startup
*/
public void start(boolean reusePort) throws Exception {
// Do not let Jetty/Solr pollute the MDC for this thread
Map<String, String> prevContext = MDC.getCopyOfContextMap();
MDC.clear();
@ -317,7 +332,8 @@ public class JettySolrRunner {
// if started before, make a new server
if (startedBefore) {
waitOnSolr = false;
init(lastPort);
int port = reusePort ? lastPort : this.config.port;
init(port);
} else {
startedBefore = true;
}
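The new start(boolean reusePort) overload lets a test decide whether a restarted Jetty keeps the port from its previous run or falls back to the port in its JettyConfig. A hedged usage sketch follows; the constructor arguments and paths are illustrative assumptions, not taken from the patch.

import org.apache.solr.client.solrj.embedded.JettySolrRunner;

public class RestartSketch {
  public static void main(String[] args) throws Exception {
    JettySolrRunner jetty = new JettySolrRunner("/path/to/solr/home", "/solr", 0);
    jetty.start();        // first start; port 0 lets Jetty pick an ephemeral port
    jetty.stop();
    jetty.start(true);    // restart on the same port as the previous run
    jetty.stop();
    jetty.start(false);   // restart on the port from the runner's config instead
    jetty.stop();
  }
}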

View File

@ -79,7 +79,7 @@ public class DeleteShardCmd implements Cmd {
// TODO: Add check for range gaps on Slice deletion
final Slice.State state = slice.getState();
if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY
|| state == Slice.State.CONSTRUCTION)) {
|| state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state
+ ". Only non-active (or custom-hashed) slices can be deleted.");
}

View File

@ -288,8 +288,12 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
ActionThrottle lt;
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
if (core == null ) {
if (cc.isShutDown()) {
return;
} else {
throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
}
}
MDCLoggingContext.setCore(core);
lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
@ -325,9 +329,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
try (SolrCore core = cc.getCore(coreName)) {
if (core == null) {
cancelElection();
throw new SolrException(ErrorCode.SERVER_ERROR,
"SolrCore not found:" + coreName + " in " + cc.getCoreNames());
if (!zkController.getCoreContainer().isShutDown()) {
cancelElection();
throw new SolrException(ErrorCode.SERVER_ERROR,
"SolrCore not found:" + coreName + " in " + cc.getCoreNames());
} else {
return;
}
}
// should I be leader?

View File

@ -347,7 +347,9 @@ public class LeaderElector {
// am I the next leader?
checkIfIamLeader(context, true);
} catch (Exception e) {
log.warn("", e);
if (!zkClient.isClosed()) {
log.warn("", e);
}
}
}
}

View File

@ -176,7 +176,9 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
try {
prioritizer.prioritizeOverseerNodes(myId);
} catch (Exception e) {
log.error("Unable to prioritize overseer ", e);
if (!zkStateReader.getZkClient().isClosed()) {
log.error("Unable to prioritize overseer ", e);
}
}
// TODO: Make maxThreads configurable.

View File

@ -582,7 +582,7 @@ public class RecoveryStrategy extends Thread implements Closeable {
prepCmd.setCheckLive(true);
prepCmd.setOnlyIfLeader(true);
final Slice.State state = slice.getState();
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY) {
if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
prepCmd.setOnlyIfLeaderActive(true);
}
HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);

View File

@ -47,6 +47,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.util.TestInjection;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -126,6 +127,13 @@ public class SplitShardCmd implements Cmd {
Thread.currentThread().interrupt();
}
// let's record the ephemeralOwner of the parent leader node
Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null) {
// we just got to know the leader but its live node is gone already!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
}
DocRouter.Range range = parentSlice.getRange();
if (range == null) {
range = new PlainIdRouter().fullRange();
@ -253,6 +261,8 @@ public class SplitShardCmd implements Cmd {
propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
propMap.put("shard_parent_node", parentShardLeader.getNodeName());
propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
@ -420,6 +430,32 @@ public class SplitShardCmd implements Cmd {
assert TestInjection.injectSplitFailureBeforeReplicaCreation();
long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
// compare against the ephemeralOwner of the parent leader node
leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// put sub-shards in recovery_failed state
DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(Utils.toJSON(m));
if (leaderZnodeStat == null) {
// the leader is not live anymore, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
} else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
// there's a new leader, fail the split!
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from "
+ ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
}
}
// we must set the slice state into recovery before actually creating the replica cores
// this ensures that the logic inside Overseer to update sub-shard state to 'active'
// always gets a chance to execute. See SOLR-7673

View File

@ -59,11 +59,19 @@ public class CollectionMutator {
String shardRange = message.getStr(ZkStateReader.SHARD_RANGE_PROP);
String shardState = message.getStr(ZkStateReader.SHARD_STATE_PROP);
String shardParent = message.getStr(ZkStateReader.SHARD_PARENT_PROP);
String shardParentZkSession = message.getStr("shard_parent_zk_session");
String shardParentNode = message.getStr("shard_parent_node");
sliceProps.put(Slice.RANGE, shardRange);
sliceProps.put(ZkStateReader.STATE_PROP, shardState);
if (shardParent != null) {
sliceProps.put(Slice.PARENT, shardParent);
}
if (shardParentZkSession != null) {
sliceProps.put("shard_parent_zk_session", shardParentZkSession);
}
if (shardParentNode != null) {
sliceProps.put("shard_parent_node", shardParentNode);
}
collection = updateSlice(collectionName, collection, new Slice(shardId, replicas, sliceProps));
return new ZkWriteCommand(collectionName, collection);
} else {

View File

@ -38,6 +38,7 @@ import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -403,19 +404,57 @@ public class ReplicaMutator {
}
if (allActive) {
// hurray, all sub shard replicas are active
log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE. Preparing to switch shard states.", sliceName);
log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE.", sliceName);
String parentSliceName = (String) sliceProps.remove(Slice.PARENT);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(parentSliceName, Slice.State.INACTIVE.toString());
propMap.put(sliceName, Slice.State.ACTIVE.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.ACTIVE.toString());
// now let's see if the parent leader is still the same, or else there's a chance of data loss
// see SOLR-9438 for details
String shardParentZkSession = (String) sliceProps.remove("shard_parent_zk_session");
String shardParentNode = (String) sliceProps.remove("shard_parent_node");
boolean isLeaderSame = true;
if (shardParentNode != null && shardParentZkSession != null) {
log.info("Checking whether sub-shard leader node is still the same one at {} with ZK session id {}", shardParentNode, shardParentZkSession);
try {
Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE
+ "/" + shardParentNode, null, true);
if (leaderZnodeStat == null) {
log.error("The shard leader node: {} is not live anymore!", shardParentNode);
isLeaderSame = false;
} else if (leaderZnodeStat.getEphemeralOwner() != Long.parseLong(shardParentZkSession)) {
log.error("The zk session id for shard leader node: {} has changed from {} to {}",
shardParentNode, shardParentZkSession, leaderZnodeStat.getEphemeralOwner());
isLeaderSame = false;
}
} catch (Exception e) {
log.warn("Error occurred while checking if parent shard node is still live with the same zk session id. " +
"We cannot switch shard states at this time.", e);
return collection; // we aren't going to make any changes right now
}
}
if (isLeaderSame) {
log.info("Sub-shard leader node is still the same one at {} with ZK session id {}. Preparing to switch shard states.", shardParentNode, shardParentZkSession);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(parentSliceName, Slice.State.INACTIVE.toString());
propMap.put(sliceName, Slice.State.ACTIVE.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.ACTIVE.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
} else {
// we must mark the shard split as failed by switching sub-shards to recovery_failed state
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(sliceName, Slice.State.RECOVERY_FAILED.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.RECOVERY_FAILED.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
}
}
}

View File

@ -164,9 +164,10 @@ public class SliceMutator {
log.info("Update shard state " + key + " to " + message.getStr(key));
Map<String, Object> props = slice.shallowCopy();
if (Slice.State.getState((String) props.get(ZkStateReader.STATE_PROP)) == Slice.State.RECOVERY
&& Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
if (Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
props.remove(Slice.PARENT);
props.remove("shard_parent_node");
props.remove("shard_parent_zk_session");
}
props.put(ZkStateReader.STATE_PROP, message.getStr(key));
Slice newSlice = new Slice(slice.getName(), slice.getReplicasCopy(), props);

View File

@ -2215,6 +2215,12 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
m.put("smile", new SmileResponseWriter());
m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
DEFAULT_RESPONSE_WRITERS = Collections.unmodifiableMap(m);
try {
m.put("xlsx",
(QueryResponseWriter) Class.forName("org.apache.solr.handler.extraction.XLSXResponseWriter").newInstance());
} catch (Exception e) {
//don't worry; solrcell contrib not in class path
}
}
private static BinaryResponseWriter getFileStreamWriter() {
@ -2237,7 +2243,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}
public interface RawWriter {
public void write(OutputStream os) throws IOException ;
void write(OutputStream os) throws IOException ;
}
/** Configure the query response writers. There will always be a default writer; additional

View File

@ -308,6 +308,9 @@ public class IndexFetcher {
long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
long latestGeneration = (Long) response.get(GENERATION);
LOG.info("Master's generation: " + latestGeneration);
LOG.info("Master's version: " + latestVersion);
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
if (commit == null) {
@ -326,6 +329,7 @@ public class IndexFetcher {
}
}
LOG.info("Slave's generation: " + commit.getGeneration());
if (latestVersion == 0L) {
if (forceReplication && commit.getGeneration() != 0) {
@ -353,8 +357,6 @@ public class IndexFetcher {
successfulInstall = true;
return true;
}
LOG.info("Master's generation: " + latestGeneration);
LOG.info("Slave's generation: " + commit.getGeneration());
LOG.info("Starting replication process");
// get the list of files first
fetchFileList(latestGeneration);

View File

@ -1265,7 +1265,14 @@ public class FacetComponent extends SearchComponent {
if (facetFs != null) {
for (String field : facetFs) {
DistribFieldFacet ff = new DistribFieldFacet(rb, field);
final DistribFieldFacet ff;
if (params.getFieldBool(field, FacetParams.FACET_EXISTS, false)) {
// cap facet count by 1 with this method
ff = new DistribFacetExistsField(rb, field);
} else {
ff = new DistribFieldFacet(rb, field);
}
facets.put(ff.getKey(), ff);
}
}
@ -1469,7 +1476,7 @@ public class FacetComponent extends SearchComponent {
sfc.termNum = termNum++;
counts.put(name, sfc);
}
sfc.count += count;
incCount(sfc, count);
terms.set(sfc.termNum);
last = count;
}
@ -1485,6 +1492,10 @@ public class FacetComponent extends SearchComponent {
missingMax[shardNum] = last;
counted[shardNum] = terms;
}
protected void incCount(ShardFacetCount sfc, long count) {
sfc.count += count;
}
public ShardFacetCount[] getLexSorted() {
ShardFacetCount[] arr
@ -1530,7 +1541,7 @@ public class FacetComponent extends SearchComponent {
}
}
}
/**
* <b>This API is experimental and subject to change</b>
*/
@ -1547,4 +1558,18 @@ public class FacetComponent extends SearchComponent {
}
}
private static final class DistribFacetExistsField extends DistribFieldFacet {
private DistribFacetExistsField(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
SimpleFacets.checkMincountOnExists(field, minCount);
}
@Override
protected void incCount(ShardFacetCount sfc, long count) {
if (count>0) {
sfc.count = 1;
}
}
}
}
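DistribFacetExistsField caps each merged term count at 1, which is the distributed half of the new facet.exists behaviour; the single-node half lives in SimpleFacets below. The sketch that follows shows, from the client side with SolrJ, the parameter combination this code path expects (enum faceting, mincount no greater than 1). The base URL, collection, and field names are assumptions for illustration only.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class FacetExistsSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/products").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setFacet(true);
      q.addFacetField("category_s");
      q.set("facet.exists", true);      // cap every term count at 1
      q.set("facet.method", "enum");    // the only method facet.exists supports
      q.setFacetMinCount(1);            // mincount > 1 is rejected with BAD_REQUEST
      QueryResponse rsp = client.query(q);
      rsp.getFacetField("category_s").getValues()
         .forEach(c -> System.out.println(c.getName() + " -> " + c.getCount()));
    }
  }
}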

View File

@ -406,7 +406,8 @@ public class SimpleFacets {
String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
NamedList<Integer> counts;
SchemaField sf = searcher.getSchema().getField(field);
FieldType ft = sf.getType();
@ -422,13 +423,15 @@ public class SimpleFacets {
requestedMethod = FacetMethod.FC;
} else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
requestedMethod = FacetMethod.UIF;
}else{
} else {
requestedMethod=null;
}
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
FacetMethod appliedFacetMethod = selectFacetMethod(sf, requestedMethod, mincount);
FacetMethod appliedFacetMethod = selectFacetMethod(field,
sf, requestedMethod, mincount,
exists);
RTimer timer = null;
if (fdebug != null) {
@ -446,7 +449,8 @@ public class SimpleFacets {
switch (appliedFacetMethod) {
case ENUM:
assert TrieField.getMainValuePrefix(ft) == null;
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase, params);
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase,
exists);
break;
case FCS:
assert !multiToken;
@ -538,6 +542,29 @@ public class SimpleFacets {
return counts;
}
/**
* @param existsRequested whether facet.exists=true was passed for the given field
* */
static FacetMethod selectFacetMethod(String fieldName,
SchemaField field, FacetMethod method, Integer mincount,
boolean existsRequested) {
if (existsRequested) {
checkMincountOnExists(fieldName, mincount);
if (method == null) {
method = FacetMethod.ENUM;
}
}
final FacetMethod facetMethod = selectFacetMethod(field, method, mincount);
if (existsRequested && facetMethod!=FacetMethod.ENUM) {
throw new SolrException (ErrorCode.BAD_REQUEST,
FacetParams.FACET_EXISTS + "=true is requested, but "+
FacetParams.FACET_METHOD+"="+FacetParams.FACET_METHOD_enum+ " can't be used with "+fieldName
);
}
return facetMethod;
}
/**
* This method will force the appropriate facet method even if the user provided a different one as a request parameter
*
@ -811,7 +838,8 @@ public class SimpleFacets {
* @see FacetParams#FACET_ZEROS
* @see FacetParams#FACET_MISSING
*/
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase, SolrParams params)
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing,
String sort, String prefix, String contains, boolean ignoreCase, boolean intersectsCheck)
throws IOException {
/* :TODO: potential optimization...
@ -901,7 +929,11 @@ public class SimpleFacets {
deState.postingsEnum = postingsEnum;
}
c = searcher.numDocs(docs, deState);
if (intersectsCheck) {
c = searcher.intersects(docs, deState) ? 1 : 0;
} else {
c = searcher.numDocs(docs, deState);
}
postingsEnum = deState.postingsEnum;
} else {
@ -916,19 +948,33 @@ public class SimpleFacets {
if (postingsEnum instanceof MultiPostingsEnum) {
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
SEGMENTS_LOOP:
for (int subindex = 0; subindex < numSubs; subindex++) {
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
if (sub.postingsEnum == null) continue;
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid + base)) c++;
if (fastForRandomSet.exists(docid + base)) {
c++;
if (intersectsCheck) {
assert c==1;
break SEGMENTS_LOOP;
}
}
}
}
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (fastForRandomSet.exists(docid)) c++;
if (fastForRandomSet.exists(docid)) {
c++;
if (intersectsCheck) {
assert c==1;
break;
}
}
}
}
@ -969,6 +1015,15 @@ public class SimpleFacets {
return res;
}
public static void checkMincountOnExists(String fieldName, int mincount) {
if (mincount > 1) {
throw new SolrException (ErrorCode.BAD_REQUEST,
FacetParams.FACET_MINCOUNT + "="+mincount+" exceed 1 that's not supported with " +
FacetParams.FACET_EXISTS + "=true for " + fieldName
);
}
}
/**
* A simple key=&gt;val pair whose natural order is such that
* <b>higher</b> vals come before lower vals.

View File

@ -19,7 +19,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;

View File

@ -128,11 +128,13 @@ public class BoolField extends PrimitiveFieldType {
@Override
public String toExternal(IndexableField f) {
if (f.binaryValue() == null) {
return null;
if (null != f.binaryValue()) {
return indexedToReadable(f.binaryValue().utf8ToString());
}
return indexedToReadable(f.binaryValue().utf8ToString());
if (null != f.stringValue()) {
return indexedToReadable(f.stringValue());
}
return null;
}
@Override

View File

@ -517,6 +517,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
}
@Override public boolean needsScores() { return true; }
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.contexts[context.ord] = context;
@ -726,6 +728,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
@Override public boolean needsScores() { return true; }
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.contexts[context.ord] = context;
@ -909,6 +913,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
}
@Override public boolean needsScores() { return needsScores || super.needsScores(); }
public void setScorer(Scorer scorer) {
this.collapseStrategy.setScorer(scorer);
}
@ -1069,6 +1075,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
}
@Override public boolean needsScores() { return needsScores || super.needsScores(); }
public void setScorer(Scorer scorer) {
this.collapseStrategy.setScorer(scorer);
}
@ -1686,7 +1694,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private float[] ordVals;
private Map rcontext;
private final CollapseScore collapseScore = new CollapseScore();
private final boolean cscore;
private float score;
public OrdValueSourceStrategy(int maxDoc,
@ -1714,7 +1721,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
Arrays.fill(ordVals, Float.MAX_VALUE);
}
this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
if(this.needsScores) {
this.scores = new float[ords.length];
@ -1735,7 +1742,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
this.boostDocs.add(globalDoc);
}
if(needsScores || cscore) {
if (needsScores) {
this.score = scorer.score();
this.collapseScore.score = score;
}
@ -2208,7 +2215,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private FunctionValues functionValues;
private Map rcontext;
private final CollapseScore collapseScore = new CollapseScore();
private final boolean cscore;
private float score;
private int index=-1;
@ -2240,7 +2246,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
comp = new MinFloatComp();
}
this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
if(needsScores) {
this.scores = new float[size];
@ -2263,7 +2269,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
return;
}
if(needsScores || cscore) {
if (needsScores) {
this.score = scorer.score();
this.collapseScore.score = score;
}
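These hunks make the collapsing collectors advertise that they need scores instead of tracking a separate cscore flag. For context, the sketch below is a generic Lucene 6 SimpleCollector, not Solr's collapsing collector, showing the needsScores()/setScorer() contract the fix relies on.

import java.io.IOException;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;

public class ScoreAwareCollector extends SimpleCollector {
  private Scorer scorer;
  private float maxScore = Float.NEGATIVE_INFINITY;

  @Override
  public boolean needsScores() {
    return true;                 // tell the searcher to supply a scoring Scorer
  }

  @Override
  public void setScorer(Scorer scorer) throws IOException {
    this.scorer = scorer;        // called per segment before collect()
  }

  @Override
  public void collect(int doc) throws IOException {
    maxScore = Math.max(maxScore, scorer.score());
  }

  public float getMaxScore() {
    return maxScore;
  }
}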

View File

@ -2285,6 +2285,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return all.andNotSize(positiveA.union(positiveB));
}
/** @lucene.internal */
public boolean intersects(DocSet a, DocsEnumState deState) throws IOException {
return a.intersects(getDocSet(deState));
}
/**
* Takes a list of document IDs, and returns an array of Documents containing all of the stored fields.
*/

View File

@ -20,10 +20,8 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.LongAdder;
@ -47,7 +45,6 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.SolrConfig.UpdateHandlerInfo;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
@ -516,16 +513,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
return rc;
}
@SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
" but currently suspiciously used for replication as well")
private void setCommitData(IndexWriter iw) {
final Map<String,String> commitData = new HashMap<>();
commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY,
String.valueOf(System.currentTimeMillis()));
iw.setLiveCommitData(commitData.entrySet());
}
public void prepareCommit(CommitUpdateCommand cmd) throws IOException {
boolean error=true;
@ -534,7 +522,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
log.info("start "+cmd);
RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
try {
setCommitData(iw.get());
SolrIndexWriter.setCommitData(iw.get());
iw.get().prepareCommit();
} finally {
iw.decref();
@ -615,7 +603,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
// SolrCore.verbose("writer.commit() start writer=",writer);
if (writer.hasUncommittedChanges()) {
setCommitData(writer);
SolrIndexWriter.setCommitData(writer);
writer.commit();
} else {
log.info("No uncommitted changes. Skipping IW.commit.");
@ -800,7 +788,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
}
// todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
setCommitData(writer);
SolrIndexWriter.setCommitData(writer);
writer.commit();
synchronized (solrCoreState.getUpdateLock()) {

View File

@ -22,12 +22,12 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SlowCodecReaderWrapper;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -134,6 +134,11 @@ public class SolrIndexSplitter {
CodecReader subReader = SlowCodecReaderWrapper.wrap(leaves.get(segmentNumber).reader());
iw.addIndexes(new LiveDocsReader(subReader, segmentDocSets.get(segmentNumber)[partitionNumber]));
}
// we commit explicitly instead of sending a CommitUpdateCommand through the processor chain
// because the sub-shard cores will just ignore such a commit because the update log is not
// in active state at this time.
SolrIndexWriter.setCommitData(iw);
iw.commit();
success = true;
} finally {
if (iwRef != null) {
@ -151,8 +156,6 @@ public class SolrIndexSplitter {
}
FixedBitSet[] split(LeafReaderContext readerContext) throws IOException {
LeafReader reader = readerContext.reader();
FixedBitSet[] docSets = new FixedBitSet[numPieces];

View File

@ -18,6 +18,8 @@ package org.apache.solr.update;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.Codec;
@ -27,8 +29,9 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.InfoStream;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.slf4j.Logger;
@ -86,7 +89,16 @@ public class SolrIndexWriter extends IndexWriter {
this.directory = directory;
numOpens.incrementAndGet();
}
@SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
" but currently suspiciously used for replication as well")
public static void setCommitData(IndexWriter iw) {
log.info("Calling setCommitData with IW:" + iw.toString());
final Map<String,String> commitData = new HashMap<>();
commitData.put(COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
iw.setLiveCommitData(commitData.entrySet());
}
private void setDirectoryFactory(DirectoryFactory factory) {
this.directoryFactory = factory;
}
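setCommitData() stamps the current time into the commit's user data under COMMIT_TIME_MSEC_KEY, which IndexFetcher and the tree-merge path above read back during replication. The sketch below, not part of the patch, reads that value out of the newest commit with plain Lucene APIs; the index path is illustrative and the literal key "commitTimeMSec" is assumed to match COMMIT_TIME_MSEC_KEY.

import java.nio.file.Paths;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReadCommitTimeSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/collection/data/index"))) {
      List<IndexCommit> commits = DirectoryReader.listCommits(dir);
      IndexCommit latest = commits.get(commits.size() - 1);   // commits are sorted oldest first
      Map<String, String> userData = latest.getUserData();
      System.out.println("commitTimeMSec = " + userData.get("commitTimeMSec"));
    }
  }
}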

View File

@ -1169,12 +1169,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
if (upShards.contains(s)) {
// this is no longer true if there was a query timeout on an up shard
// assertTrue("Expected to find numFound in the up shard info",info.get("numFound") != null);
assertTrue("Expected to find shardAddress in the up shard info",info.get("shardAddress") != null);
assertTrue("Expected to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
}
else {
assertEquals("Expected to find the "+SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY+" header set if a shard is down",
Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
assertTrue("Expected to find error in the down shard info",info.get("error") != null);
assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
}
}
}

View File

@ -16,22 +16,39 @@
*/
package org.apache.solr;
import org.apache.lucene.util.TestUtil;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.JSONUtil;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.invoke.MethodHandles;
import java.util.*;
@Slow
public class TestRandomFaceting extends SolrTestCaseJ4 {
private static final Pattern trieFields = Pattern.compile(".*_t.");
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String FOO_STRING_FIELD = "foo_s1";
@ -80,6 +97,21 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
types.add(new FldType("missing_ss",new IRange(0,0), new SVal('a','b',1,1)));
// TODO: doubles, multi-floats, ints with precisionStep>0, booleans
types.add(new FldType("small_tf",ZERO_ONE, new FVal(-4,5)));
assert trieFields.matcher("small_tf").matches();
assert !trieFields.matcher("small_f").matches();
types.add(new FldType("foo_ti",ZERO_ONE, new IRange(-2,indexSize)));
assert trieFields.matcher("foo_ti").matches();
assert !trieFields.matcher("foo_i").matches();
types.add(new FldType("bool_b",ZERO_ONE, new Vals(){
@Override
public Comparable get() {
return random().nextBoolean();
}
}));
}
void addMoreDocs(int ndocs) throws Exception {
@ -144,8 +176,8 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
}
List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc"});
List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs"});
List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc", null});
List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs", null});
void doFacetTests(FldType ftype) throws Exception {
@ -154,10 +186,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
Random rand = random();
boolean validate = validateResponses;
ModifiableSolrParams params = params("facet","true", "wt","json", "indent","true", "omitHeader","true");
params.add("q","*:*", "rows","0"); // TODO: select subsets
params.add("q","*:*"); // TODO: select subsets
params.add("rows","0");
SchemaField sf = req.getSchema().getField(ftype.fname);
boolean multiValued = sf.getType().multiValuedFieldCache();
@ -198,6 +229,10 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
params.add("facet.missing", "true");
}
if (rand.nextBoolean()) {
params.add("facet.enum.cache.minDf",""+ rand.nextInt(indexSize));
}
// TODO: randomly add other facet params
String key = ftype.fname;
String facet_field = ftype.fname;
@ -210,45 +245,207 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
List<String> methods = multiValued ? multiValuedMethods : singleValuedMethods;
List<String> responses = new ArrayList<>(methods.size());
for (String method : methods) {
// params.add("facet.field", "{!key="+method+"}" + ftype.fname);
// TODO: allow method to be passed on local params?
params.set("facet.method", method);
// if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
String strResponse = h.query(req(params));
// Object realResponse = ObjectBuilder.fromJSON(strResponse);
// System.out.println(strResponse);
responses.add(strResponse);
for (boolean exists : new boolean [] {false, true}) {
// params.add("facet.field", "{!key="+method+"}" + ftype.fname);
// TODO: allow method to be passed on local params?
if (method!=null) {
params.set("facet.method", method);
} else {
params.remove("facet.method");
}
params.set("facet.exists", ""+exists);
if (!exists && rand.nextBoolean()) {
params.remove("facet.exists");
}
// if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation
if (exists && params.getInt("facet.mincount", 0)>1) {
assertQEx("no mincount on facet.exists",
rand.nextBoolean() ? "facet.exists":"facet.mincount",
req(params), ErrorCode.BAD_REQUEST);
continue;
}
// facet.exists can't be combined with a non-enum method, nor with enum on trie fields, because the method will be flipped to FC/FCS
final boolean notEnum = method != null && !method.equals("enum");
final boolean trieField = trieFields.matcher(ftype.fname).matches();
if ((notEnum || trieField) && exists) {
assertQEx("facet.exists only when enum or ommitted",
"facet.exists", req(params), ErrorCode.BAD_REQUEST);
continue;
}
}
String strResponse = h.query(req(params));
responses.add(strResponse);
if (responses.size()>1) {
validateResponse(responses.get(0), strResponse, params, method, methods);
}
}
}
/**
String strResponse = h.query(req(params));
Object realResponse = ObjectBuilder.fromJSON(strResponse);
**/
if (validate) {
for (int i=1; i<methods.size(); i++) {
String err = JSONTestUtil.match("/", responses.get(i), responses.get(0), 0.0);
if (err != null) {
log.error("ERROR: mismatch facet response: " + err +
"\n expected =" + responses.get(0) +
"\n response = " + responses.get(i) +
"\n request = " + params
);
fail(err);
}
}
}
} finally {
req.close();
}
}
private void validateResponse(String expected, String actual, ModifiableSolrParams params, String method,
List<String> methods) throws Exception {
if (params.getBool("facet.exists", false)) {
if (isSortByCount(params)) { // facet.sort=count is the tricky case
expected = getExpectationForSortByCount(params, methods); // it requires recalculating the expectation
} else { // facet.sort=index
expected = capFacetCountsTo1(expected);
}
}
String err = JSONTestUtil.match("/", actual, expected, 0.0);
if (err != null) {
log.error("ERROR: mismatch facet response: " + err +
"\n expected =" + expected +
"\n response = " + actual +
"\n request = " + params
);
fail(err);
}
}
/** if facet.exists=true with facet.sort=count,
* it should return all values with a count of 1, ordered by label index,
* then all values with 0, and then the missing count with a null label;
* in the implementation below these three groups are called "stratas"
*/
private String getExpectationForSortByCount( ModifiableSolrParams params, List<String> methods) throws Exception {
String indexSortedResponse = getIndexSortedAllFacetValues(params, methods);
return transformFacetFields(indexSortedResponse, e -> {
List<Object> facetSortedByIndex = (List<Object>) e.getValue();
Map<Integer,List<Object>> stratas = new HashMap<Integer,List<Object>>(){
@Override // poor man's multimap, I won't do that anymore, I swear.
public List<Object> get(Object key) {
if (!containsKey(key)) {
put((Integer) key, new ArrayList<>());
}
return super.get(key);
}
};
for (Iterator iterator = facetSortedByIndex.iterator(); iterator.hasNext();) {
Object label = (Object) iterator.next();
Long count = (Long) iterator.next();
final Integer strata;
if (label==null) { // missing count (here the "strata" notion is admittedly over-engineering)
strata = null;
}else {
if (count>0) {
count = 1L; // capping here
strata = 1; // non-zero counts go to stratum 1 (capped to one above)
} else {
strata = 0; // zero-count
}
}
final List<Object> facet = stratas.get(strata);
facet.add(label);
facet.add(count);
}
List<Object> stratified = new ArrayList<>();
for (Integer s : new Integer[]{1, 0}) { // non-zero counts (capped to one) go first, then the zeroes
stratified.addAll(stratas.get(s));
} // now crop to the requested window
int offset = params.getInt("facet.offset", 0) * 2;
int end = offset + params.getInt("facet.limit", 100) * 2;
int fromIndex = offset > stratified.size() ? stratified.size() : offset;
stratified = stratified.subList(fromIndex,
end > stratified.size() ? stratified.size() : end);
if (params.getInt("facet.limit", 100) > 0) { // limit=0 omits even the missing count
stratified.addAll(stratas.get(null));
}
facetSortedByIndex.clear();
facetSortedByIndex.addAll(stratified);
});
}
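To make the three-strata expectation above concrete, here is a small self-contained sketch (an editorial illustration, not part of this patch; the sample labels and counts are invented) that applies the same capping and reordering to an index-sorted facet list:
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class ThreeStrataSketch {
  public static void main(String[] args) {
    // index-sorted label/count pairs; the null label carries the missing count
    List<Object> indexSorted = Arrays.asList("A", 2L, "B", 0L, "C", 1L, null, 3L);
    List<Object> capped = new ArrayList<>();
    List<Object> zeroes = new ArrayList<>();
    List<Object> missing = new ArrayList<>();
    for (int i = 0; i < indexSorted.size(); i += 2) {
      Object label = indexSorted.get(i);
      Long count = (Long) indexSorted.get(i + 1);
      if (label == null) {            // missing count, never capped
        missing.add(label); missing.add(count);
      } else if (count > 0) {         // non-zero counts are capped to 1
        capped.add(label); capped.add(1L);
      } else {                        // zero counts keep their 0
        zeroes.add(label); zeroes.add(0L);
      }
    }
    List<Object> expected = new ArrayList<>(capped); // capped non-zero stratum first
    expected.addAll(zeroes);                         // then the zero stratum
    expected.addAll(missing);                        // then the missing count
    System.out.println(expected);                    // [A, 1, C, 1, B, 0, null, 3]
  }
}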
private String getIndexSortedAllFacetValues(ModifiableSolrParams in, List<String> methods) throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams(in);
params.set("facet.sort", "index");
String goodOldMethod = methods.get(random().nextInt( methods.size()));
params.set("facet.method", goodOldMethod);
params.set("facet.exists", "false");
if (random().nextBoolean()) {
params.remove("facet.exists");
}
params.set("facet.limit",-1);
params.set("facet.offset",0);
final String query;
SolrQueryRequest req = null;
try {
req = req(params);
query = h.query(req);
} finally {
req.close();
}
return query;
}
private boolean isSortByCount(ModifiableSolrParams in) {
boolean sortIsCount;
String sortParam = in.get("facet.sort");
sortIsCount = "count".equals(sortParam) || (sortParam==null && in.getInt("facet.limit",100)>0);
return sortIsCount;
}
/*
* {
"response":{"numFound":6,"start":0,"docs":[]
},
"facet_counts":{
"facet_queries":{},
"facet_fields":{
"foo_i":[
"6",2,
"2",1,
"3",1]},
"facet_ranges":{},
"facet_intervals":{},
"facet_heatmaps":{}}}
* */
@SuppressWarnings({"rawtypes", "unchecked"})
private String capFacetCountsTo1(String expected) throws IOException {
return transformFacetFields(expected, e -> {
List<Object> facetValues = (List<Object>) e.getValue();
for (ListIterator iterator = facetValues.listIterator(); iterator.hasNext();) {
Object value = iterator.next();
Long count = (Long) iterator.next();
if (value!=null && count > 1) {
iterator.set(1);
}
}
});
}
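As a quick illustration of the capping (again an editorial sketch, not part of this patch), applying it to the sample response quoted above turns "foo_i":["6",2,"2",1,"3",1] into "foo_i":["6",1,"2",1,"3",1]. A standalone version using the same noggit helpers might look like this:
import java.util.List;
import java.util.Map;
import org.noggit.JSONUtil;
import org.noggit.ObjectBuilder;
public class CapCountsSketch {
  @SuppressWarnings("unchecked")
  public static void main(String[] args) throws Exception {
    String json = "{\"facet_counts\":{\"facet_fields\":{\"foo_i\":[\"6\",2,\"2\",1,\"3\",1]}}}";
    Map<String, Object> root = (Map<String, Object>) ObjectBuilder.fromJSON(json);
    Map<String, Object> counts = (Map<String, Object>) root.get("facet_counts");
    Map<String, Object> fields = (Map<String, Object>) counts.get("facet_fields");
    List<Object> tuples = (List<Object>) fields.get("foo_i");
    for (int i = 1; i < tuples.size(); i += 2) {   // counts sit at the odd positions
      if (((Long) tuples.get(i)) > 1) {
        tuples.set(i, 1L);                         // cap the count at 1
      }
    }
    System.out.println(JSONUtil.toJSON(root));     // ..."foo_i":["6",1,"2",1,"3",1]...
  }
}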
private String transformFacetFields(String expected, Consumer<Map.Entry<Object,Object>> consumer) throws IOException {
Object json = ObjectBuilder.fromJSON(expected);
Map facet_fields = getFacetFieldMap(json);
Set entries = facet_fields.entrySet();
for (Object facetTuples : entries) { // though there should be only one field
Entry entry = (Entry)facetTuples;
consumer.accept(entry);
}
return JSONUtil.toJSON(json);
}
private Map getFacetFieldMap(Object json) {
Object facet_counts = ((Map)json).get("facet_counts");
Map facet_fields = (Map) ((Map)facet_counts).get("facet_fields");
return facet_fields;
}
}


@ -25,20 +25,28 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.CoreAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.CollectionStateWatcher;
import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
@ -56,6 +64,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
@ -71,6 +80,12 @@ public class ShardSplitTest extends BasicDistributedZkTest {
schemaString = "schema15.xml"; // we need a string id
}
@Override
public void distribSetUp() throws Exception {
super.distribSetUp();
useFactory(null);
}
@Test
public void test() throws Exception {
@ -91,6 +106,146 @@ public class ShardSplitTest extends BasicDistributedZkTest {
//waitForThingsToLevelOut(15);
}
/*
Creates a collection with replicationFactor=1, splits a shard. Restarts the sub-shard leader node.
Adds a replica. Ensures the doc count matches on the leader and the replica.
*/
public void testSplitStaticIndexReplication() throws Exception {
waitForThingsToLevelOut(15);
DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
Replica replica = defCol.getReplicas().get(0);
String nodeName = replica.getNodeName();
String collectionName = "testSplitStaticIndexReplication";
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
create.setCreateNodeSet(nodeName); // we want to create the leader on a fixed node so that we know which one to restart later
create.process(cloudClient);
try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
client.setDefaultCollection(collectionName);
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
try {
thread.start();
Thread.sleep(1000); // give the indexer some time to do its work
thread.safeStop();
thread.join();
client.commit();
controlClient.commit();
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
splitShard.setShardName(SHARD1);
String asyncId = splitShard.processAsync(client);
RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
if (state == RequestStatusState.COMPLETED) {
waitForRecoveriesToFinish(collectionName, true);
// let's wait to see parent shard become inactive
CountDownLatch latch = new CountDownLatch(1);
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
@Override
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
Slice parent = collectionState.getSlice(SHARD1);
Slice slice10 = collectionState.getSlice(SHARD1_0);
Slice slice11 = collectionState.getSlice(SHARD1_1);
if (slice10 != null && slice11 != null &&
parent.getState() == Slice.State.INACTIVE &&
slice10.getState() == Slice.State.ACTIVE &&
slice11.getState() == Slice.State.ACTIVE) {
latch.countDown();
return true; // removes the watch
}
return false;
}
});
latch.await(1, TimeUnit.MINUTES);
if (latch.getCount() != 0) {
// sanity check
fail("Sub-shards did not become active even after waiting for 1 minute");
}
int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
// restart the sub-shard leader node
boolean restarted = false;
for (JettySolrRunner jetty : jettys) {
int port = jetty.getBaseUrl().getPort();
if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
ChaosMonkey.kill(jetty);
ChaosMonkey.start(jetty);
restarted = true;
break;
}
}
if (!restarted) {
// sanity check
fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
}
// add a new replica for the sub-shard
CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
// use the control client because there is less chance of it being the node that was restarted
// this avoids test flakiness caused by NoHttpResponseExceptions
String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
state = addReplica.processAndWait(control, 30);
}
if (state == RequestStatusState.COMPLETED) {
CountDownLatch newReplicaLatch = new CountDownLatch(1);
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
@Override
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
if (liveNodes.size() != liveNodeCount) {
return false;
}
Slice slice = collectionState.getSlice(SHARD1_0);
if (slice.getReplicas().size() == 2) {
if (!slice.getReplicas().stream().anyMatch(r -> r.getState() == Replica.State.RECOVERING)) {
// we see replicas and none of them are recovering
newReplicaLatch.countDown();
return true;
}
}
return false;
}
});
newReplicaLatch.await(30, TimeUnit.SECONDS);
// check consistency of the sub-shard replicas explicitly because the checkShardConsistency method doesn't
// handle new shards/replicas well.
ClusterState clusterState = client.getZkStateReader().getClusterState();
DocCollection collection = clusterState.getCollection(collectionName);
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
} else {
fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds!. Saw state = " + state.getKey());
}
} else {
fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
}
} finally {
thread.safeStop();
thread.join();
}
}
}
private int assertConsistentReplicas(Slice shard) throws SolrServerException, IOException {
long numFound = Long.MIN_VALUE;
int count = 0;
for (Replica replica : shard.getReplicas()) {
HttpSolrClient client = new HttpSolrClient.Builder(replica.getCoreUrl())
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build();
QueryResponse response = client.query(new SolrQuery("q", "*:*", "distrib", "false"));
log.info("Found numFound={} on replica: {}", response.getResults().getNumFound(), replica.getCoreUrl());
if (numFound == Long.MIN_VALUE) {
numFound = response.getResults().getNumFound();
} else {
assertEquals("Shard " + shard.getName() + " replicas do not have same number of documents", numFound, response.getResults().getNumFound());
}
count++;
}
return count;
}
/**
* Used to test that we can split a shard when a previous split event
* left sub-shards in construction or recovery state.
@ -143,6 +298,218 @@ public class ShardSplitTest extends BasicDistributedZkTest {
}
}
@Test
public void testSplitWithChaosMonkey() throws Exception {
waitForThingsToLevelOut(15);
List<StoppableIndexingThread> indexers = new ArrayList<>();
try {
for (int i = 0; i < 1; i++) {
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, cloudClient, String.valueOf(i), true);
indexers.add(thread);
thread.start();
}
Thread.sleep(1000); // give the indexers some time to do their work
} catch (Exception e) {
log.error("Error in test", e);
} finally {
for (StoppableIndexingThread indexer : indexers) {
indexer.safeStop();
indexer.join();
}
}
cloudClient.commit();
controlClient.commit();
AtomicBoolean stop = new AtomicBoolean();
AtomicBoolean killed = new AtomicBoolean(false);
Runnable monkey = new Runnable() {
@Override
public void run() {
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
zkStateReader.registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
@Override
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
if (stop.get()) {
return true; // abort and remove the watch
}
Slice slice = collectionState.getSlice(SHARD1_0);
if (slice != null && slice.getReplicas().size() > 1) {
// ensure that only one watcher invocation thread can kill!
if (killed.compareAndSet(false, true)) {
log.info("Monkey thread found 2 replicas for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
try {
Thread.sleep(1000 + random().nextInt(500));
ChaosMonkey.kill(cjetty);
stop.set(true);
return true;
} catch (Exception e) {
log.error("Monkey unable to kill jetty at port " + cjetty.jetty.getLocalPort(), e);
}
}
}
log.info("Monkey thread found only one replica for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
return false;
}
});
}
};
Thread monkeyThread = null;
/*
somehow the cluster state object inside this zk state reader has a static copy of the collection which is never updated,
so any call to waitForRecoveriesToFinish just keeps looping until timeout.
We work around this by explicitly registering the collection as an interesting one so that it is watched by ZkStateReader;
see SOLR-9440. TODO: remove this hack after SOLR-9440 is fixed.
*/
cloudClient.getZkStateReader().registerCore(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
monkeyThread = new Thread(monkey);
monkeyThread.start();
try {
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
splitShard.setShardName(SHARD1);
String asyncId = splitShard.processAsync(cloudClient);
RequestStatusState splitStatus = null;
try {
splitStatus = CollectionAdminRequest.requestStatus(asyncId).waitFor(cloudClient, 120);
} catch (Exception e) {
log.warn("Failed to get request status, maybe because the overseer node was shutdown by monkey", e);
}
// we don't care if the split failed, because we are injecting faults and it is likely
// that the split has failed; in any case we want to assert that all docs that got
// indexed are available in SolrCloud, and if the split succeeded then all replicas of the sub-shard
// must be consistent (i.e. have the same numDocs)
log.info("Shard split request state is COMPLETED");
stop.set(true);
monkeyThread.join();
Set<String> addFails = new HashSet<>();
Set<String> deleteFails = new HashSet<>();
for (StoppableIndexingThread indexer : indexers) {
addFails.addAll(indexer.getAddFails());
deleteFails.addAll(indexer.getDeleteFails());
}
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
log.info("Starting shard1 leader jetty at port {}", cjetty.jetty.getLocalPort());
ChaosMonkey.start(cjetty.jetty);
cloudClient.getZkStateReader().forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
log.info("Current collection state: {}", printClusterStateInfo(AbstractDistribZkTestBase.DEFAULT_COLLECTION));
boolean replicaCreationsFailed = false;
if (splitStatus == RequestStatusState.FAILED) {
// either the creation of one or more replicas failed (because they may have been created on the same node as the parent shard leader),
// or the split may have failed while trying to soft-commit *after* all replicas had been created;
// the latter counts as a successful switch even if the API doesn't say so,
// so we must find a way to distinguish between the two.
// An easy way to do that is to look at the sub-shard replicas and check whether the replica core actually exists,
// instead of existing solely inside the cluster state
DocCollection collectionState = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
Slice slice10 = collectionState.getSlice(SHARD1_0);
Slice slice11 = collectionState.getSlice(SHARD1_1);
if (slice10 != null && slice11 != null) {
for (Replica replica : slice10) {
if (!doesReplicaCoreExist(replica)) {
replicaCreationsFailed = true;
break;
}
}
for (Replica replica : slice11) {
if (!doesReplicaCoreExist(replica)) {
replicaCreationsFailed = true;
break;
}
}
}
}
// true if sub-shard states switch to 'active' eventually
AtomicBoolean areSubShardsActive = new AtomicBoolean(false);
if (!replicaCreationsFailed) {
// all sub-shard replicas were created successfully so all cores must recover eventually
waitForRecoveriesToFinish(AbstractDistribZkTestBase.DEFAULT_COLLECTION, true);
// let's wait for the overseer to switch shard states
CountDownLatch latch = new CountDownLatch(1);
cloudClient.getZkStateReader().registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
@Override
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
Slice parent = collectionState.getSlice(SHARD1);
Slice slice10 = collectionState.getSlice(SHARD1_0);
Slice slice11 = collectionState.getSlice(SHARD1_1);
if (slice10 != null && slice11 != null &&
parent.getState() == Slice.State.INACTIVE &&
slice10.getState() == Slice.State.ACTIVE &&
slice11.getState() == Slice.State.ACTIVE) {
areSubShardsActive.set(true);
latch.countDown();
return true; // removes the watch
} else if (slice10 != null && slice11 != null &&
parent.getState() == Slice.State.ACTIVE &&
slice10.getState() == Slice.State.RECOVERY_FAILED &&
slice11.getState() == Slice.State.RECOVERY_FAILED) {
areSubShardsActive.set(false);
latch.countDown();
return true;
}
return false;
}
});
latch.await(2, TimeUnit.MINUTES);
if (latch.getCount() != 0) {
// sanity check
fail("We think that split was successful but sub-shard states were not updated even after 2 minutes.");
}
}
cloudClient.commit(); // for visibility of results on sub-shards
checkShardConsistency(true, true, addFails, deleteFails);
long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
// ensure we have added more than 0 docs
long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
assertEquals("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", ctrlDocs, cloudClientDocs);
// check consistency of the sub-shard replicas explicitly because the checkShardConsistency method doesn't
// handle new shards/replicas well.
if (areSubShardsActive.get()) {
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
DocCollection collection = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_1));
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_1", 2, numReplicasChecked);
}
} finally {
stop.set(true);
monkeyThread.join();
}
}
private boolean doesReplicaCoreExist(Replica replica) throws IOException {
try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getStr(BASE_URL_PROP))
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build()) {
String coreName = replica.getCoreName();
try {
CoreAdminResponse status = CoreAdminRequest.getStatus(coreName, client);
if (status.getCoreStatus(coreName) == null || status.getCoreStatus(coreName).size() == 0) {
return false;
}
} catch (Exception e) {
log.warn("Error gettting core status of replica " + replica + ". Perhaps it does not exist!", e);
return false;
}
}
return true;
}
@Test
public void testSplitShardWithRule() throws Exception {
waitForThingsToLevelOut(15);


@ -17,7 +17,6 @@
package org.apache.solr.cloud;
import java.io.File;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.ArrayList;
@ -27,10 +26,8 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
@ -235,70 +232,6 @@ public class TestMiniSolrCloudCluster extends LuceneTestCase {
}
}
@Test
public void testErrorsInStartup() throws Exception {
AtomicInteger jettyIndex = new AtomicInteger();
MiniSolrCloudCluster cluster = null;
try {
cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
@Override
public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
if (jettyIndex.incrementAndGet() != 2)
return super.startJettySolrRunner(name, context, config);
throw new IOException("Fake exception on startup!");
}
};
fail("Expected an exception to be thrown from MiniSolrCloudCluster");
}
catch (Exception e) {
assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
}
finally {
if (cluster != null)
cluster.shutdown();
}
}
@Test
public void testErrorsInShutdown() throws Exception {
AtomicInteger jettyIndex = new AtomicInteger();
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
@Override
protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
JettySolrRunner j = super.stopJettySolrRunner(jetty);
if (jettyIndex.incrementAndGet() == 2)
throw new IOException("Fake IOException on shutdown!");
return j;
}
};
try {
cluster.shutdown();
fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
}
catch (Exception e) {
assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
}
}
@Test
public void testExtraFilters() throws Exception {
Builder jettyConfig = JettyConfig.builder();
jettyConfig.waitForLoadingCoresToFinish(null);
jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
cluster.shutdown();
}
@Test
public void testCollectionCreateWithoutCoresThenDelete() throws Exception {


@ -1,207 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.File;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RequestStatusState;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.index.TieredMergePolicyFactory;
import org.apache.solr.util.RevertDefaultThreadHandlerRule;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
public class TestMiniSolrCloudClusterBase extends LuceneTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected int NUM_SERVERS = 5;
protected int NUM_SHARDS = 2;
protected int REPLICATION_FACTOR = 2;
public TestMiniSolrCloudClusterBase () {
NUM_SERVERS = 5;
NUM_SHARDS = 2;
REPLICATION_FACTOR = 2;
}
@Rule
public TestRule solrTestRules = RuleChain
.outerRule(new SystemPropertiesRestoreRule());
@ClassRule
public static TestRule solrClassRules = RuleChain.outerRule(
new SystemPropertiesRestoreRule()).around(
new RevertDefaultThreadHandlerRule());
@Test
public void testBasics() throws Exception {
final String collectionName = "testSolrCloudCollection";
testCollectionCreateSearchDelete(collectionName);
}
private MiniSolrCloudCluster createMiniSolrCloudCluster() throws Exception {
JettyConfig.Builder jettyConfig = JettyConfig.builder();
jettyConfig.waitForLoadingCoresToFinish(null);
return new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
}
private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId) throws Exception {
String configName = "solrCloudCollectionConfig";
File configDir = new File(SolrTestCaseJ4.TEST_HOME() + File.separator + "collection1" + File.separator + "conf");
miniCluster.uploadConfigDir(configDir, configName);
Map<String, String> collectionProperties = new HashMap<>();
collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml");
collectionProperties.put("solr.tests.maxBufferedDocs", "100000");
collectionProperties.put("solr.tests.ramBufferSizeMB", "100");
// use non-test classes so RandomizedRunner isn't necessary
if (random().nextBoolean()) {
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName());
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true");
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false");
} else {
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName());
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true");
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false");
}
collectionProperties.put("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
collectionProperties.put("solr.directoryFactory", "solr.RAMDirectoryFactory");
miniCluster.createCollection(collectionName, NUM_SHARDS, REPLICATION_FACTOR, configName, createNodeSet, asyncId, collectionProperties);
}
protected void testCollectionCreateSearchDelete(String collectionName) throws Exception {
MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
try {
assertNotNull(miniCluster.getZkServer());
List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
assertEquals(NUM_SERVERS, jettys.size());
for (JettySolrRunner jetty : jettys) {
assertTrue(jetty.isRunning());
}
// shut down a server
JettySolrRunner stoppedServer = miniCluster.stopJettySolrRunner(0);
assertTrue(stoppedServer.isStopped());
assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
// create a server
JettySolrRunner startedServer = miniCluster.startJettySolrRunner();
assertTrue(startedServer.isRunning());
assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());
// create collection
final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
createCollection(miniCluster, collectionName, null, asyncId);
if (asyncId != null) {
final RequestStatusState state = AbstractFullDistribZkTestBase.getRequestStateAfterCompletion(asyncId, 330,
cloudSolrClient);
assertSame("did not see async createCollection completion", RequestStatusState.COMPLETED, state);
}
try (SolrZkClient zkClient = new SolrZkClient
(miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
zkStateReader.createClusterStateWatchersAndUpdate();
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
// modify/query collection
cloudSolrClient.setDefaultCollection(collectionName);
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", "1");
cloudSolrClient.add(doc);
cloudSolrClient.commit();
SolrQuery query = new SolrQuery();
query.setQuery("*:*");
QueryResponse rsp = cloudSolrClient.query(query);
assertEquals(1, rsp.getResults().getNumFound());
// remove a server not hosting any replicas
zkStateReader.forceUpdateCollection(collectionName);
ClusterState clusterState = zkStateReader.getClusterState();
HashMap<String, JettySolrRunner> jettyMap = new HashMap<String, JettySolrRunner>();
for (JettySolrRunner jetty : miniCluster.getJettySolrRunners()) {
String key = jetty.getBaseUrl().toString().substring((jetty.getBaseUrl().getProtocol() + "://").length());
jettyMap.put(key, jetty);
}
Collection<Slice> slices = clusterState.getSlices(collectionName);
// track the servers not hosting replicas
for (Slice slice : slices) {
jettyMap.remove(slice.getLeader().getNodeName().replace("_solr", "/solr"));
for (Replica replica : slice.getReplicas()) {
jettyMap.remove(replica.getNodeName().replace("_solr", "/solr"));
}
}
assertTrue("Expected to find a node without a replica", jettyMap.size() > 0);
JettySolrRunner jettyToStop = jettyMap.entrySet().iterator().next().getValue();
jettys = miniCluster.getJettySolrRunners();
for (int i = 0; i < jettys.size(); ++i) {
if (jettys.get(i).equals(jettyToStop)) {
miniCluster.stopJettySolrRunner(i);
assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
}
}
// now restore the original state so that this function could be called multiple times
// re-create a server (to restore original NUM_SERVERS count)
startedServer = miniCluster.startJettySolrRunner();
assertTrue(startedServer.isRunning());
assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());
doExtraTests(miniCluster, zkClient, zkStateReader,cloudSolrClient, collectionName);
}
}
finally {
miniCluster.shutdown();
}
}
protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader, CloudSolrClient cloudSolrClient,
String defaultCollName) throws Exception { /*do nothing*/ }
}


@ -127,20 +127,6 @@ public class TestMiniSolrCloudClusterKerberos extends TestMiniSolrCloudCluster {
public void testCollectionCreateSearchDelete() throws Exception {
super.testCollectionCreateSearchDelete();
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
@Test
@Override
public void testErrorsInShutdown() throws Exception {
super.testErrorsInShutdown();
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
@Test
@Override
public void testErrorsInStartup() throws Exception {
super.testErrorsInStartup();
}
@Override
public void tearDown() throws Exception {


@ -25,6 +25,7 @@ import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.lucene.util.Constants;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
@ -57,16 +58,18 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
private static SolrClient solrClient;
private static String getUsersFirstGroup() throws Exception {
org.apache.hadoop.security.Groups hGroups =
new org.apache.hadoop.security.Groups(new Configuration());
String group = "*"; // accept any group if a group can't be found
try {
List<String> g = hGroups.getGroups(System.getProperty("user.name"));
if (g != null && g.size() > 0) {
group = g.get(0);
if (!Constants.WINDOWS) { // does not work on Windows!
org.apache.hadoop.security.Groups hGroups =
new org.apache.hadoop.security.Groups(new Configuration());
try {
List<String> g = hGroups.getGroups(System.getProperty("user.name"));
if (g != null && g.size() > 0) {
group = g.get(0);
}
} catch (NullPointerException npe) {
// if user/group doesn't exist on test box
}
} catch (NullPointerException npe) {
// if user/group doesn't exist on test box
}
return group;
}
@ -92,6 +95,8 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
@BeforeClass
public static void startup() throws Exception {
assumeFalse("Hadoop does not work on Windows", Constants.WINDOWS);
System.setProperty("authenticationPlugin", HttpParamDelegationTokenPlugin.class.getName());
System.setProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED, "true");
@ -151,7 +156,9 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
miniCluster.shutdown();
}
miniCluster = null;
solrClient.close();
if (solrClient != null) {
solrClient.close();
}
solrClient = null;
System.clearProperty("authenticationPlugin");
System.clearProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED);


@ -1,113 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.IndexFetcher;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;
import org.xml.sax.SAXException;
/**
*
*/
public class TestArbitraryIndexDir extends AbstractSolrTestCase {
@Rule
public TestRule testRules = new SystemPropertiesRestoreRule();
// TODO: fix this test to not require FSDirectory
@BeforeClass
public static void beforeClass() {
// this test wants to start solr, and then open a separate indexwriter of its own on the same dir.
System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
}
@Override
public void setUp() throws Exception {
super.setUp();
initCore("solrconfig.xml", "schema12.xml");
}
@Test
public void testLoadNewIndexDir() throws IOException, ParserConfigurationException, SAXException {
//add a doc in original index dir
assertU(adoc("id", String.valueOf(1),
"name", "name"+String.valueOf(1)));
//create a new index dir and index.properties file
File idxprops = new File(h.getCore().getDataDir() + IndexFetcher.INDEX_PROPERTIES);
Properties p = new Properties();
File newDir = new File(h.getCore().getDataDir() + "index_temp");
newDir.mkdirs();
p.put("index", newDir.getName());
Writer os = null;
try {
os = new OutputStreamWriter(new FileOutputStream(idxprops), StandardCharsets.UTF_8);
p.store(os, "index properties");
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
"Unable to write " + IndexFetcher.INDEX_PROPERTIES, e);
} finally {
IOUtils.closeWhileHandlingException(os);
}
//add a doc in the new index dir
Directory dir = newFSDirectory(newDir.toPath());
IndexWriter iw = new IndexWriter(
dir,
new IndexWriterConfig(new StandardAnalyzer())
);
Document doc = new Document();
doc.add(new TextField("id", "2", Field.Store.YES));
doc.add(new TextField("name", "name2", Field.Store.YES));
iw.addDocument(doc);
iw.commit();
iw.close();
//commit will cause searcher to open with the new index dir
assertU(commit());
h.getCoreContainer().reload(h.getCore().getName());
//new index dir contains just 1 doc.
assertQ("return doc with id 2",
req("id:2"),
"*[count(//doc)=1]"
);
dir.close();
}
}


@ -278,11 +278,11 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.LowerCaseFilter");
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.StopFilter");
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
@ -311,7 +311,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.LowerCaseFilter");
tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 6 tokens", 6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@ -320,7 +320,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
tokenList = valueResult.get("org.apache.lucene.analysis.StopFilter");
tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));


@ -209,7 +209,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 10);
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@ -222,7 +222,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.StopFilter");
tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 8);
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
@ -258,12 +258,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.LowerCaseFilter");
tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
tokenList = queryPart.get("org.apache.lucene.analysis.StopFilter");
tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
@ -416,7 +416,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));


@ -0,0 +1,236 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import static org.hamcrest.CoreMatchers.is;
import java.io.IOException;
import java.util.List;
import java.util.Random;
import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Before;
public class DistributedFacetExistsSmallTest extends BaseDistributedSearchTestCase {
public static final String FLD = "t_s";
private int maxId;
public DistributedFacetExistsSmallTest() {
}
@Before
public void prepareIndex() throws Exception {
del("*:*");
final Random rnd = random();
index(id, maxId=rnd.nextInt(5), FLD, "AAA");
index(id, maxId+=1+rnd.nextInt(5), FLD, "B");
index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
final SolrClient shard0 = clients.get(0);
// expectedly fails the test
//shard0.add(sdoc("id", 13, FLD, "DDD"));
commit();
handle.clear();
handle.put("QTime", SKIPVAL);
handle.put("timestamp", SKIPVAL);
handle.put("maxScore", SKIPVAL);
handle.put("_version_", SKIPVAL);
}
@ShardsFixed(num=4)
public void test() throws Exception{
checkBasicRequest();
checkWithMinCountEqOne();
checkWithSortCount();
checkWithMethodSetPerField();
{
// empty enum for checking NPE
final ModifiableSolrParams params = buildParams();
params.remove("facet.exists");
QueryResponse rsp = query(params);
}
checkRandomParams();
checkInvalidMincount();
}
private void checkRandomParams() throws Exception {
final ModifiableSolrParams params = buildParams();
Random rand = random();
if (rand.nextBoolean()) {
int from;
params.set("q", "["+(from = rand.nextInt(maxId/2))+
" TO "+((from-1)+(rand.nextInt(maxId)))+"]");
}
int offset = 0;
int indexSize = 6;
if (rand.nextInt(100) < 20) {
if (rand.nextBoolean()) {
offset = rand.nextInt(100) < 10 ? rand.nextInt(indexSize *2) : rand.nextInt(indexSize/3+1);
}
params.add("facet.offset", Integer.toString(offset));
}
int limit = 100;
if (rand.nextInt(100) < 20) {
if (rand.nextBoolean()) {
limit = rand.nextInt(100) < 10 ? rand.nextInt(indexSize/2+1) : rand.nextInt(indexSize*2);
}
params.add("facet.limit", Integer.toString(limit));
}
if (rand.nextBoolean()) {
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
}
if ( rand.nextInt(100) < 20) {
final String[] prefixes = new String[] {"A","B","C"};
params.add("facet.prefix", prefixes[rand.nextInt(prefixes.length)]);
}
if (rand.nextInt(100) < 20) {
params.add("facet.missing", "true");
}
if (rand.nextInt(100) < 20) { // assigning only valid vals
params.add("facet.mincount", rand.nextBoolean() ? "0": "1" );
}
final boolean shardRespondsWithMissingEvenLimitIsZero =
params.getBool("facet.missing", false) && params.getInt("facet.limit", 100)==0;
// skip the missing-count check; cloud behaves differently from non-distrib here
if (shardRespondsWithMissingEvenLimitIsZero ) {
handle.put(null, SKIP);
}
query(params);
if (shardRespondsWithMissingEvenLimitIsZero ) {
handle.remove(null);
}
}
private void checkInvalidMincount() throws SolrServerException, IOException {
final ModifiableSolrParams params = buildParams();
if (random().nextBoolean()) {
params.remove("facet.exists");
params.set("f."+FLD+".facet.exists","true");
}
if (random().nextBoolean()) {
params.set("facet.mincount", ""+(2+random().nextInt(100)) );
} else {
params.set("f."+FLD+".facet.mincount", ""+(2+random().nextInt(100)) );
}
try {
if (random().nextBoolean()) {
setDistributedParams(params);
queryServer(params);
} else {
params.set("distrib", "false");
controlClient.query(params);
}
fail();
} catch (SolrException e) { // check that distributed and single-index search fail the same way
assertEquals(e.code(), ErrorCode.BAD_REQUEST.code);
assertTrue(e.getMessage().contains("facet.exists"));
assertTrue(e.getMessage().contains("facet.mincount"));
assertTrue(e.getMessage().contains(FLD));
}
}
private void checkBasicRequest() throws Exception {
final ModifiableSolrParams params = buildParams();
QueryResponse rsp = query(params);
assertResponse(rsp);
}
private void checkWithMinCountEqOne() throws Exception {
final ModifiableSolrParams params = buildParams("facet.mincount","1");
QueryResponse rsp = query(params);
assertResponse(rsp);
}
private void checkWithSortCount() throws Exception {
final ModifiableSolrParams params = buildParams("facet.sort","count");
QueryResponse rsp = query(params);
assertResponse(rsp);
}
private void checkWithMethodSetPerField() throws Exception {
final ModifiableSolrParams params = buildParams("f." + FLD + ".facet.exists", "true");
params.remove("facet.exists");
QueryResponse rsp = query(params);
assertResponse(rsp);
}
private ModifiableSolrParams buildParams(String... additionalParams) {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add("q", "*:*");
params.add("rows", "0");
//params.add("debugQuery", "true");
params.add("facet", "true");
params.add("sort", "id asc");
if(random().nextBoolean()){
params.add("facet.method", "enum");
}
params.add("facet.exists", "true");
params.add("facet.field", FLD);
for(int i = 0; i < additionalParams.length;) {
params.add(additionalParams[i++], additionalParams[i++]);
}
return params;
}
private void assertResponse(QueryResponse rsp) {
final FacetField facetField = rsp.getFacetField(FLD);
assertThat(facetField.getValueCount(), is(6));
final List<FacetField.Count> counts = facetField.getValues();
for (FacetField.Count count : counts) {
assertThat("Count for: " + count.getName(), count.getCount(), is(1L));
}
assertThat(counts.get(0).getName(), is("AAA"));
assertThat(counts.get(1).getName(), is("B"));
assertThat(counts.get(2).getName(), is("BB"));
}
}
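For reference, a hedged SolrJ sketch of the kind of facet.exists request this test exercises, issued from the client side; the base URL and collection name below are assumptions, not taken from this patch:
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
public class FacetExistsSketch {
  public static void main(String[] args) throws Exception {
    // hypothetical endpoint; any collection with a "t_s" field would do
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(0);
      q.setFacet(true);
      q.addFacetField("t_s");
      q.set("facet.exists", "true"); // report at most 1 per bucket: "does the term exist?"
      QueryResponse rsp = client.query(q);
      for (FacetField.Count c : rsp.getFacetField("t_s").getValues()) {
        System.out.println(c.getName() + " -> " + c.getCount()); // every count is 0 or 1
      }
    }
  }
}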


@ -38,7 +38,6 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.util.TimeZoneUtils;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
@ -494,11 +493,9 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
ModifiableSolrParams params = params("q","*:*", "rows","0", "facet","true", "facet.field","{!key=myalias}"+field);
String[] methods = {null, "fc","enum","fcs", "uif"
};
String[] methods = {null, "fc","enum","fcs", "uif"};
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
methods = new String[]{null, "fc","enum", "uif"
};
methods = new String[]{null, "fc","enum", "uif"};
}
prefixes = prefixes==null ? new String[]{null} : prefixes;
@ -2017,6 +2014,49 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "3");
doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "100");
doFacetPrefix("t_s", null, "", "facet.method", "fc");
doFacetExistsPrefix("t_s", null, "");
doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "3");
doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "100");
}
@Test
public void testFacetExistsShouldThrowExceptionForMincountGreaterThanOne () throws Exception {
final String f = "t_s";
final List<String> msg = Arrays.asList("facet.mincount", "facet.exists", f);
Collections.shuffle(msg, random());
assertQEx("checking global method or per field", msg.get(0),
req("q", "id:[* TO *]"
,"indent","on"
,"facet","true"
, random().nextBoolean() ? "facet.exists": "f."+f+".facet.exists", "true"
,"facet.field", f
, random().nextBoolean() ? "facet.mincount" : "f."+f+".facet.mincount" ,
"" + (2+random().nextInt(Integer.MAX_VALUE-2))
)
, ErrorCode.BAD_REQUEST);
assertQ("overriding per field",
req("q", "id:[* TO *]"
,"indent","on"
,"facet","true"
,"facet.exists", "true"
,"f."+f+".facet.exists", "false"
,"facet.field", f
,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
),
"//lst[@name='facet_fields']/lst[@name='"+f+"']");
assertQ("overriding per field",
req("q", "id:[* TO *]"
,"indent","on"
,"facet","true"
,"facet.exists", "true"
,"facet.field", f
,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
,"f."+f+".facet.mincount", random().nextBoolean() ? "0":"1"
),
"//lst[@name='facet_fields']/lst[@name='"+f+"']");
}
static void indexFacetPrefixSingleValued() {
@@ -2037,7 +2077,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
}
@Test
@Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
//@Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
public void testFacetContainsUif() {
doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "uif");
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "uif", "facet.contains", "Ast");
@@ -2063,6 +2103,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "aSt", "facet.contains.ignoreCase", "true");
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "asT", "facet.contains.ignoreCase", "true");
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "aST", "facet.contains.ignoreCase", "true");
doFacetExistsPrefix("contains_s1", null, "Astra", "facet.contains", "Ast");
}
static void indexFacetPrefix(String idPrefix, String f, String termSuffix, String g) {
@@ -2313,6 +2354,239 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
);
}
public void doFacetExistsPrefix(String f, String local, String termSuffix, String... params) {
String indent="on";
String pre = "//lst[@name='"+f+"']";
String lf = local==null ? f : local+f;
assertQ("test field facet.method",
req(params, "q", "id:[* TO *]"
,"indent", indent
,"facet", "true"
,"f."+lf+".facet.exists", "true"
,"facet.field", lf
,"facet.mincount", "0"
,"facet.offset", "0"
,"facet.limit", "100"
,"facet.sort", "count"
,"facet.prefix", "B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, exact match first term",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, exact match first term, unsorted",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","index"
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, paging",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","1"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='BBB"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, paging",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","1"
,"facet.limit","1"
,"facet.sort","count"
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix end, not exact match",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","C"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix end, exact match",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","CC"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix past end",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","X"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
);
assertQ("test facet.prefix past end",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","1"
,"facet.limit","-1"
,"facet.sort","count"
,"facet.prefix","X"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
);
assertQ("test facet.prefix at start, exact match",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","AAA"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix at Start, not exact match",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","AA"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix before start",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","0"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","999"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
);
assertQ("test facet.prefix before start",
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","0"
,"facet.offset","2"
,"facet.limit","100"
,"facet.sort","count"
,"facet.prefix","999"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
);
// test offset beyond what is collected internally in queue
assertQ(
req(params, "q", "id:[* TO *]"
,"indent",indent
,"facet","true"
,"facet.exists", "true"
,"facet.field", lf
,"facet.mincount","1"
,"facet.offset","5"
,"facet.limit","10"
,"facet.sort","count"
,"facet.prefix","CC"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
);
}
public void doFacetContains(String f, String g, String termSuffix, String contains, String groupContains, String... params) {
String indent="on";
String pre = "//lst[@name='"+f+"']";

View File

@@ -263,6 +263,23 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
}
@Test // https://issues.apache.org/jira/browse/SOLR-9494
public void testNeedsScoreBugFixed() throws Exception {
String[] doc = {"id","1", "group_s", "xyz", "text_ws", "hello xxx world"};
assertU(adoc(doc));
assertU(commit());
ModifiableSolrParams params = params(
"q", "{!surround df=text_ws} 2W(hello, world)", // a SpanQuery that matches
"fq", "{!collapse field=group_s}", // collapse on some field
// note: rows= whatever; doesn't matter
"facet", "true", // facet on something
"facet.field", "group_s"
);
assertQ(req(params));
assertQ(req(params)); // fails *second* time!
}
@Test
public void testMergeBoost() throws Exception {

View File

@@ -40,20 +40,16 @@ import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.cloud.MiniSolrCloudCluster;
import org.apache.solr.cloud.TestMiniSolrCloudClusterBase;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.ContentStreamBase;
@@ -61,50 +57,50 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.util.SolrCLI;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Collections.singletonMap;
import static org.apache.solr.SolrTestCaseJ4.getHttpSolrClient;
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
public class BasicAuthIntegrationTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader,
CloudSolrClient cloudSolrClient, String defaultCollName) throws Exception {
private static final String COLLECTION = "authCollection";
@BeforeClass
public static void setupCluster() throws Exception {
configureCluster(3)
.addConfig("conf", configset("cloud-minimal"))
.configure();
CollectionAdminRequest.createCollection(COLLECTION, "conf", 3, 1).process(cluster.getSolrClient());
}
@Test
public void testBasicAuth() throws Exception {
String authcPrefix = "/admin/authentication";
String authzPrefix = "/admin/authorization";
String old = cloudSolrClient.getDefaultCollection();
cloudSolrClient.setDefaultCollection(null);
NamedList<Object> rsp;
HttpClient cl = null;
try {
cl = HttpClientUtil.createClient(null);
String baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
JettySolrRunner randomJetty = cluster.getRandomJetty(random());
String baseUrl = randomJetty.getBaseUrl().toString();
verifySecurityStatus(cl, baseUrl + authcPrefix, "/errorMessages", null, 20);
zkClient.setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
zkClient().setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
boolean found = false;
for (JettySolrRunner jettySolrRunner : miniCluster.getJettySolrRunners()) {
if(baseUrl.contains(String.valueOf(jettySolrRunner.getLocalPort()))){
found = true;
jettySolrRunner.stop();
jettySolrRunner.start();
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
break;
}
}
assertTrue("No server found to restart , looking for : "+baseUrl , found);
randomJetty.stop();
randomJetty.start(false);
baseUrl = randomJetty.getBaseUrl().toString();
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
String command = "{\n" +
"'set-user': {'harry':'HarryIsCool'}\n" +
@@ -112,11 +108,12 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
GenericSolrRequest genericReq = new GenericSolrRequest(SolrRequest.METHOD.POST, authcPrefix, new ModifiableSolrParams());
genericReq.setContentStreams(Collections.singletonList(new ContentStreamBase.ByteArrayStream(command.getBytes(UTF_8), "")));
try {
cloudSolrClient.request(genericReq);
fail("Should have failed with a 401");
} catch (HttpSolrClient.RemoteSolrException e) {
}
HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class, () -> {
cluster.getSolrClient().request(genericReq);
});
assertEquals(401, exp.code());
command = "{\n" +
"'set-user': {'harry':'HarryIsUberCool'}\n" +
"}";
@@ -130,7 +127,8 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
int statusCode = r.getStatusLine().getStatusCode();
Utils.consumeFully(r.getEntity());
assertEquals("proper_cred sent, but access denied", 200, statusCode);
baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/credentials/harry", NOT_NULL_PREDICATE, 20);
command = "{\n" +
@@ -139,7 +137,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
executeCommand(baseUrl + authzPrefix, cl,command, "solr", "SolrRocks");
baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/user-role/harry", NOT_NULL_PREDICATE, 20);
executeCommand(baseUrl + authzPrefix, cl, Utils.toJSONString(singletonMap("set-permission", Utils.makeMap
@@ -153,7 +151,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
("name", "collection-admin-edit", "role", "admin"))), "harry", "HarryIsUberCool" );
verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[2]/name", "collection-admin-edit", 20);
CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(defaultCollName);
CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(COLLECTION);
try (HttpSolrClient solrClient = getHttpSolrClient(baseUrl)) {
try {
@@ -170,18 +168,17 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
}
}
cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
.setBasicAuthCredentials("harry", "HarryIsUberCool"));
try {
cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
.setBasicAuthCredentials("harry", "Cool12345"));
fail("This should not succeed");
} catch (HttpSolrClient.RemoteSolrException e) {
}
cloudSolrClient.setDefaultCollection(old);
executeCommand(baseUrl + authzPrefix, cl,"{set-permission : { name : update , role : admin}}", "harry", "HarryIsUberCool");
SolrInputDocument doc = new SolrInputDocument();
@@ -190,7 +187,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
update.setBasicAuthCredentials("harry","HarryIsUberCool");
update.add(doc);
update.setCommitWithin(100);
cloudSolrClient.request(update);
cluster.getSolrClient().request(update, COLLECTION);
executeCommand(baseUrl + authcPrefix, cl, "{set-property : { blockUnknown: true}}", "harry", "HarryIsUberCool");

View File

@@ -116,23 +116,7 @@
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
otherwise they will be ignored. If you don't plan on using hdfs,
you can safely remove this section. -->
<!-- The root directory that collection data should be written to. -->
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<!-- The hadoop configuration files to use for the hdfs client. -->
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- The CodecFactory for defining the format of the inverted index.
The default implementation is SchemaCodecFactory, which is the official Lucene

View File

@@ -119,23 +119,7 @@
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
otherwise they will be ignored. If you don't plan on using hdfs,
you can safely remove this section. -->
<!-- The root directory that collection data should be written to. -->
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<!-- The hadoop configuration files to use for the hdfs client. -->
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- The CodecFactory for defining the format of the inverted index.
The default implementation is SchemaCodecFactory, which is the official Lucene

View File

@@ -116,23 +116,7 @@
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
otherwise they will be ignored. If you don't plan on using hdfs,
you can safely remove this section. -->
<!-- The root directory that collection data should be written to. -->
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<!-- The hadoop configuration files to use for the hdfs client. -->
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- The CodecFactory for defining the format of the inverted index.
The default implementation is SchemaCodecFactory, which is the official Lucene

View File

@@ -116,23 +116,7 @@
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
otherwise they will be ignored. If you don't plan on using hdfs,
you can safely remove this section. -->
<!-- The root directory that collection data should be written to. -->
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<!-- The hadoop configuration files to use for the hdfs client. -->
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- The CodecFactory for defining the format of the inverted index.
The default implementation is SchemaCodecFactory, which is the official Lucene

View File

@@ -117,23 +117,7 @@
persistent, and doesn't work with replication.
-->
<directoryFactory name="DirectoryFactory"
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
otherwise they will be ignored. If you don't plan on using hdfs,
you can safely remove this section. -->
<!-- The root directory that collection data should be written to. -->
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
<!-- The hadoop configuration files to use for the hdfs client. -->
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
<!-- Enable/Disable the hdfs cache. -->
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
<!-- Enable/Disable using one global cache for all SolrCores.
The settings used will be from the first HdfsDirectoryFactory created. -->
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
</directoryFactory>
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
<!-- The CodecFactory for defining the format of the inverted index.
The default implementation is SchemaCodecFactory, which is the official Lucene

View File

@@ -81,7 +81,16 @@ public class Slice extends ZkNodeProps implements Iterable<Replica> {
* shard in that state still receives update requests from the parent shard
* leader, but does not participate in distributed search.
*/
RECOVERY;
RECOVERY,
/**
* Sub-shards of a split shard are put in that state when the overseer deems the split failed,
* even though all replicas are active, because the leader node is either no longer live or
* has a different ephemeral owner (zk session id). Such conditions can potentially
* lead to data loss. See SOLR-9438 for details. A shard in that state will neither receive
* update requests from the parent shard leader, nor participate in distributed search.
*/
RECOVERY_FAILED;
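A minimal sketch (not part of this commit) of how cluster-state-reading code might skip sub-shards left in this state; the collection name is a made-up placeholder and the DocCollection/Slice accessors from org.apache.solr.common.cloud are assumed:

    // Sketch only: ignore sub-shards whose split was marked as failed (collection name is hypothetical).
    DocCollection coll = zkStateReader.getClusterState().getCollection("myCollection");
    for (Slice slice : coll.getSlices()) {
      if (slice.getState() == Slice.State.RECOVERY_FAILED) {
        continue; // such a shard neither receives parent-leader updates nor serves distributed search
      }
      // ... route requests to the remaining slices ...
    }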
@Override
public String toString() {

View File

@@ -32,6 +32,7 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
@@ -263,7 +264,14 @@ public class SolrZkClient implements Closeable {
@Override
public void process(final WatchedEvent event) {
log.debug("Submitting job to respond to event " + event);
zkCallbackExecutor.submit(() -> watcher.process(event));
try {
zkCallbackExecutor.submit(() -> watcher.process(event));
} catch (RejectedExecutionException e) {
// If not a graceful shutdown
if (!isClosed()) {
throw e;
}
}
}
};
}
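The same defensive pattern, sketched stand-alone in plain Java (generic, not Solr-specific): a RejectedExecutionException is only propagated when the executor was not shut down deliberately.

    // Sketch of the pattern: tolerate rejected callbacks only during a graceful shutdown.
    class CallbackDispatcher implements java.io.Closeable {
      private final java.util.concurrent.ExecutorService executor =
          java.util.concurrent.Executors.newSingleThreadExecutor();
      private volatile boolean closed = false;

      void dispatch(Runnable callback) {
        try {
          executor.submit(callback);
        } catch (java.util.concurrent.RejectedExecutionException e) {
          if (!closed) {
            throw e; // unexpected: nobody shut the executor down
          }
          // closing: late callbacks are dropped silently
        }
      }

      @Override
      public void close() {
        closed = true;
        executor.shutdown();
      }
    }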

View File

@@ -185,6 +185,14 @@ public interface FacetParams {
* only use the filterCache for terms with a df &gt;= to this parameter.
*/
public static final String FACET_ENUM_CACHE_MINDF = FACET + ".enum.cache.minDf";
/**
* A boolean parameter that caps the facet counts at 1.
* With this set, a returned count will only be 0 or 1.
* For apps that don't need the count, this should be an optimization
*/
public static final String FACET_EXISTS = FACET+".exists";
/**
* Any field whose terms the user wants to enumerate over for
* Facet Constraint Counts (multi-value)
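As a hedged illustration only (not part of this diff), a SolrJ client could turn on the facet.exists cap added above either globally or per field; the URL, collection, and field names below are made-up placeholders:

    // Sketch: existence-only facet counts via SolrJ; names below are hypothetical.
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      SolrQuery query = new SolrQuery("*:*");
      query.setRows(0);
      query.setFacet(true);
      query.addFacetField("category_s");
      query.set("facet.exists", "true");               // global: every facet count is returned as 0 or 1
      // query.set("f.category_s.facet.exists", "true"); // or cap a single field only
      QueryResponse rsp = client.query("myCollection", query);
      System.out.println(rsp.getFacetField("category_s").getValues());
    }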

View File

@@ -182,12 +182,15 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
// test a second query, test making a copy of the main query
SolrQuery query2 = query.getCopy();
query2.addFilterQuery("inStock:true");
Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
response = client.query( query2 );
Assert.assertEquals(1, query2.getFilterQueries().length);
Assert.assertEquals(0, response.getStatus());
Assert.assertEquals(2, response.getResults().getNumFound() );
Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
for (SolrDocument outDoc : response.getResults()) {
assertEquals(true, outDoc.getFieldValue("inStock"));
}
// sanity check round tripping of params...
query = new SolrQuery("foo");
query.addFilterQuery("{!field f=inStock}true");

View File

@@ -175,33 +175,36 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
List<Object> matchObj = generateAllDataTypes();
assertEquals(unmarshaledObj.size(), matchObj.size());
for(int i=0; i < unmarshaledObj.size(); i++) {
if(unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
byte[] b1 = (byte[]) unmarshaledObj.get(i);
byte[] b2 = (byte[]) matchObj.get(i);
assertTrue(Arrays.equals(b1, b2));
} else if(unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument ) {
assertTrue(compareSolrDocument(unmarshaledObj.get(i), matchObj.get(i)));
} else if(unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList ) {
assertTrue(compareSolrDocumentList(unmarshaledObj.get(i), matchObj.get(i)));
} else if(unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
assertTrue(compareSolrInputDocument(unmarshaledObj.get(i), matchObj.get(i)));
} else if(unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
assertTrue(assertSolrInputFieldEquals(unmarshaledObj.get(i), matchObj.get(i)));
} else {
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
}
}
compareObjects(unmarshaledObj, matchObj);
} catch (IOException e) {
throw e;
}
}
private void compareObjects(List unmarshaledObj, List matchObj) {
assertEquals(unmarshaledObj.size(), matchObj.size());
for (int i = 0; i < unmarshaledObj.size(); i++) {
if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
byte[] b1 = (byte[]) unmarshaledObj.get(i);
byte[] b2 = (byte[]) matchObj.get(i);
assertTrue(Arrays.equals(b1, b2));
} else if (unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument) {
assertTrue(compareSolrDocument(unmarshaledObj.get(i), matchObj.get(i)));
} else if (unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList) {
assertTrue(compareSolrDocumentList(unmarshaledObj.get(i), matchObj.get(i)));
} else if (unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
assertTrue(compareSolrInputDocument(unmarshaledObj.get(i), matchObj.get(i)));
} else if (unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
assertTrue(assertSolrInputFieldEquals(unmarshaledObj.get(i), matchObj.get(i)));
} else {
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
}
}
}
@Test
public void testBackCompatForSolrDocumentWithChildDocs() throws IOException {
JavaBinCodec javabin = new JavaBinCodec(){
@@ -267,14 +270,33 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
}
@Test
public void testResponseChildDocuments() throws IOException {
public void testAllTypes() throws IOException {
List<Object> obj = generateAllDataTypes();
compareObjects(
(List) getObject(getBytes(obj)),
(List) obj
);
}
private static Object serializeAndDeserialize(Object o) throws IOException {
return getObject(getBytes(o));
}
private static byte[] getBytes(Object o) throws IOException {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
javabin.marshal(generateSolrDocumentWithChildDocs(), baos);
javabin.marshal(o, baos);
return baos.toByteArray();
}
SolrDocument result = (SolrDocument) javabin.unmarshal(new ByteArrayInputStream(baos.toByteArray()));
private static Object getObject(byte[] bytes) throws IOException {
return new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
}
@Test
public void testResponseChildDocuments() throws IOException {
SolrDocument result = (SolrDocument) serializeAndDeserialize(generateSolrDocumentWithChildDocs());
assertEquals(2, result.size());
assertEquals("1", result.getFieldValue("id"));
assertEquals("parentDocument", result.getFieldValue("subject"));
@@ -305,13 +327,11 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
@Test
public void testStringCaching() throws Exception {
Map<String, Object> m = Utils.makeMap("key1", "val1", "key2", "val2");
byte[] b1 = getBytes(m);//copy 1
byte[] b2 = getBytes(m);//copy 2
Map m1 = (Map) getObject(b1);
Map m2 = (Map) getObject(b1);
ByteArrayOutputStream os1 = new ByteArrayOutputStream();
new JavaBinCodec().marshal(m, os1);
Map m1 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os1.toByteArray()));
ByteArrayOutputStream os2 = new ByteArrayOutputStream();
new JavaBinCodec().marshal(m, os2);
Map m2 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os2.toByteArray()));
List l1 = new ArrayList<>(m1.keySet());
List l2 = new ArrayList<>(m2.keySet());
@@ -346,8 +366,8 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
});
m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os1.toByteArray()));
m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os2.toByteArray()));
m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b1));
m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b2));
l1 = new ArrayList<>(m1.keySet());
l2 = new ArrayList<>(m2.keySet());
assertTrue(l1.get(0).equals(l2.get(0)));
@@ -359,26 +379,19 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
}
public void genBinaryFiles() throws IOException {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Object data = generateAllDataTypes();
javabin.marshal(data, os);
byte[] out = os.toByteArray();
byte[] out = getBytes(data);
FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
BufferedOutputStream bos = new BufferedOutputStream(fs);
bos.write(out);
bos.close();
//Binary file with child documents
javabin = new JavaBinCodec();
SolrDocument sdoc = generateSolrDocumentWithChildDocs();
os = new ByteArrayOutputStream();
javabin.marshal(sdoc, os);
fs = new FileOutputStream(new File(BIN_FILE_LOCATION_CHILD_DOCS));
bos = new BufferedOutputStream(fs);
bos.write(os.toByteArray());
bos.write(getBytes(sdoc));
bos.close();
}
@@ -553,12 +566,7 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
sdoc.put("some_boolean", ""+r.nextBoolean());
sdoc.put("another_boolean", ""+r.nextBoolean());
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
javabin.marshal(sdoc, os);
os.toByteArray();
buffers[bufnum] = os.toByteArray();
buffers[bufnum] = getBytes(sdoc);
}
int ret = 0;

View File

@@ -51,9 +51,6 @@
<!-- redefine the clover setup, because we dont want to run clover for the test-framework -->
<target name="-clover.setup" if="run.clover"/>
<!-- redefine the test compilation, so it's just a no-op -->
<target name="compile-test"/>
<!-- redefine the forbidden apis for tests, as we check ourselves -->
<target name="-check-forbidden-tests" depends="-init-forbidden-apis,compile-core">

View File

@@ -374,7 +374,7 @@ public class MiniSolrCloudCluster {
* @throws Exception on error
*/
public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception {
jetty.start();
jetty.start(false);
jettys.add(jetty);
return jetty;
}

View File

@@ -27,6 +27,7 @@ import java.util.List;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.common.cloud.SolrZkClient;
import org.junit.AfterClass;
import org.junit.Before;
@@ -143,6 +144,10 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
/** The cluster */
protected static MiniSolrCloudCluster cluster;
protected SolrZkClient zkClient() {
return cluster.getSolrClient().getZkStateReader().getZkClient();
}
/**
* Call this to configure a cluster of n nodes.
*

View File

@@ -0,0 +1,11 @@
# Logging level
log4j.rootLogger=INFO, CONSOLE
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.Target=System.err
log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
log4j.logger.org.apache.zookeeper=WARN
log4j.logger.org.apache.hadoop=WARN
log4j.logger.org.apache.directory=WARN
log4j.logger.org.apache.solr.hadoop=INFO

View File

@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.junit.Test;
import static org.hamcrest.core.IsNot.not;
public class JettySolrRunnerTest extends SolrTestCaseJ4 {
@Test
public void testRestartPorts() throws Exception {
Path solrHome = createTempDir();
Files.write(solrHome.resolve("solr.xml"), MiniSolrCloudCluster.DEFAULT_CLOUD_SOLR_XML.getBytes(Charset.defaultCharset()));
JettyConfig config = JettyConfig.builder().build();
JettySolrRunner jetty = new JettySolrRunner(solrHome.toString(), config);
try {
jetty.start();
URL url = jetty.getBaseUrl();
int usedPort = url.getPort();
jetty.stop();
jetty.start();
assertEquals("After restart, jetty port should be the same", usedPort, jetty.getBaseUrl().getPort());
jetty.stop();
jetty.start(false);
assertThat("After restart, jetty port should be different", jetty.getBaseUrl().getPort(), not(usedPort));
}
finally {
if (jetty.isRunning())
jetty.stop();
}
}
}

View File

@@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.client.solrj.embedded.JettyConfig;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.util.RevertDefaultThreadHandlerRule;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
public class MiniSolrCloudClusterTest extends LuceneTestCase {
@ClassRule
public static TestRule solrClassRules = RuleChain.outerRule(
new SystemPropertiesRestoreRule()).around(
new RevertDefaultThreadHandlerRule());
@Test
public void testErrorsInStartup() throws Exception {
AtomicInteger jettyIndex = new AtomicInteger();
MiniSolrCloudCluster cluster = null;
try {
cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
@Override
public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
if (jettyIndex.incrementAndGet() != 2)
return super.startJettySolrRunner(name, context, config);
throw new IOException("Fake exception on startup!");
}
};
fail("Expected an exception to be thrown from MiniSolrCloudCluster");
}
catch (Exception e) {
assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
}
finally {
if (cluster != null)
cluster.shutdown();
}
}
@Test
public void testErrorsInShutdown() throws Exception {
AtomicInteger jettyIndex = new AtomicInteger();
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
@Override
protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
JettySolrRunner j = super.stopJettySolrRunner(jetty);
if (jettyIndex.incrementAndGet() == 2)
throw new IOException("Fake IOException on shutdown!");
return j;
}
};
try {
cluster.shutdown();
fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
}
catch (Exception e) {
assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
}
}
@Test
public void testExtraFilters() throws Exception {
JettyConfig.Builder jettyConfig = JettyConfig.builder();
jettyConfig.waitForLoadingCoresToFinish(null);
jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(random().nextInt(3) + 1, createTempDir(), jettyConfig.build());
cluster.shutdown();
}
}