mirror of https://github.com/apache/lucene.git
commit 652065e14e
Merge remote-tracking branch 'origin/master'
@ -7,6 +7,8 @@
<content url="file://$MODULE_DIR$">
  <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
  <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
  <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
  <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
@ -61,6 +61,11 @@
    <directory>${module-path}/src/resources</directory>
  </resource>
</resources>
<testResources>
  <testResource>
    <directory>${module-path}/src/test-files</directory>
  </testResource>
</testResources>
<plugins>
  <plugin>
    <groupId>org.apache.maven.plugins</groupId>
@ -86,16 +86,24 @@ def check_url_list(lst):
|
|||
if mirror_contains_file(url):
|
||||
p('.')
|
||||
else:
|
||||
p('X')
|
||||
p('\nFAIL: ' + url + '\n' if args.details else 'X')
|
||||
ret.append(url)
|
||||
|
||||
return ret
|
||||
|
||||
parser = argparse.ArgumentParser(description='Checks that all Lucene mirrors contain a copy of a release')
|
||||
parser.add_argument('-version', '-v', help='Lucene version to check', required=True)
|
||||
parser.add_argument('-interval', '-i', help='seconds to wait to query again pending mirrors', type=int, default=300)
|
||||
desc = 'Periodically checks that all Lucene/Solr mirrors contain either a copy of a release or a specified path'
|
||||
parser = argparse.ArgumentParser(description=desc)
|
||||
parser.add_argument('-version', '-v', help='Lucene/Solr version to check')
|
||||
parser.add_argument('-path', '-p', help='instead of a versioned release, check for some/explicit/path')
|
||||
parser.add_argument('-interval', '-i', help='seconds to wait before re-querying mirrors', type=int, default=300)
|
||||
parser.add_argument('-details', '-d', help='print missing mirror URLs', action='store_true', default=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
if (args.version is None and args.path is None) \
|
||||
or (args.version is not None and args.path is not None):
|
||||
p('You must specify either -version or -path but not both!\n')
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
conn = http.HTTPConnection('www.apache.org')
|
||||
conn.request('GET', '/mirrors/')
|
||||
|
@ -105,9 +113,9 @@ except Exception as e:
|
|||
p('Unable to fetch the Apache mirrors list!\n')
|
||||
sys.exit(1)
|
||||
|
||||
apache_path = 'lucene/java/{}/changes/Changes.html'.format(args.version);
|
||||
maven_url = 'http://repo1.maven.org/maven2/' \
|
||||
'org/apache/lucene/lucene-core/{0}/lucene-core-{0}.pom.asc'.format(args.version)
|
||||
mirror_path = args.path if args.path is not None else 'lucene/java/{}/changes/Changes.html'.format(args.version)
|
||||
maven_url = None if args.version is None else 'http://repo1.maven.org/maven2/' \
|
||||
'org/apache/lucene/lucene-core/{0}/lucene-core-{0}.pom.asc'.format(args.version)
|
||||
maven_available = False
|
||||
|
||||
pending_mirrors = []
|
||||
|
@ -119,18 +127,19 @@ for match in re.finditer('<TR>(.*?)</TR>', str(html), re.MULTILINE | re.IGNORECA
|
|||
|
||||
match = re.search('<A\s+HREF\s*=\s*"([^"]+)"\s*>', row, re.MULTILINE | re.IGNORECASE)
|
||||
if match:
|
||||
pending_mirrors.append(match.group(1) + apache_path)
|
||||
pending_mirrors.append(match.group(1) + mirror_path)
|
||||
|
||||
total_mirrors = len(pending_mirrors)
|
||||
|
||||
label = args.version if args.version is not None else args.path
|
||||
while True:
|
||||
p('\n' + str(datetime.datetime.now()))
|
||||
p('\n{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
|
||||
p('\nPolling {} Apache Mirrors'.format(len(pending_mirrors)))
|
||||
if not maven_available:
|
||||
if maven_url is not None and not maven_available:
|
||||
p(' and Maven Central')
|
||||
p('...\n')
|
||||
|
||||
if not maven_available:
|
||||
if maven_url is not None and not maven_available:
|
||||
maven_available = mirror_contains_file(maven_url)
|
||||
|
||||
start = time.time()
|
||||
|
@ -140,14 +149,14 @@ while True:
|
|||
|
||||
available_mirrors = total_mirrors - len(pending_mirrors)
|
||||
|
||||
p('\n\n{} is{}downloadable from Maven Central\n'.format(args.version, maven_available and ' ' or ' not '))
|
||||
p('{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'.format(args.version, available_mirrors,
|
||||
total_mirrors,
|
||||
available_mirrors * 100 / total_mirrors))
|
||||
if maven_url is not None:
|
||||
p('\n\n{} is{}downloadable from Maven Central'.format(label, ' ' if maven_available else ' not '))
|
||||
p('\n{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'
|
||||
.format(label, available_mirrors, total_mirrors, available_mirrors * 100 / total_mirrors))
|
||||
if len(pending_mirrors) == 0:
|
||||
break
|
||||
|
||||
if remaining > 0:
|
||||
p('Sleeping for {} seconds...\n'.format(remaining))
|
||||
p('Sleeping for {:d} seconds...\n'.format(int(remaining + 0.5)))
|
||||
time.sleep(remaining)
|
||||
|
||||
|
|
|
@ -36,7 +36,36 @@ Other
|
|||
|
||||
======================= Lucene 6.3.0 =======================
|
||||
|
||||
(No changes)
|
||||
API Changes
|
||||
|
||||
* LUCENE-7436: MinHashFilter's constructor, and some of its default
|
||||
settings, should be public. (Doug Turnbull via Mike McCandless)
|
||||
|
||||
New Features
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
|
||||
trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
|
||||
term. (Thomas Kappler via David Smiley)
|
||||
|
||||
* LUCENE-7440: Document id skipping (PostingsEnum.advance) could throw an
|
||||
ArrayIndexOutOfBoundsException exception on large index segments (>1.8B docs)
|
||||
with large skips. (yonik)
|
||||
|
||||
* LUCENE-7442: MinHashFilter's ctor should validate its args.
|
||||
(Cao Manh Dat via Steve Rowe)
|
||||
|
||||
* LUCENE-7318: Fix backwards compatibility issues around StandardAnalyzer
|
||||
and its components, introduced with Lucene 6.2.0. The moved classes
|
||||
were restored in their original packages: LowercaseFilter and StopFilter,
|
||||
as well as several utility classes. (Uwe Schindler, Mike McCandless)
|
||||
|
||||
Improvements
|
||||
|
||||
Optimizations
|
||||
|
||||
Other
|
||||
|
||||
======================= Lucene 6.2.0 =======================
|
||||
|
||||
|
@ -632,6 +661,9 @@ Other
|
|||
* LUCENE-7095: Add point values support to the numeric field query time join.
|
||||
(Martijn van Groningen, Mike McCandless)
|
||||
|
||||
======================= Lucene 5.5.3 =======================
|
||||
(No Changes)
|
||||
|
||||
======================= Lucene 5.5.2 =======================
|
||||
|
||||
Bug Fixes
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Normalizes token text to lower case.
|
||||
* <p>
|
||||
* This class moved to Lucene Core, but a reference in the {@code analysis/common} module
|
||||
* is preserved for documentation purposes and consistency with the filter factory.
|
||||
* @see org.apache.lucene.analysis.LowerCaseFilter
|
||||
* @see LowerCaseFilterFactory
|
||||
*/
|
||||
public final class LowerCaseFilter extends org.apache.lucene.analysis.LowerCaseFilter {
|
||||
|
||||
/**
|
||||
* Create a new LowerCaseFilter, that normalizes token text to lower case.
|
||||
*
|
||||
* @param in TokenStream to filter
|
||||
*/
|
||||
public LowerCaseFilter(TokenStream in) {
|
||||
super(in);
|
||||
}
|
||||
|
||||
}
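For context, a minimal sketch of wiring the core filter that this wrapper delegates to into a token stream, assuming Lucene 6.x APIs; the LowerCaseDemo class and the input text are hypothetical, not part of this change:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical demo class, not part of the patch.
public class LowerCaseDemo {
  public static void main(String[] args) throws IOException {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("Hello Lucene"));
    // The core filter does the work; the analysis/common subclass above only delegates.
    TokenStream stream = new LowerCaseFilter(tokenizer);
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // prints "hello", "lucene"
    }
    stream.end();
    stream.close();
  }
}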
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.core;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* Removes stop words from a token stream.
|
||||
* <p>
|
||||
* This class moved to Lucene Core, but a reference in the {@code analysis/common} module
|
||||
* is preserved for documentation purposes and consistency with the filter factory.
|
||||
* @see org.apache.lucene.analysis.StopFilter
|
||||
* @see StopFilterFactory
|
||||
*/
|
||||
public final class StopFilter extends org.apache.lucene.analysis.StopFilter {
|
||||
|
||||
/**
|
||||
* Constructs a filter which removes words from the input TokenStream that are
|
||||
* named in the Set.
|
||||
*
|
||||
* @param in
|
||||
* Input stream
|
||||
* @param stopWords
|
||||
* A {@link CharArraySet} representing the stopwords.
|
||||
* @see #makeStopSet(java.lang.String...)
|
||||
*/
|
||||
public StopFilter(TokenStream in, CharArraySet stopWords) {
|
||||
super(in, stopWords);
|
||||
}
|
||||
|
||||
}
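Likewise, a minimal sketch of the core StopFilter this wrapper delegates to, assuming Lucene 6.x APIs and the inherited static makeStopSet helper referenced in the javadoc; the StopFilterDemo class and the word list are hypothetical:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical demo class, not part of the patch.
public class StopFilterDemo {
  public static void main(String[] args) throws IOException {
    CharArraySet stopWords = StopFilter.makeStopSet("the", "a", "an");
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the quick fox"));
    TokenStream stream = new StopFilter(tokenizer, stopWords);
    CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(term.toString()); // prints "quick", "fox"
    }
    stream.end();
    stream.close();
  }
}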
|
|
@ -21,7 +21,6 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.WordlistLoader; // jdocs
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
|
|
|
@ -49,11 +49,11 @@ public class MinHashFilter extends TokenFilter {
|
|||
|
||||
private static final LongPair[] cachedIntHashes = new LongPair[HASH_CACHE_SIZE];
|
||||
|
||||
static final int DEFAULT_HASH_COUNT = 1;
|
||||
public static final int DEFAULT_HASH_COUNT = 1;
|
||||
|
||||
static final int DEFAULT_HASH_SET_SIZE = 1;
|
||||
public static final int DEFAULT_HASH_SET_SIZE = 1;
|
||||
|
||||
static final int DEFAULT_BUCKET_COUNT = 512;
|
||||
public static final int DEFAULT_BUCKET_COUNT = 512;
|
||||
|
||||
static final String MIN_HASH_TYPE = "MIN_HASH";
|
||||
|
||||
|
@ -112,8 +112,17 @@ public class MinHashFilter extends TokenFilter {
|
|||
* @param hashSetSize the no. of min hashes to keep
|
||||
* @param withRotation whether or not to rotate hashes while incrementing tokens
|
||||
*/
|
||||
MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
|
||||
public MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
|
||||
super(input);
|
||||
if (hashCount <= 0) {
|
||||
throw new IllegalArgumentException("hashCount must be greater than zero");
|
||||
}
|
||||
if (bucketCount <= 0) {
|
||||
throw new IllegalArgumentException("bucketCount must be greater than zero");
|
||||
}
|
||||
if (hashSetSize <= 0) {
|
||||
throw new IllegalArgumentException("hashSetSize must be greater than zero");
|
||||
}
|
||||
this.hashCount = hashCount;
|
||||
this.bucketCount = bucketCount;
|
||||
this.hashSetSize = hashSetSize;
|
||||
|
|
|
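A minimal usage sketch, assuming Lucene 6.x and the org.apache.lucene.analysis.minhash package; the MinHashDemo class and input text are hypothetical. It shows what this change enables: constructing the filter directly through the now-public constructor and default constants, with the new argument validation failing fast:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.minhash.MinHashFilter;

// Hypothetical demo class, not part of the patch.
public class MinHashDemo {
  public static void main(String[] args) throws IOException {
    // The constructor now rejects non-positive arguments:
    try {
      new MinHashFilter(new WhitespaceTokenizer(), 0, 512, 1, true);
    } catch (IllegalArgumentException expected) {
      System.out.println(expected.getMessage()); // "hashCount must be greater than zero"
    }

    // With the constructor and default constants public, the filter can be built
    // directly, outside of MinHashFilterFactory:
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("lucene is a search library"));
    TokenStream minHashed = new MinHashFilter(tokenizer,
        MinHashFilter.DEFAULT_HASH_COUNT,
        MinHashFilter.DEFAULT_BUCKET_COUNT,
        MinHashFilter.DEFAULT_HASH_SET_SIZE,
        true /* withRotation */);
    System.out.println(minHashed != null);
  }
}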
@ -46,5 +46,9 @@
|
|||
and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
|
||||
</li>
|
||||
</ul>
|
||||
<p>
|
||||
This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
|
||||
and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
|
||||
The factories for those components (e.g., used in Solr) are still part of this module.
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
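To illustrate the point about the factories remaining in this module, a hedged sketch using CustomAnalyzer from analysis/common, assuming the SPI names "whitespace", "lowercase" and "stop"; the FactoryDemo class and sample text are hypothetical:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical demo class, not part of the patch.
public class FactoryDemo {
  public static void main(String[] args) throws Exception {
    Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer("whitespace")
        .addTokenFilter("lowercase")  // LowerCaseFilterFactory, backed by the core LowerCaseFilter
        .addTokenFilter("stop")       // StopFilterFactory, backed by the core StopFilter
        .build();
    try (TokenStream ts = analyzer.tokenStream("f", "The Quick Fox")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term.toString()); // "quick", "fox"
      }
      ts.end();
    }
  }
}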
@ -106,7 +106,9 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
|
|||
SnowballFilter.class, // this is called SnowballPorterFilterFactory
|
||||
PatternKeywordMarkerFilter.class,
|
||||
SetKeywordMarkerFilter.class,
|
||||
UnicodeWhitespaceTokenizer.class // a supported option via WhitespaceTokenizerFactory
|
||||
UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
|
||||
org.apache.lucene.analysis.StopFilter.class, // class from core, but StopFilterFactory creates one from this module
|
||||
org.apache.lucene.analysis.LowerCaseFilter.class // class from core, but LowerCaseFilterFactory creates one from this module
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -166,7 +166,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
|||
// also randomly pick it:
|
||||
ValidatingTokenFilter.class,
|
||||
// TODO: needs to be a tokenizer, doesnt handle graph inputs properly (a shingle or similar following will then cause pain)
|
||||
WordDelimiterFilter.class)) {
|
||||
WordDelimiterFilter.class,
|
||||
// clones of core's filters:
|
||||
org.apache.lucene.analysis.core.StopFilter.class,
|
||||
org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
|
||||
for (Constructor<?> ctor : c.getConstructors()) {
|
||||
brokenConstructors.put(ctor, ALWAYS);
|
||||
}
|
||||
|
|
|
@ -357,7 +357,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
"5.5.1-cfs",
|
||||
"5.5.1-nocfs",
|
||||
"5.5.2-cfs",
|
||||
"5.5.2-nocfs"
|
||||
"5.5.2-nocfs",
|
||||
"5.5.3-cfs",
|
||||
"5.5.3-nocfs"
|
||||
};
|
||||
|
||||
// TODO: on 6.0.0 release, gen the single segment indices and add here:
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.CharacterUtils;
|
|||
/**
|
||||
* Normalizes token text to lower case.
|
||||
*/
|
||||
public final class LowerCaseFilter extends TokenFilter {
|
||||
public class LowerCaseFilter extends TokenFilter {
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
||||
/**
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.analysis.CharArraySet;
|
|||
/**
|
||||
* Removes stop words from a token stream.
|
||||
*/
|
||||
public final class StopFilter extends FilteringTokenFilter {
|
||||
public class StopFilter extends FilteringTokenFilter {
|
||||
|
||||
private final CharArraySet stopWords;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
|
|
@ -63,7 +63,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
/** skipInterval of each level. */
|
||||
private int skipInterval[];
|
||||
|
||||
/** Number of docs skipped per level. */
|
||||
/** Number of docs skipped per level.
|
||||
* It's possible for some values to overflow a signed int, but this has been accounted for.
|
||||
*/
|
||||
private int[] numSkipped;
|
||||
|
||||
/** Doc id of current skip entry per level. */
|
||||
|
@ -150,8 +152,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
|
|||
setLastSkipData(level);
|
||||
|
||||
numSkipped[level] += skipInterval[level];
|
||||
|
||||
if (numSkipped[level] > docCount) {
|
||||
|
||||
// numSkipped may overflow a signed int, so compare as unsigned.
|
||||
if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
|
||||
// this skip list is exhausted
|
||||
skipDoc[level] = Integer.MAX_VALUE;
|
||||
if (numberOfSkipLevels > level) numberOfSkipLevels = level;
|
||||
|
|
|
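A tiny self-contained illustration (hypothetical UnsignedCompareDemo class, invented numbers, not part of the patch) of why the comparison above must be unsigned once numSkipped can exceed Integer.MAX_VALUE:

// Hypothetical illustration of the unsigned comparison used in the fix above.
public class UnsignedCompareDemo {
  public static void main(String[] args) {
    int numSkipped = Integer.MAX_VALUE;
    numSkipped += 1000;             // wraps around to a negative value
    int docCount = 1_900_000_000;   // a segment with > 1.8B docs

    // Signed comparison wrongly says the skip list is not yet exhausted:
    System.out.println(numSkipped > docCount);                             // false
    // Unsigned comparison treats the wrapped value as larger than docCount:
    System.out.println(Integer.compareUnsigned(numSkipped, docCount) > 0); // true
  }
}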
@ -429,12 +429,10 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
|
||||
SegmentCommitInfo info;
|
||||
float level;
|
||||
int index;
|
||||
|
||||
public SegmentInfoAndLevel(SegmentCommitInfo info, float level, int index) {
|
||||
public SegmentInfoAndLevel(SegmentCommitInfo info, float level) {
|
||||
this.info = info;
|
||||
this.level = level;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
// Sorts largest to smallest
|
||||
|
@ -475,7 +473,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
size = 1;
|
||||
}
|
||||
|
||||
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
|
||||
final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm);
|
||||
levels.add(infoLevel);
|
||||
|
||||
if (verbose(writer)) {
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
|||
* though you can explicitly choose classic Levenshtein by passing <code>false</code>
|
||||
* to the <code>transpositions</code> parameter.
|
||||
*
|
||||
* <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
|
||||
* <p>This query uses {@link MultiTermQuery.TopTermsBlendedFreqScoringRewrite}
|
||||
* as default. So terms will be collected and scored according to their
|
||||
* edit distance. Only the top terms are used for building the {@link BooleanQuery}.
|
||||
* It is not recommended to change the rewrite mode for fuzzy queries.
|
||||
|
|
|
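For reference, a minimal sketch of constructing a FuzzyQuery that is rewritten with the default TopTermsBlendedFreqScoringRewrite described above; the FuzzyQueryDemo class and the field/term literals are hypothetical:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;

// Hypothetical demo class, not part of the patch.
public class FuzzyQueryDemo {
  public static void main(String[] args) {
    // maxEdits=2, prefixLength=0, maxExpansions=50, transpositions=true
    FuzzyQuery q = new FuzzyQuery(new Term("body", "lucene"), 2, 0, 50, true);
    // Terms within 2 edits are collected and scored by their edit distance;
    // only the top 50 feed the rewritten BooleanQuery.
    System.out.println(q);
  }
}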
@ -17,12 +17,7 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
|
@ -35,10 +30,12 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.ByteRunAutomaton;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
|
||||
/** Subclass of TermsEnum for enumerating all terms that are similar
|
||||
* to the specified filter term.
|
||||
*
|
||||
|
@ -46,38 +43,46 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
|||
* {@link BytesRef#compareTo}. Each term in the enumeration is
|
||||
* greater than all that precede it.</p>
|
||||
*/
|
||||
public class FuzzyTermsEnum extends TermsEnum {
|
||||
public final class FuzzyTermsEnum extends TermsEnum {
|
||||
|
||||
// NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum:
|
||||
private TermsEnum actualEnum;
|
||||
private BoostAttribute actualBoostAtt;
|
||||
|
||||
private final BoostAttribute boostAtt =
|
||||
attributes().addAttribute(BoostAttribute.class);
|
||||
|
||||
// We use this to communicate the score (boost) of the current matched term we are on back to
|
||||
// MultiTermQuery.TopTermsBlendedFreqScoringRewrite that is collecting the best (default 50) matched terms:
|
||||
private final BoostAttribute boostAtt;
|
||||
|
||||
// MultiTermQuery.TopTermsBlendedFreqScoringRewrite tells us the worst boost still in its queue using this att,
|
||||
// which we use to know when we can reduce the automaton from ed=2 to ed=1, or ed=0 if only single top term is collected:
|
||||
private final MaxNonCompetitiveBoostAttribute maxBoostAtt;
|
||||
|
||||
// We use this to share the pre-built (once for the query) Levenshtein automata across segments:
|
||||
private final LevenshteinAutomataAttribute dfaAtt;
|
||||
|
||||
private float bottom;
|
||||
private BytesRef bottomTerm;
|
||||
|
||||
protected final float minSimilarity;
|
||||
protected final float scale_factor;
|
||||
|
||||
protected final int termLength;
|
||||
|
||||
protected int maxEdits;
|
||||
protected final boolean raw;
|
||||
private final CompiledAutomaton automata[];
|
||||
|
||||
protected final Terms terms;
|
||||
private final Term term;
|
||||
protected final int termText[];
|
||||
protected final int realPrefixLength;
|
||||
|
||||
private final boolean transpositions;
|
||||
private BytesRef queuedBottom;
|
||||
|
||||
final int termLength;
|
||||
|
||||
// Maximum number of edits we will accept. This is either 2 or 1 (or, degenerately, 0) passed by the user originally,
|
||||
// but as we collect terms, we can lower this (e.g. from 2 to 1) if we detect that the term queue is full, and all
|
||||
// collected terms are ed=1:
|
||||
private int maxEdits;
|
||||
|
||||
final Terms terms;
|
||||
final Term term;
|
||||
final int termText[];
|
||||
final int realPrefixLength;
|
||||
|
||||
// True (the default, in FuzzyQuery) if a transposition should count as a single edit:
|
||||
final boolean transpositions;
|
||||
|
||||
/**
|
||||
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
|
||||
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity >
|
||||
* <code>minSimilarity</code>.
|
||||
* length <code>prefixLength</code> with <code>term</code> and which have at most {@code maxEdits} edits.
|
||||
* <p>
|
||||
* After calling the constructor the enumeration is already pointing to the first
|
||||
* valid term if such a term exists.
|
||||
|
@ -87,105 +92,88 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
* that contains information about competitive boosts during rewrite. It is also used
|
||||
* to cache DFAs between segment transitions.
|
||||
* @param term Pattern term.
|
||||
* @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
|
||||
* representing edit distance. Passing a fraction is deprecated.
|
||||
* @param maxEdits Maximum edit distance.
|
||||
* @param prefixLength Length of required common prefix. Default value is 0.
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*/
|
||||
public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
|
||||
final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
|
||||
if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
|
||||
throw new IllegalArgumentException("fractional edit distances are not allowed");
|
||||
if (minSimilarity < 0.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
|
||||
if(prefixLength < 0)
|
||||
final int maxEdits, final int prefixLength, boolean transpositions) throws IOException {
|
||||
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("max edits must be 0.." + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got: " + maxEdits);
|
||||
}
|
||||
if (prefixLength < 0) {
|
||||
throw new IllegalArgumentException("prefixLength cannot be less than 0");
|
||||
}
|
||||
this.maxEdits = maxEdits;
|
||||
this.terms = terms;
|
||||
this.term = term;
|
||||
|
||||
|
||||
// convert the string into a utf32 int[] representation for fast comparisons
|
||||
final String utf16 = term.text();
|
||||
this.termText = new int[utf16.codePointCount(0, utf16.length())];
|
||||
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
|
||||
termText[j++] = cp = utf16.codePointAt(i);
|
||||
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
|
||||
termText[j++] = cp = utf16.codePointAt(i);
|
||||
}
|
||||
this.termLength = termText.length;
|
||||
|
||||
this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);
|
||||
this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
|
||||
|
||||
// NOTE: boostAtt must pulled from attributes() not from atts! This is because TopTermsRewrite looks for boostAtt from this TermsEnum's
|
||||
// private attributes() and not the global atts passed to us from MultiTermQuery:
|
||||
this.boostAtt = attributes().addAttribute(BoostAttribute.class);
|
||||
|
||||
//The prefix could be longer than the word.
|
||||
//It's kind of silly though. It means we must match the entire word.
|
||||
this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
|
||||
// if minSimilarity >= 1, we treat it as number of edits
|
||||
if (minSimilarity >= 1f) {
|
||||
this.minSimilarity = 0; // just driven by number of edits
|
||||
maxEdits = (int) minSimilarity;
|
||||
raw = true;
|
||||
} else {
|
||||
this.minSimilarity = minSimilarity;
|
||||
// calculate the maximum k edits for this similarity
|
||||
maxEdits = initialMaxDistance(this.minSimilarity, termLength);
|
||||
raw = false;
|
||||
}
|
||||
if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new UnsupportedOperationException("with transpositions enabled, distances > "
|
||||
+ LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
|
||||
}
|
||||
this.transpositions = transpositions;
|
||||
this.scale_factor = 1.0f / (1.0f - this.minSimilarity);
|
||||
|
||||
this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
|
||||
CompiledAutomaton[] prevAutomata = dfaAtt.automata();
|
||||
if (prevAutomata == null) {
|
||||
prevAutomata = new CompiledAutomaton[maxEdits+1];
|
||||
|
||||
LevenshteinAutomata builder =
|
||||
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
|
||||
|
||||
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
|
||||
for (int i = 0; i <= maxEdits; i++) {
|
||||
Automaton a = builder.toAutomaton(i, prefix);
|
||||
prevAutomata[i] = new CompiledAutomaton(a, true, false);
|
||||
}
|
||||
|
||||
// first segment computes the automata, and we share with subsequent segments via this Attribute:
|
||||
dfaAtt.setAutomata(prevAutomata);
|
||||
}
|
||||
|
||||
this.automata = prevAutomata;
|
||||
bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
|
||||
bottomTerm = maxBoostAtt.getCompetitiveTerm();
|
||||
bottomChanged(null, true);
|
||||
bottomChanged(null);
|
||||
}
|
||||
|
||||
/**
|
||||
* return an automata-based enum for matching up to editDistance from
|
||||
* lastTerm, if possible
|
||||
*/
|
||||
protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
|
||||
throws IOException {
|
||||
final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
|
||||
if (editDistance < runAutomata.size()) {
|
||||
//System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
|
||||
final CompiledAutomaton compiled = runAutomata.get(editDistance);
|
||||
return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())),
|
||||
runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
|
||||
private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException {
|
||||
assert editDistance < automata.length;
|
||||
final CompiledAutomaton compiled = automata[editDistance];
|
||||
BytesRef initialSeekTerm;
|
||||
if (lastTerm == null) {
|
||||
// This is the first enum we are pulling:
|
||||
initialSeekTerm = null;
|
||||
} else {
|
||||
return null;
|
||||
// We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2):
|
||||
initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder());
|
||||
}
|
||||
return terms.intersect(compiled, initialSeekTerm);
|
||||
}
|
||||
|
||||
/** initialize levenshtein DFAs up to maxDistance, if possible */
|
||||
private List<CompiledAutomaton> initAutomata(int maxDistance) {
|
||||
final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
|
||||
//System.out.println("cached automata size: " + runAutomata.size());
|
||||
if (runAutomata.size() <= maxDistance &&
|
||||
maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
LevenshteinAutomata builder =
|
||||
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
|
||||
|
||||
String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
|
||||
for (int i = runAutomata.size(); i <= maxDistance; i++) {
|
||||
Automaton a = builder.toAutomaton(i, prefix);
|
||||
//System.out.println("compute automaton n=" + i);
|
||||
runAutomata.add(new CompiledAutomaton(a, true, false));
|
||||
}
|
||||
}
|
||||
return runAutomata;
|
||||
}
|
||||
|
||||
/** swap in a new actual enum to proxy to */
|
||||
protected void setEnum(TermsEnum actualEnum) {
|
||||
this.actualEnum = actualEnum;
|
||||
this.actualBoostAtt = actualEnum.attributes().addAttribute(BoostAttribute.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* fired when the max non-competitive boost has changed. this is the hook to
|
||||
* swap in a smarter actualEnum
|
||||
* swap in a smarter actualEnum.
|
||||
*/
|
||||
private void bottomChanged(BytesRef lastTerm, boolean init)
|
||||
throws IOException {
|
||||
private void bottomChanged(BytesRef lastTerm) throws IOException {
|
||||
int oldMaxEdits = maxEdits;
|
||||
|
||||
// true if the last term encountered is lexicographically equal or after the bottom term in the PQ
|
||||
|
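A standalone sketch (hypothetical LevAutomatonDemo class, invented example terms) of the per-edit-distance automata the constructor above builds once per query and shares across segments via the attribute:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;

// Hypothetical demo class, not part of the patch.
public class LevAutomatonDemo {
  public static void main(String[] args) {
    LevenshteinAutomata builder = new LevenshteinAutomata("lucene", true /* transpositions */);
    Automaton lev1 = builder.toAutomaton(1, "" /* no required prefix */);
    ByteRunAutomaton run = new CompiledAutomaton(lev1, true, false).runAutomaton;

    BytesRef candidate = new BytesRef("lucane");  // one substitution away
    System.out.println(run.run(candidate.bytes, candidate.offset, candidate.length)); // true
    BytesRef far = new BytesRef("solr");          // well beyond one edit
    System.out.println(run.run(far.bytes, far.offset, far.length)); // false
  }
}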
@ -193,49 +181,73 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
|
||||
// as long as the max non-competitive boost is >= the max boost
|
||||
// for some edit distance, keep dropping the max edit distance.
|
||||
while (maxEdits > 0 && (termAfter ? bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits)))
|
||||
while (maxEdits > 0) {
|
||||
float maxBoost = 1.0f - ((float) maxEdits / (float) termLength);
|
||||
if (bottom < maxBoost || (bottom == maxBoost && termAfter == false)) {
|
||||
break;
|
||||
}
|
||||
maxEdits--;
|
||||
}
|
||||
|
||||
// TODO: this opto could be improved, e.g. if the worst term in the queue is zzzz with ed=2, then, really, on the next segment, we
|
||||
// should only be looking for ed=1 terms up until zzzz, then ed=2. Tricky :)
|
||||
|
||||
if (oldMaxEdits != maxEdits || init) { // the maximum n has changed
|
||||
maxEditDistanceChanged(lastTerm, maxEdits, init);
|
||||
if (oldMaxEdits != maxEdits || lastTerm == null) {
|
||||
// This is a very powerful optimization: the maximum edit distance has changed. This happens because we collect only the top scoring
|
||||
// N (= 50, by default) terms, and if e.g. maxEdits=2, and the queue is now full of matching terms, and we notice that the worst entry
|
||||
// in that queue is ed=1, then we can switch the automata here to ed=1 which is a big speedup.
|
||||
actualEnum = getAutomatonEnum(maxEdits, lastTerm);
|
||||
}
|
||||
}
|
||||
|
||||
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
|
||||
throws IOException {
|
||||
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
|
||||
// instead of assert, we do a hard check in case someone uses our enum directly
|
||||
// assert newEnum != null;
|
||||
if (newEnum == null) {
|
||||
assert maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
||||
throw new IllegalArgumentException("maxEdits cannot be > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE");
|
||||
}
|
||||
setEnum(newEnum);
|
||||
}
|
||||
|
||||
// for some raw min similarity and input term length, the maximum # of edits
|
||||
private int initialMaxDistance(float minimumSimilarity, int termLen) {
|
||||
return (int) ((1D-minimumSimilarity) * termLen);
|
||||
}
|
||||
|
||||
// for some number of edits, the maximum possible scaled boost
|
||||
private float calculateMaxBoost(int nEdits) {
|
||||
final float similarity = 1.0f - ((float) nEdits / (float) (termLength));
|
||||
return (similarity - minSimilarity) * scale_factor;
|
||||
}
|
||||
|
||||
private BytesRef queuedBottom = null;
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
|
||||
if (queuedBottom != null) {
|
||||
bottomChanged(queuedBottom, false);
|
||||
bottomChanged(queuedBottom);
|
||||
queuedBottom = null;
|
||||
}
|
||||
|
||||
BytesRef term = actualEnum.next();
|
||||
boostAtt.setBoost(actualBoostAtt.getBoost());
|
||||
|
||||
|
||||
BytesRef term;
|
||||
|
||||
// while loop because we skip short terms even if they are within the specified edit distance (see the NOTE in FuzzyQuery class javadocs)
|
||||
while (true) {
|
||||
|
||||
term = actualEnum.next();
|
||||
if (term == null) {
|
||||
// end
|
||||
break;
|
||||
}
|
||||
|
||||
int ed = maxEdits;
|
||||
|
||||
// we know the outer DFA always matches.
|
||||
// now compute exact edit distance
|
||||
while (ed > 0) {
|
||||
if (matches(term, ed - 1)) {
|
||||
ed--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ed == 0) { // exact match
|
||||
boostAtt.setBoost(1.0F);
|
||||
break;
|
||||
} else {
|
||||
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||
int minTermLength = Math.min(codePointCount, termLength);
|
||||
|
||||
// only accept a matching term if it's longer than the edit distance:
|
||||
if (minTermLength > ed) {
|
||||
float similarity = 1.0f - (float) ed / (float) minTermLength;
|
||||
boostAtt.setBoost(similarity);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
|
||||
final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
|
||||
if (term != null && (bottom != this.bottom || bottomTerm != this.bottomTerm)) {
|
||||
|
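A small numeric illustration (hypothetical FuzzyBoostDemo class, invented term lengths) of the boost assigned in the loop above, where similarity = 1 - ed / min(term lengths):

// Hypothetical arithmetic demo, not part of the patch.
public class FuzzyBoostDemo {
  public static void main(String[] args) {
    int termLength = 6;     // query term has 6 code points, e.g. "lucene"
    int ed = 1;             // matched term is one edit away
    int codePointCount = 6; // matched term also has 6 code points
    int minTermLength = Math.min(codePointCount, termLength);
    float similarity = 1.0f - (float) ed / (float) minTermLength;
    System.out.println(similarity); // 0.8333..., used as the term's boost
  }
}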
@ -243,11 +255,18 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
this.bottomTerm = bottomTerm;
|
||||
// clone the term before potentially doing something with it
|
||||
// this is a rare but wonderful occurrence anyway
|
||||
|
||||
// We must delay bottomChanged until the next next() call otherwise we mess up docFreq(), etc., for the current term:
|
||||
queuedBottom = BytesRef.deepCopyOf(term);
|
||||
}
|
||||
|
||||
return term;
|
||||
}
|
||||
|
||||
/** returns true if term is within k edits of the query term */
|
||||
private boolean matches(BytesRef termIn, int k) {
|
||||
return k == 0 ? termIn.equals(term.bytes()) : automata[k].runAutomaton.run(termIn.bytes, termIn.offset, termIn.length);
|
||||
}
|
||||
|
||||
// proxy all other enum calls to the actual enum
|
||||
@Override
|
||||
|
@ -300,109 +319,43 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
return actualEnum.term();
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement fuzzy enumeration with Terms.intersect.
|
||||
* <p>
|
||||
* This is the fastest method as opposed to LinearFuzzyTermsEnum:
|
||||
* as enumeration is logarithmic to the number of terms (instead of linear)
|
||||
* and comparison is linear to length of the term (rather than quadratic)
|
||||
*/
|
||||
private class AutomatonFuzzyTermsEnum extends FilteredTermsEnum {
|
||||
private final ByteRunAutomaton matchers[];
|
||||
|
||||
private final BytesRef termRef;
|
||||
|
||||
private final BoostAttribute boostAtt =
|
||||
attributes().addAttribute(BoostAttribute.class);
|
||||
|
||||
public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) {
|
||||
super(tenum, false);
|
||||
this.matchers = new ByteRunAutomaton[compiled.length];
|
||||
for (int i = 0; i < compiled.length; i++)
|
||||
this.matchers[i] = compiled[i].runAutomaton;
|
||||
termRef = new BytesRef(term.text());
|
||||
}
|
||||
|
||||
/** finds the smallest Lev(n) DFA that accepts the term. */
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
//System.out.println("AFTE.accept term=" + term);
|
||||
int ed = matchers.length - 1;
|
||||
|
||||
// we are wrapping either an intersect() TermsEnum or an AutomatonTermsENum,
|
||||
// so we know the outer DFA always matches.
|
||||
// now compute exact edit distance
|
||||
while (ed > 0) {
|
||||
if (matches(term, ed - 1)) {
|
||||
ed--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
//System.out.println("CHECK term=" + term.utf8ToString() + " ed=" + ed);
|
||||
|
||||
// scale to a boost and return (if similarity > minSimilarity)
|
||||
if (ed == 0) { // exact match
|
||||
boostAtt.setBoost(1.0F);
|
||||
//System.out.println(" yes");
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||
final float similarity = 1.0f - ((float) ed / (float)
|
||||
(Math.min(codePointCount, termLength)));
|
||||
if (similarity > minSimilarity) {
|
||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||
//System.out.println(" yes");
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** returns true if term is within k edits of the query term */
|
||||
final boolean matches(BytesRef term, int k) {
|
||||
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
|
||||
}
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public float getMinSimilarity() {
|
||||
return minSimilarity;
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public float getScaleFactor() {
|
||||
return scale_factor;
|
||||
}
|
||||
|
||||
/**
|
||||
* reuses compiled automata across different segments,
|
||||
* because they are independent of the index
|
||||
* @lucene.internal */
|
||||
public static interface LevenshteinAutomataAttribute extends Attribute {
|
||||
public List<CompiledAutomaton> automata();
|
||||
public CompiledAutomaton[] automata();
|
||||
public void setAutomata(CompiledAutomaton[] automata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores the compiled automata, indexed by edit distance
|
||||
* @lucene.internal */
|
||||
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
|
||||
private final List<CompiledAutomaton> automata = new ArrayList<>();
|
||||
private CompiledAutomaton[] automata;
|
||||
|
||||
@Override
|
||||
public List<CompiledAutomaton> automata() {
|
||||
public CompiledAutomaton[] automata() {
|
||||
return automata;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setAutomata(CompiledAutomaton[] automata) {
|
||||
this.automata = automata;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
automata.clear();
|
||||
automata = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return automata.hashCode();
|
||||
if (automata == null) {
|
||||
return 0;
|
||||
} else {
|
||||
return automata.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -411,15 +364,17 @@ public class FuzzyTermsEnum extends TermsEnum {
|
|||
return true;
|
||||
if (!(other instanceof LevenshteinAutomataAttributeImpl))
|
||||
return false;
|
||||
return automata.equals(((LevenshteinAutomataAttributeImpl) other).automata);
|
||||
return Arrays.equals(automata, ((LevenshteinAutomataAttributeImpl) other).automata);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
final List<CompiledAutomaton> targetAutomata =
|
||||
((LevenshteinAutomataAttribute) target).automata();
|
||||
targetAutomata.clear();
|
||||
targetAutomata.addAll(automata);
|
||||
public void copyTo(AttributeImpl _target) {
|
||||
LevenshteinAutomataAttribute target = (LevenshteinAutomataAttribute) _target;
|
||||
if (automata == null) {
|
||||
target.setAutomata(null);
|
||||
} else {
|
||||
target.setAutomata(automata);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.Monster;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.TimeUnits;
|
||||
|
||||
@SuppressCodecs({"SimpleText", "Memory", "Direct"})
|
||||
@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
|
||||
@Monster("Takes ~30min")
|
||||
@SuppressSysoutChecks(bugUrl = "Stuff gets printed")
|
||||
public class Test2BDocs extends LuceneTestCase {
|
||||
|
||||
// indexes Integer.MAX_VALUE docs with indexed field(s)
|
||||
public void test2BDocs() throws Exception {
|
||||
BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BDocs"));
|
||||
if (dir instanceof MockDirectoryWrapper) {
|
||||
((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
|
||||
}
|
||||
|
||||
IndexWriter w = new IndexWriter(dir,
|
||||
new IndexWriterConfig(new MockAnalyzer(random()))
|
||||
.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
|
||||
.setRAMBufferSizeMB(256.0)
|
||||
.setMergeScheduler(new ConcurrentMergeScheduler())
|
||||
.setMergePolicy(newLogMergePolicy(false, 10))
|
||||
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
|
||||
.setCodec(TestUtil.getDefaultCodec()));
|
||||
|
||||
Document doc = new Document();
|
||||
Field field = new Field("f1", "a", StringField.TYPE_NOT_STORED);
|
||||
doc.add(field);
|
||||
|
||||
for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
|
||||
w.addDocument(doc);
|
||||
if (i % (10*1000*1000) == 0) {
|
||||
System.out.println("indexed: " + i);
|
||||
System.out.flush();
|
||||
}
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
w.close();
|
||||
|
||||
System.out.println("verifying...");
|
||||
System.out.flush();
|
||||
|
||||
DirectoryReader r = DirectoryReader.open(dir);
|
||||
|
||||
BytesRef term = new BytesRef(1);
|
||||
term.bytes[0] = (byte)'a';
|
||||
term.length = 1;
|
||||
|
||||
long skips = 0;
|
||||
|
||||
Random rnd = random();
|
||||
|
||||
long start = System.nanoTime();
|
||||
|
||||
for (LeafReaderContext context : r.leaves()) {
|
||||
LeafReader reader = context.reader();
|
||||
int lim = context.reader().maxDoc();
|
||||
|
||||
Terms terms = reader.fields().terms("f1");
|
||||
for (int i=0; i<10000; i++) {
|
||||
TermsEnum te = terms.iterator();
|
||||
assertTrue( te.seekExact(term) );
|
||||
PostingsEnum docs = te.postings(null);
|
||||
|
||||
// skip randomly through the term
|
||||
for (int target = -1;;)
|
||||
{
|
||||
int maxSkipSize = lim - target + 1;
|
||||
// do a smaller skip half of the time
|
||||
if (rnd.nextBoolean()) {
|
||||
maxSkipSize = Math.min(256, maxSkipSize);
|
||||
}
|
||||
int newTarget = target + rnd.nextInt(maxSkipSize) + 1;
|
||||
if (newTarget >= lim) {
|
||||
if (target+1 >= lim) break; // we already skipped to end, so break.
|
||||
newTarget = lim-1; // skip to end
|
||||
}
|
||||
target = newTarget;
|
||||
|
||||
int res = docs.advance(target);
|
||||
if (res == PostingsEnum.NO_MORE_DOCS) break;
|
||||
|
||||
assertTrue( res >= target );
|
||||
|
||||
skips++;
|
||||
target = res;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
|
||||
long end = System.nanoTime();
|
||||
|
||||
System.out.println("Skip count=" + skips + " seconds=" + TimeUnit.NANOSECONDS.toSeconds(end-start));
|
||||
assert skips > 0;
|
||||
}
|
||||
|
||||
}
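A small-scale sketch of the PostingsEnum.advance() contract this monster test exercises near the 2-billion-doc limit, assuming Lucene 6.x APIs; the AdvanceDemo class, the 1000-doc index, and the target value are invented for illustration:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

// Hypothetical demo class, not part of the patch.
public class AdvanceDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    Document doc = new Document();
    doc.add(new StringField("f1", "a", Field.Store.NO));
    for (int i = 0; i < 1000; i++) {
      w.addDocument(doc);
    }
    w.forceMerge(1);
    w.close();

    DirectoryReader r = DirectoryReader.open(dir);
    TermsEnum te = r.leaves().get(0).reader().fields().terms("f1").iterator();
    te.seekExact(new BytesRef("a"));
    PostingsEnum docs = te.postings(null);
    int hit = docs.advance(500);
    System.out.println(hit >= 500); // advance() must return the first docID >= target
    r.close();
    dir.close();
  }
}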
|
|
@ -18,11 +18,13 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -36,7 +38,6 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
|
@ -66,19 +67,38 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
private static Directory dir2;
|
||||
private static int mulFactor;
|
||||
|
||||
private static Directory copyOf(Directory dir) throws IOException {
|
||||
Directory copy = newFSDirectory(createTempDir());
|
||||
for(String name : dir.listAll()) {
|
||||
if (name.startsWith("extra")) {
|
||||
continue;
|
||||
}
|
||||
copy.copyFrom(dir, name, name, IOContext.DEFAULT);
|
||||
copy.sync(Collections.singleton(name));
|
||||
}
|
||||
return copy;
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
// in some runs, test immediate adjacency of matches - in others, force a full bucket gap between docs
|
||||
NUM_FILLER_DOCS = random().nextBoolean() ? 0 : BooleanScorer.SIZE;
|
||||
PRE_FILLER_DOCS = TestUtil.nextInt(random(), 0, (NUM_FILLER_DOCS / 2));
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: NUM_FILLER_DOCS=" + NUM_FILLER_DOCS + " PRE_FILLER_DOCS=" + PRE_FILLER_DOCS);
|
||||
}
|
||||
|
||||
if (NUM_FILLER_DOCS * PRE_FILLER_DOCS > 100000) {
|
||||
directory = newFSDirectory(createTempDir());
|
||||
} else {
|
||||
directory = newDirectory();
|
||||
}
|
||||
|
||||
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
// randomized codecs are sometimes too costly for this test:
|
||||
iwc.setCodec(Codec.forName("Lucene62"));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter writer= new RandomIndexWriter(random(), directory, iwc);
|
||||
// we'll make a ton of docs, disable store/norms/vectors
|
||||
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
|
||||
ft.setOmitNorms(true);
|
||||
|
@ -118,8 +138,10 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
singleSegmentDirectory.sync(Collections.singleton(fileName));
|
||||
}
|
||||
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
// we need docID order to be preserved:
|
||||
// randomized codecs are sometimes too costly for this test:
|
||||
iwc.setCodec(Codec.forName("Lucene62"));
|
||||
iwc.setMergePolicy(newLogMergePolicy());
|
||||
try (IndexWriter w = new IndexWriter(singleSegmentDirectory, iwc)) {
|
||||
w.forceMerge(1, true);
|
||||
|
@ -129,7 +151,7 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
singleSegmentSearcher.setSimilarity(searcher.getSimilarity(true));
|
||||
|
||||
// Make big index
|
||||
dir2 = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(directory));
|
||||
dir2 = copyOf(directory);
|
||||
|
||||
// First multiply small test index:
|
||||
mulFactor = 1;
|
||||
|
@ -141,9 +163,14 @@ public class TestBoolean2 extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: cycle...");
|
||||
}
|
||||
final Directory copy = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir2));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir2);
|
||||
final Directory copy = copyOf(dir2);
|
||||
|
||||
iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
// randomized codecs are sometimes too costly for this test:
|
||||
iwc.setCodec(Codec.forName("Lucene62"));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
|
||||
w.addIndexes(copy);
|
||||
copy.close();
|
||||
docCount = w.maxDoc();
|
||||
w.close();
|
||||
mulFactor *= 2;
|
||||
|
|
|
@ -18,13 +18,19 @@ package org.apache.lucene.search;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -32,7 +38,10 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
/**
|
||||
|
@ -489,4 +498,210 @@ public class TestFuzzyQuery extends LuceneTestCase {
|
|||
doc.add(newTextField("field", text, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
|
||||
private String randomSimpleString(int digits) {
|
||||
int termLength = TestUtil.nextInt(random(), 1, 8);
|
||||
char[] chars = new char[termLength];
|
||||
for(int i=0;i<termLength;i++) {
|
||||
chars[i] = (char) ('a' + random().nextInt(digits));
|
||||
}
|
||||
return new String(chars);
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked","rawtypes"})
|
||||
public void testRandom() throws Exception {
|
||||
int numTerms = atLeast(100);
|
||||
int digits = TestUtil.nextInt(random(), 2, 3);
|
||||
Set<String> terms = new HashSet<>();
|
||||
while (terms.size() < numTerms) {
|
||||
terms.add(randomSimpleString(digits));
|
||||
}
|
||||
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
|
||||
for(String term : terms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.YES));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
DirectoryReader r = w.getReader();
|
||||
//System.out.println("TEST: reader=" + r);
|
||||
IndexSearcher s = newSearcher(r);
|
||||
int iters = atLeast(1000);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
String queryTerm = randomSimpleString(digits);
|
||||
int prefixLength = random().nextInt(queryTerm.length());
|
||||
      String queryPrefix = queryTerm.substring(0, prefixLength);

      // we don't look at scores here:
      List<TermAndScore>[] expected = new List[3];
      for(int ed=0;ed<3;ed++) {
        expected[ed] = new ArrayList<TermAndScore>();
      }
      for(String term : terms) {
        if (term.startsWith(queryPrefix) == false) {
          continue;
        }
        int ed = getDistance(term, queryTerm);
        if (Math.min(queryTerm.length(), term.length()) > ed) {
          float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
          while (ed < 3) {
            expected[ed].add(new TermAndScore(term, score));
            ed++;
          }
        }
      }

      for(int ed=0;ed<3;ed++) {
        Collections.sort(expected[ed]);
        int queueSize = TestUtil.nextInt(random(), 1, terms.size());
        /*
        System.out.println("\nTEST: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength);
        for(TermAndScore ent : expected[ed]) {
          System.out.println("  " + ent);
        }
        */
        FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), ed, prefixLength, queueSize, true);
        TopDocs hits = s.search(query, terms.size());
        Set<String> actual = new HashSet<>();
        for(ScoreDoc hit : hits.scoreDocs) {
          Document doc = s.doc(hit.doc);
          actual.add(doc.get("field"));
          //System.out.println("  actual: " + doc.get("field") + " score=" + hit.score);
        }
        Set<String> expectedTop = new HashSet<>();
        int limit = Math.min(queueSize, expected[ed].size());
        for(int i=0;i<limit;i++) {
          expectedTop.add(expected[ed].get(i).term);
        }

        if (actual.equals(expectedTop) == false) {
          StringBuilder sb = new StringBuilder();
          sb.append("FAILED: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength + "\n");

          boolean first = true;
          for(String term : actual) {
            if (expectedTop.contains(term) == false) {
              if (first) {
                sb.append("  these matched but shouldn't:\n");
                first = false;
              }
              sb.append("    " + term + "\n");
            }
          }
          first = true;
          for(String term : expectedTop) {
            if (actual.contains(term) == false) {
              if (first) {
                sb.append("  these did not match but should:\n");
                first = false;
              }
              sb.append("    " + term + "\n");
            }
          }
          throw new AssertionError(sb.toString());
        }
      }
    }

    IOUtils.close(r, w, dir);
  }

  private static class TermAndScore implements Comparable<TermAndScore> {
    final String term;
    final float score;

    public TermAndScore(String term, float score) {
      this.term = term;
      this.score = score;
    }

    @Override
    public int compareTo(TermAndScore other) {
      // higher score sorts first, and if scores are tied, lower term sorts first
      if (score > other.score) {
        return -1;
      } else if (score < other.score) {
        return 1;
      } else {
        return term.compareTo(other.term);
      }
    }

    @Override
    public String toString() {
      return term + " score=" + score;
    }
  }

  // Poached from LuceneLevenshteinDistance.java (from suggest module): it supports transpositions (treats them as ed=1, not ed=2)
  private static int getDistance(String target, String other) {
    IntsRef targetPoints;
    IntsRef otherPoints;
    int n;
    int d[][]; // cost array

    // NOTE: if we cared, we could 3*m space instead of m*n space, similar to
    // what LevenshteinDistance does, except cycling thru a ring of three
    // horizontal cost arrays... but this comparator is never actually used by
    // DirectSpellChecker, it's only used for merging results from multiple shards
    // in "distributed spellcheck", and it's inefficient in other ways too...

    // cheaper to do this up front once
    targetPoints = toIntsRef(target);
    otherPoints = toIntsRef(other);
    n = targetPoints.length;
    final int m = otherPoints.length;
    d = new int[n+1][m+1];

    if (n == 0 || m == 0) {
      if (n == m) {
        return 0;
      }
      else {
        return Math.max(n, m);
      }
    }

    // indexes into strings s and t
    int i; // iterates through s
    int j; // iterates through t

    int t_j; // jth character of t

    int cost; // cost

    for (i = 0; i<=n; i++) {
      d[i][0] = i;
    }

    for (j = 0; j<=m; j++) {
      d[0][j] = j;
    }

    for (j = 1; j<=m; j++) {
      t_j = otherPoints.ints[j-1];

      for (i=1; i<=n; i++) {
        cost = targetPoints.ints[i-1]==t_j ? 0 : 1;
        // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
        d[i][j] = Math.min(Math.min(d[i-1][j]+1, d[i][j-1]+1), d[i-1][j-1]+cost);
        // transposition
        if (i > 1 && j > 1 && targetPoints.ints[i-1] == otherPoints.ints[j-2] && targetPoints.ints[i-2] == otherPoints.ints[j-1]) {
          d[i][j] = Math.min(d[i][j], d[i-2][j-2] + cost);
        }
      }
    }

    return d[n][m];
  }

  private static IntsRef toIntsRef(String s) {
    IntsRef ref = new IntsRef(s.length()); // worst case
    int utf16Len = s.length();
    for (int i = 0, cp = 0; i < utf16Len; i += Character.charCount(cp)) {
      cp = ref.ints[ref.length++] = Character.codePointAt(s, i);
    }
    return ref;
  }
}
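The getDistance helper above counts an adjacent swap as a single edit, which matches what FuzzyQuery does when its transpositions flag is true. A minimal sketch of why that matters for the expectations built here (the field name and terms are illustrative, not taken from the test data):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;

class TranspositionSketch {
  // "lucene" -> "lucnee" is one adjacent transposition ("en" -> "ne"),
  // but two substitutions under plain Levenshtein. With transpositions=true,
  // a maxEdits=1 query can therefore still reach the swapped spelling.
  static FuzzyQuery swappedSpellingQuery() {
    return new FuzzyQuery(new Term("field", "lucene"),
                          1,     // maxEdits
                          0,     // prefixLength
                          50,    // maxExpansions (queue size)
                          true); // count transpositions as a single edit
  }
}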
@@ -539,7 +539,9 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
  public void testConcurrentIndexCloseSearchAndRefresh() throws Exception {
    final Directory dir = newFSDirectory(createTempDir());
    AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
    writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
    final MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(IndexWriter.MAX_TERM_LENGTH);
    writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));

    AtomicReference<SearcherManager> mgrRef = new AtomicReference<>();
    mgrRef.set(new SearcherManager(writerRef.get(), null));
@@ -561,7 +563,7 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
          } else {
            w.rollback();
          }
          writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
          writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
        }
      }
      docs.close();
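The test above churns writers and managers concurrently; for reference, a minimal sketch of the SearcherManager lifecycle it is stressing (the writer is assumed to be an already-open IndexWriter, and the query is just a placeholder):

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.SearcherManager;

class SearcherManagerLifecycleSketch {
  static void searchOnce(IndexWriter writer) throws IOException {
    SearcherManager mgr = new SearcherManager(writer, null); // null = default SearcherFactory
    try {
      IndexSearcher searcher = mgr.acquire();   // pin a point-in-time view of the index
      try {
        searcher.search(new MatchAllDocsQuery(), 10);
      } finally {
        mgr.release(searcher);                  // every acquire() needs a matching release()
      }
      mgr.maybeRefresh();                       // pick up changes made through the writer
    } finally {
      mgr.close();
    }
  }
}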
@@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
      Term[] phraseQueryTerms = phraseQuery.getTerms();
      if (phraseQueryTerms.length == 1) {
        extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
      }
      else {
      } else {
        SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
        for (int i = 0; i < phraseQueryTerms.length; i++) {
          clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
      // this query is TermContext sensitive.
      extractWeightedTerms(terms, query, boost);
    } else if (query instanceof DisjunctionMaxQuery) {
      for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
        extract(iterator.next(), boost, terms);
      for (Query clause : ((DisjunctionMaxQuery) query)) {
        extract(clause, boost, terms);
      }
    } else if (query instanceof ToParentBlockJoinQuery) {
      extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
          disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
          ++distinctPositions;
        }
        for (int j = 0; j < termArray.length; ++j) {
          disjuncts.add(new SpanTermQuery(termArray[j]));
        for (Term aTermArray : termArray) {
          disjuncts.add(new SpanTermQuery(aTermArray));
        }
      }

      int positionGaps = 0;
      int position = 0;
      final SpanQuery[] clauses = new SpanQuery[distinctPositions];
      for (int i = 0; i < disjunctLists.length; ++i) {
        List<SpanQuery> disjuncts = disjunctLists[i];
      for (List<SpanQuery> disjuncts : disjunctLists) {
        if (disjuncts != null) {
          clauses[position++] = new SpanOrQuery(disjuncts
              .toArray(new SpanQuery[disjuncts.size()]));
@@ -202,11 +200,15 @@ public class WeightedSpanTermExtractor {
        }
      }

      final int slop = mpq.getSlop();
      final boolean inorder = (slop == 0);
      if (clauses.length == 1) {
        extractWeightedSpanTerms(terms, clauses[0], boost);
      } else {
        final int slop = mpq.getSlop();
        final boolean inorder = (slop == 0);

        SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
        extractWeightedSpanTerms(terms, sp, boost);
      SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
      extractWeightedSpanTerms(terms, sp, boost);
      }
    } else if (query instanceof MatchAllDocsQuery) {
      //nothing
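The MultiPhraseQuery branch above reduces each position to SpanTermQuery/SpanOrQuery clauses and joins them with a SpanNearQuery. A stripped-down sketch of that translation for an ordinary two-term phrase (field and terms are made up for illustration):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;

class PhraseToSpansSketch {
  static SpanNearQuery phraseAsSpans() {
    SpanQuery[] clauses = new SpanQuery[] {
        new SpanTermQuery(new Term("body", "quick")),
        new SpanTermQuery(new Term("body", "fox"))
    };
    int slop = 1;                   // allowed positional gap, like slop + positionGaps above
    boolean inOrder = (slop == 0);  // same in-order rule the extractor applies
    return new SpanNearQuery(clauses, slop, inOrder);
  }
}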
@@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;

@@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
    helper.start();
  }

  @Test
  public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
    final String fieldName = "substring";

    final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });

    assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
  }

  public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
    final String fieldName = "substring";

    final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
        .add(new Term(fieldName, "uchu")).build();

    assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
  }

  private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
      throws IOException, InvalidTokenOffsetsException {
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new NGramTokenizer(4, 4));
      }
    };
    final String fieldName = "substring";

    final List<BytesRef> list = new ArrayList<>();
    list.add(new BytesRef("uchu"));
    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));

    final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");

    final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");

    assertEquals("B<b>uchu</b>ng",fragment);
    final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);

    assertEquals(expected, fragment);
  }

  public void testUnRewrittenQuery() throws Exception {
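The new assertHighlighting helper wraps the usual highlighter pipeline; a self-contained sketch of that pipeline outside the test (the analyzer, field name, and sample text are assumptions for illustration):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;

class HighlighterPipelineSketch {
  static String highlight() throws Exception {
    QueryScorer scorer = new QueryScorer(new TermQuery(new Term("body", "lucene")), "body");
    Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    // Re-analyzes the raw text and wraps matching tokens with the formatter,
    // e.g. "Apache <b>Lucene</b> is a search library".
    return highlighter.getBestFragment(new StandardAnalyzer(), "body",
        "Apache Lucene is a search library");
  }
}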
@@ -21,8 +21,8 @@ import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.sandbox.queries.SlowFuzzyQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@@ -33,7 +33,7 @@ import org.w3c.dom.NodeList;
public class FuzzyLikeThisQueryBuilder implements QueryBuilder {

  private static final int DEFAULT_MAX_NUM_TERMS = 50;
  private static final float DEFAULT_MIN_SIMILARITY = SlowFuzzyQuery.defaultMinSimilarity;
  private static final float DEFAULT_MIN_SIMILARITY = FuzzyQuery.defaultMinSimilarity;
  private static final int DEFAULT_PREFIX_LENGTH = 1;
  private static final boolean DEFAULT_IGNORE_TF = false;

@ -38,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.search.BoostAttribute;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.FuzzyTermsEnum;
|
||||
import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
|
@ -46,6 +47,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
|||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
/**
|
||||
* Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms.
|
||||
|
@ -64,62 +66,62 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
*/
|
||||
public class FuzzyLikeThisQuery extends Query
|
||||
{
|
||||
// TODO: generalize this query (at least it should not reuse this static sim!
|
||||
// a better way might be to convert this into multitermquery rewrite methods.
|
||||
// the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq)
|
||||
// provided to TermQuery, so that the general idea is agnostic to any scoring system...
|
||||
static TFIDFSimilarity sim=new ClassicSimilarity();
|
||||
ArrayList<FieldVals> fieldVals=new ArrayList<>();
|
||||
Analyzer analyzer;
|
||||
// TODO: generalize this query (at least it should not reuse this static sim!
|
||||
// a better way might be to convert this into multitermquery rewrite methods.
|
||||
// the rewrite method can 'average' the TermContext's term statistics (docfreq,totalTermFreq)
|
||||
// provided to TermQuery, so that the general idea is agnostic to any scoring system...
|
||||
static TFIDFSimilarity sim=new ClassicSimilarity();
|
||||
ArrayList<FieldVals> fieldVals=new ArrayList<>();
|
||||
Analyzer analyzer;
|
||||
|
||||
int MAX_VARIANTS_PER_TERM=50;
|
||||
boolean ignoreTF=false;
|
||||
private int maxNumTerms;
|
||||
int MAX_VARIANTS_PER_TERM=50;
|
||||
boolean ignoreTF=false;
|
||||
private int maxNumTerms;
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int prime = 31;
|
||||
int result = classHash();
|
||||
result = prime * result + Objects.hashCode(analyzer);
|
||||
result = prime * result + Objects.hashCode(fieldVals);
|
||||
result = prime * result + (ignoreTF ? 1231 : 1237);
|
||||
result = prime * result + maxNumTerms;
|
||||
return result;
|
||||
}
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int prime = 31;
|
||||
int result = classHash();
|
||||
result = prime * result + Objects.hashCode(analyzer);
|
||||
result = prime * result + Objects.hashCode(fieldVals);
|
||||
result = prime * result + (ignoreTF ? 1231 : 1237);
|
||||
result = prime * result + maxNumTerms;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return sameClassAs(other) &&
|
||||
equalsTo(getClass().cast(other));
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
return sameClassAs(other) &&
|
||||
equalsTo(getClass().cast(other));
|
||||
}
|
||||
|
||||
private boolean equalsTo(FuzzyLikeThisQuery other) {
|
||||
return Objects.equals(analyzer, other.analyzer) &&
|
||||
Objects.equals(fieldVals, other.fieldVals) &&
|
||||
ignoreTF == other.ignoreTF &&
|
||||
maxNumTerms == other.maxNumTerms;
|
||||
}
|
||||
private boolean equalsTo(FuzzyLikeThisQuery other) {
|
||||
return Objects.equals(analyzer, other.analyzer) &&
|
||||
Objects.equals(fieldVals, other.fieldVals) &&
|
||||
ignoreTF == other.ignoreTF &&
|
||||
maxNumTerms == other.maxNumTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery
|
||||
*/
|
||||
public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer)
|
||||
{
|
||||
this.analyzer=analyzer;
|
||||
this.maxNumTerms = maxNumTerms;
|
||||
}
|
||||
/**
|
||||
*
|
||||
* @param maxNumTerms The total number of terms clauses that will appear once rewritten as a BooleanQuery
|
||||
*/
|
||||
public FuzzyLikeThisQuery(int maxNumTerms, Analyzer analyzer)
|
||||
{
|
||||
this.analyzer=analyzer;
|
||||
this.maxNumTerms = maxNumTerms;
|
||||
}
|
||||
|
||||
class FieldVals
|
||||
{
|
||||
String queryString;
|
||||
String fieldName;
|
||||
float minSimilarity;
|
||||
int prefixLength;
|
||||
public FieldVals(String name, float similarity, int length, String queryString)
|
||||
class FieldVals
|
||||
{
|
||||
String queryString;
|
||||
String fieldName;
|
||||
int maxEdits;
|
||||
int prefixLength;
|
||||
public FieldVals(String name, int maxEdits, int length, String queryString)
|
||||
{
|
||||
fieldName = name;
|
||||
minSimilarity = similarity;
|
||||
this.maxEdits = maxEdits;
|
||||
prefixLength = length;
|
||||
this.queryString = queryString;
|
||||
}
|
||||
|
@ -129,11 +131,11 @@ public class FuzzyLikeThisQuery extends Query
|
|||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result
|
||||
+ ((fieldName == null) ? 0 : fieldName.hashCode());
|
||||
result = prime * result + Float.floatToIntBits(minSimilarity);
|
||||
+ ((fieldName == null) ? 0 : fieldName.hashCode());
|
||||
result = prime * result + maxEdits;
|
||||
result = prime * result + prefixLength;
|
||||
result = prime * result
|
||||
+ ((queryString == null) ? 0 : queryString.hashCode());
|
||||
+ ((queryString == null) ? 0 : queryString.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -151,9 +153,9 @@ public class FuzzyLikeThisQuery extends Query
|
|||
return false;
|
||||
} else if (!fieldName.equals(other.fieldName))
|
||||
return false;
|
||||
if (Float.floatToIntBits(minSimilarity) != Float
|
||||
.floatToIntBits(other.minSimilarity))
|
||||
if (maxEdits != other.maxEdits) {
|
||||
return false;
|
||||
}
|
||||
if (prefixLength != other.prefixLength)
|
||||
return false;
|
||||
if (queryString == null) {
|
||||
|
@@ -166,18 +168,22 @@ public class FuzzyLikeThisQuery extends Query



    }
  }

  /**
   * Adds user input for "fuzzification"
   * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
   * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum)
   * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
   */
  public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
  {
    fieldVals.add(new FieldVals(fieldName,minSimilarity,prefixLength,queryString));
  /**
   * Adds user input for "fuzzification"
   * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
   * @param minSimilarity The minimum similarity of the term variants; must be 0, 1 or 2 (see FuzzyTermsEnum)
   * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
   */
  public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
  {
    int maxEdits = (int) minSimilarity;
    if (maxEdits != minSimilarity || maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
      throw new IllegalArgumentException("minSimilarity must integer value between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got " + minSimilarity);
    }
    fieldVals.add(new FieldVals(fieldName,maxEdits,prefixLength,queryString));
  }

  private void addTerms(IndexReader reader, FieldVals f, ScoreTermQueue q) throws IOException {
@@ -202,7 +208,7 @@ public class FuzzyLikeThisQuery extends Query
    AttributeSource atts = new AttributeSource();
    MaxNonCompetitiveBoostAttribute maxBoostAtt =
        atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
    SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
    FuzzyTermsEnum fe = new FuzzyTermsEnum(terms, atts, startTerm, f.maxEdits, f.prefixLength, true);
    //store the df so all variants use same idf
    int df = reader.docFreq(startTerm);
    int numVariants = 0;
@@ -225,9 +231,9 @@ public class FuzzyLikeThisQuery extends Query
    if (numVariants > 0) {
      int avgDf = totalVariantDocFreqs / numVariants;
      if (df == 0)//no direct match we can use as df for all variants
        {
        df = avgDf; //use avg df of all variants
        }
      {
        df = avgDf; //use avg df of all variants
      }

      // take the top variants (scored by edit distance) and reset the score
      // to include an IDF factor then add to the global queue for ranking
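The enum swap above is the heart of the change; a hedged sketch of driving FuzzyTermsEnum directly with the same arguments (the reader, field, and seed term are assumptions for illustration, not taken from this file):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;

class FuzzyVariantsSketch {
  static void printVariants(IndexReader reader) throws IOException {
    Terms terms = MultiFields.getTerms(reader, "name");
    if (terms == null) {
      return; // field does not exist in this index
    }
    FuzzyTermsEnum fe = new FuzzyTermsEnum(terms, new AttributeSource(),
        new Term("name", "smith"), 2 /* maxEdits */, 1 /* prefixLength */, true /* transpositions */);
    BytesRef variant;
    while ((variant = fe.next()) != null) {
      // each variant is within 2 edits of "smith" and shares its 1-character prefix
      System.out.println(variant.utf8ToString());
    }
  }
}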
@ -267,105 +273,105 @@ public class FuzzyLikeThisQuery extends Query
|
|||
}
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException
|
||||
{
|
||||
ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
|
||||
//load up the list of possible terms
|
||||
for (FieldVals f : fieldVals) {
|
||||
addTerms(reader, f, q);
|
||||
}
|
||||
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
|
||||
//create BooleanQueries to hold the variants for each token/field pair and ensure it
|
||||
// has no coord factor
|
||||
//Step 1: sort the termqueries by term/field
|
||||
HashMap<Term,ArrayList<ScoreTerm>> variantQueries=new HashMap<>();
|
||||
int size = q.size();
|
||||
for(int i = 0; i < size; i++)
|
||||
{
|
||||
ScoreTerm st = q.pop();
|
||||
ArrayList<ScoreTerm> l= variantQueries.get(st.fuzziedSourceTerm);
|
||||
if(l==null)
|
||||
{
|
||||
l=new ArrayList<>();
|
||||
variantQueries.put(st.fuzziedSourceTerm,l);
|
||||
}
|
||||
l.add(st);
|
||||
}
|
||||
//Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
|
||||
for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext();)
|
||||
{
|
||||
ArrayList<ScoreTerm> variants = iter.next();
|
||||
if(variants.size()==1)
|
||||
{
|
||||
//optimize where only one selected variant
|
||||
ScoreTerm st= variants.get(0);
|
||||
Query tq = newTermQuery(reader, st.term);
|
||||
// set the boost to a mix of IDF and score
|
||||
bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
else
|
||||
{
|
||||
BooleanQuery.Builder termVariants=new BooleanQuery.Builder();
|
||||
for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2
|
||||
.hasNext();)
|
||||
{
|
||||
ScoreTerm st = iterator2.next();
|
||||
// found a match
|
||||
Query tq = newTermQuery(reader, st.term);
|
||||
// set the boost using the ScoreTerm's score
|
||||
termVariants.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
}
|
||||
//TODO possible alternative step 3 - organize above booleans into a new layer of field-based
|
||||
// booleans with a minimum-should-match of NumFields-1?
|
||||
return bq.build();
|
||||
public Query rewrite(IndexReader reader) throws IOException
|
||||
{
|
||||
ScoreTermQueue q = new ScoreTermQueue(maxNumTerms);
|
||||
//load up the list of possible terms
|
||||
for (FieldVals f : fieldVals) {
|
||||
addTerms(reader, f, q);
|
||||
}
|
||||
|
||||
//Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
|
||||
// term variants) then is reset with IDF for use in ranking against all other
|
||||
// terms/fields
|
||||
private static class ScoreTerm{
|
||||
public Term term;
|
||||
public float score;
|
||||
Term fuzziedSourceTerm;
|
||||
|
||||
public ScoreTerm(Term term, float score, Term fuzziedSourceTerm){
|
||||
this.term = term;
|
||||
this.score = score;
|
||||
this.fuzziedSourceTerm=fuzziedSourceTerm;
|
||||
}
|
||||
BooleanQuery.Builder bq = new BooleanQuery.Builder();
|
||||
|
||||
//create BooleanQueries to hold the variants for each token/field pair and ensure it
|
||||
// has no coord factor
|
||||
//Step 1: sort the termqueries by term/field
|
||||
HashMap<Term,ArrayList<ScoreTerm>> variantQueries=new HashMap<>();
|
||||
int size = q.size();
|
||||
for(int i = 0; i < size; i++)
|
||||
{
|
||||
ScoreTerm st = q.pop();
|
||||
ArrayList<ScoreTerm> l= variantQueries.get(st.fuzziedSourceTerm);
|
||||
if(l==null)
|
||||
{
|
||||
l=new ArrayList<>();
|
||||
variantQueries.put(st.fuzziedSourceTerm,l);
|
||||
}
|
||||
l.add(st);
|
||||
}
|
||||
|
||||
private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
|
||||
public ScoreTermQueue(int size){
|
||||
super(size);
|
||||
}
|
||||
//Step 2: Organize the sorted termqueries into zero-coord scoring boolean queries
|
||||
for (Iterator<ArrayList<ScoreTerm>> iter = variantQueries.values().iterator(); iter.hasNext();)
|
||||
{
|
||||
ArrayList<ScoreTerm> variants = iter.next();
|
||||
if(variants.size()==1)
|
||||
{
|
||||
//optimize where only one selected variant
|
||||
ScoreTerm st= variants.get(0);
|
||||
Query tq = newTermQuery(reader, st.term);
|
||||
// set the boost to a mix of IDF and score
|
||||
bq.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
else
|
||||
{
|
||||
BooleanQuery.Builder termVariants=new BooleanQuery.Builder();
|
||||
for (Iterator<ScoreTerm> iterator2 = variants.iterator(); iterator2
|
||||
.hasNext();)
|
||||
{
|
||||
ScoreTerm st = iterator2.next();
|
||||
// found a match
|
||||
Query tq = newTermQuery(reader, st.term);
|
||||
// set the boost using the ScoreTerm's score
|
||||
termVariants.add(new BoostQuery(tq, st.score), BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
bq.add(termVariants.build(), BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
}
|
||||
//TODO possible alternative step 3 - organize above booleans into a new layer of field-based
|
||||
// booleans with a minimum-should-match of NumFields-1?
|
||||
return bq.build();
|
||||
}
|
||||
|
||||
//Holds info for a fuzzy term variant - initially score is set to edit distance (for ranking best
|
||||
// term variants) then is reset with IDF for use in ranking against all other
|
||||
// terms/fields
|
||||
private static class ScoreTerm{
|
||||
public Term term;
|
||||
public float score;
|
||||
Term fuzziedSourceTerm;
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
@Override
|
||||
protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
|
||||
if (termA.score== termB.score)
|
||||
return termA.term.compareTo(termB.term) > 0;
|
||||
else
|
||||
return termA.score < termB.score;
|
||||
}
|
||||
|
||||
}
|
||||
public ScoreTerm(Term term, float score, Term fuzziedSourceTerm){
|
||||
this.term = term;
|
||||
this.score = score;
|
||||
this.fuzziedSourceTerm=fuzziedSourceTerm;
|
||||
}
|
||||
}
|
||||
|
||||
private static class ScoreTermQueue extends PriorityQueue<ScoreTerm> {
|
||||
public ScoreTermQueue(int size){
|
||||
super(size);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.Query#toString(java.lang.String)
|
||||
* @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object)
|
||||
*/
|
||||
@Override
|
||||
public String toString(String field)
|
||||
{
|
||||
return null;
|
||||
protected boolean lessThan(ScoreTerm termA, ScoreTerm termB) {
|
||||
if (termA.score== termB.score)
|
||||
return termA.term.compareTo(termB.term) > 0;
|
||||
else
|
||||
return termA.score < termB.score;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
* @see org.apache.lucene.search.Query#toString(java.lang.String)
|
||||
*/
|
||||
@Override
|
||||
public String toString(String field)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
public boolean isIgnoreTF()
|
||||
|
|
|
@ -1,201 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.SingleTermsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanQuery; // javadocs
|
||||
import org.apache.lucene.search.FuzzyQuery; // javadocs
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
|
||||
/** Implements the classic fuzzy search query. The similarity measurement
|
||||
* is based on the Levenshtein (edit distance) algorithm.
|
||||
* <p>
|
||||
* Note that, unlike {@link FuzzyQuery}, this query will silently allow
|
||||
* for a (possibly huge) number of edit distances in comparisons, and may
|
||||
* be extremely slow (comparing every term in the index).
|
||||
*
|
||||
* @deprecated Use {@link FuzzyQuery} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public class SlowFuzzyQuery extends MultiTermQuery {
|
||||
|
||||
public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
|
||||
public final static int defaultPrefixLength = 0;
|
||||
public final static int defaultMaxExpansions = 50;
|
||||
|
||||
private float minimumSimilarity;
|
||||
private int prefixLength;
|
||||
private boolean termLongEnough = false;
|
||||
|
||||
protected Term term;
|
||||
|
||||
/**
|
||||
* Create a new SlowFuzzyQuery that will match terms with a similarity
|
||||
* of at least <code>minimumSimilarity</code> to <code>term</code>.
|
||||
* If a <code>prefixLength</code> > 0 is specified, a common prefix
|
||||
* of that length is also required.
|
||||
*
|
||||
* @param term the term to search for
|
||||
* @param minimumSimilarity a value between 0 and 1 to set the required similarity
|
||||
* between the query term and the matching terms. For example, for a
|
||||
* <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
|
||||
* as the query term is considered similar to the query term if the edit distance
|
||||
* between both terms is less than <code>length(term)*0.5</code>
|
||||
* <p>
|
||||
* Alternatively, if <code>minimumSimilarity</code> is >= 1f, it is interpreted
|
||||
* as a pure Levenshtein edit distance. For example, a value of <code>2f</code>
|
||||
* will match all terms within an edit distance of <code>2</code> from the
|
||||
* query term. Edit distances specified in this way may not be fractional.
|
||||
*
|
||||
* @param prefixLength length of common (non-fuzzy) prefix
|
||||
* @param maxExpansions the maximum number of terms to match. If this number is
|
||||
* greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
|
||||
* then the maxClauseCount will be used instead.
|
||||
* @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0
|
||||
* or if prefixLength < 0
|
||||
*/
|
||||
public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
|
||||
int maxExpansions) {
|
||||
super(term.field());
|
||||
this.term = term;
|
||||
|
||||
if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity)
|
||||
throw new IllegalArgumentException("fractional edit distances are not allowed");
|
||||
if (minimumSimilarity < 0.0f)
|
||||
throw new IllegalArgumentException("minimumSimilarity < 0");
|
||||
if (prefixLength < 0)
|
||||
throw new IllegalArgumentException("prefixLength < 0");
|
||||
if (maxExpansions < 0)
|
||||
throw new IllegalArgumentException("maxExpansions < 0");
|
||||
|
||||
setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
|
||||
|
||||
String text = term.text();
|
||||
int len = text.codePointCount(0, text.length());
|
||||
if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) {
|
||||
this.termLongEnough = true;
|
||||
}
|
||||
|
||||
this.minimumSimilarity = minimumSimilarity;
|
||||
this.prefixLength = prefixLength;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)}.
|
||||
*/
|
||||
public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
|
||||
this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)}.
|
||||
*/
|
||||
public SlowFuzzyQuery(Term term, float minimumSimilarity) {
|
||||
this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)}.
|
||||
*/
|
||||
public SlowFuzzyQuery(Term term) {
|
||||
this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the minimum similarity that is required for this query to match.
|
||||
* @return float value between 0.0 and 1.0
|
||||
*/
|
||||
public float getMinSimilarity() {
|
||||
return minimumSimilarity;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the non-fuzzy prefix length. This is the number of characters at the start
|
||||
* of a term that must be identical (not fuzzy) to the query term if the query
|
||||
* is to match that term.
|
||||
*/
|
||||
public int getPrefixLength() {
|
||||
return prefixLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||
if (!termLongEnough) { // can only match if it's exact
|
||||
return new SingleTermsEnum(terms.iterator(), term.bytes());
|
||||
}
|
||||
return new SlowFuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the pattern term.
|
||||
*/
|
||||
public Term getTerm() {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
final StringBuilder buffer = new StringBuilder();
|
||||
if (!term.field().equals(field)) {
|
||||
buffer.append(term.field());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(term.text());
|
||||
buffer.append('~');
|
||||
buffer.append(Float.toString(minimumSimilarity));
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = super.hashCode();
|
||||
result = prime * result + Float.floatToIntBits(minimumSimilarity);
|
||||
result = prime * result + prefixLength;
|
||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (!super.equals(obj))
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
SlowFuzzyQuery other = (SlowFuzzyQuery) obj;
|
||||
if (Float.floatToIntBits(minimumSimilarity) != Float
|
||||
.floatToIntBits(other.minimumSimilarity))
|
||||
return false;
|
||||
if (prefixLength != other.prefixLength)
|
||||
return false;
|
||||
if (term == null) {
|
||||
if (other.term != null)
|
||||
return false;
|
||||
} else if (!term.equals(other.term))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -1,263 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.search.BoostAttribute;
|
||||
import org.apache.lucene.search.FuzzyTermsEnum;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
/** Potentially slow fuzzy TermsEnum for enumerating all terms that are similar
|
||||
* to the specified filter term.
|
||||
* <p> If the minSimilarity or maxEdits is greater than the Automaton's
|
||||
* allowable range, this backs off to the classic (brute force)
|
||||
* fuzzy terms enum method by calling FuzzyTermsEnum's getAutomatonEnum.
|
||||
* </p>
|
||||
* <p>Term enumerations are always ordered by
|
||||
* {@link BytesRef#compareTo}. Each term in the enumeration is
|
||||
* greater than all that precede it.</p>
|
||||
*
|
||||
* @deprecated Use {@link FuzzyTermsEnum} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public final class SlowFuzzyTermsEnum extends FuzzyTermsEnum {
|
||||
|
||||
public SlowFuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
|
||||
float minSimilarity, int prefixLength) throws IOException {
|
||||
super(terms, atts, term, minSimilarity, prefixLength, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
|
||||
throws IOException {
|
||||
TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
|
||||
if (newEnum != null) {
|
||||
setEnum(newEnum);
|
||||
} else if (init) {
|
||||
setEnum(new LinearFuzzyTermsEnum());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Implement fuzzy enumeration with linear brute force.
|
||||
*/
|
||||
private class LinearFuzzyTermsEnum extends FilteredTermsEnum {
|
||||
/* Allows us save time required to create a new array
|
||||
* every time similarity is called.
|
||||
*/
|
||||
private int[] d;
|
||||
private int[] p;
|
||||
|
||||
// this is the text, minus the prefix
|
||||
private final int[] text;
|
||||
|
||||
private final BoostAttribute boostAtt =
|
||||
attributes().addAttribute(BoostAttribute.class);
|
||||
|
||||
/**
|
||||
* Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
|
||||
* length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity >
|
||||
* <code>minSimilarity</code>.
|
||||
* <p>
|
||||
* After calling the constructor the enumeration is already pointing to the first
|
||||
* valid term if such a term exists.
|
||||
*
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
*/
|
||||
public LinearFuzzyTermsEnum() throws IOException {
|
||||
super(terms.iterator());
|
||||
|
||||
this.text = new int[termLength - realPrefixLength];
|
||||
System.arraycopy(termText, realPrefixLength, text, 0, text.length);
|
||||
final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
|
||||
prefixBytesRef = new BytesRef(prefix);
|
||||
this.d = new int[this.text.length + 1];
|
||||
this.p = new int[this.text.length + 1];
|
||||
|
||||
setInitialSeekTerm(prefixBytesRef);
|
||||
}
|
||||
|
||||
private final BytesRef prefixBytesRef;
|
||||
// used for unicode conversion from BytesRef byte[] to int[]
|
||||
private final IntsRefBuilder utf32 = new IntsRefBuilder();
|
||||
|
||||
/**
|
||||
* <p>The termCompare method in FuzzyTermEnum uses Levenshtein distance to
|
||||
* calculate the distance between the given term and the comparing term.
|
||||
* </p>
|
||||
* <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
|
||||
* Otherwise, this method uses the following logic to calculate similarity.
|
||||
* <pre>
|
||||
* similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
|
||||
* </pre>
|
||||
* where distance is the Levenshtein distance for the two words.
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
@Override
|
||||
protected final AcceptStatus accept(BytesRef term) {
|
||||
if (StringHelper.startsWith(term, prefixBytesRef)) {
|
||||
utf32.copyUTF8Bytes(term);
|
||||
final int distance = calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);
|
||||
|
||||
//Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
|
||||
if (distance == Integer.MIN_VALUE){
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
//no need to calc similarity, if raw is true and distance > maxEdits
|
||||
if (raw == true && distance > maxEdits){
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
final float similarity = calcSimilarity(distance, (utf32.length() - realPrefixLength), text.length);
|
||||
|
||||
//if raw is true, then distance must also be <= maxEdits by now
|
||||
//given the previous if statement
|
||||
if (raw == true ||
|
||||
(raw == false && similarity > minSimilarity)) {
|
||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
} else {
|
||||
return AcceptStatus.END;
|
||||
}
|
||||
}
|
||||
|
||||
/******************************
|
||||
* Compute Levenshtein distance
|
||||
******************************/
|
||||
|
||||
/**
|
||||
* <p>calcDistance returns the Levenshtein distance between the query term
|
||||
* and the target term.</p>
|
||||
*
|
||||
* <p>Embedded within this algorithm is a fail-fast Levenshtein distance
|
||||
* algorithm. The fail-fast algorithm differs from the standard Levenshtein
|
||||
* distance algorithm in that it is aborted if it is discovered that the
|
||||
* minimum distance between the words is greater than some threshold.
|
||||
|
||||
* <p>Levenshtein distance (also known as edit distance) is a measure of similarity
|
||||
* between two strings where the distance is measured as the number of character
|
||||
* deletions, insertions or substitutions required to transform one string to
|
||||
* the other string.
|
||||
* @param target the target word or phrase
|
||||
* @param offset the offset at which to start the comparison
|
||||
* @param length the length of what's left of the string to compare
|
||||
* @return the number of edits or Integer.MIN_VALUE if the edit distance is
|
||||
* greater than maxDistance.
|
||||
*/
|
||||
private final int calcDistance(final int[] target, int offset, int length) {
|
||||
final int m = length;
|
||||
final int n = text.length;
|
||||
if (n == 0) {
|
||||
//we don't have anything to compare. That means if we just add
|
||||
//the letters for m we get the new word
|
||||
return m;
|
||||
}
|
||||
if (m == 0) {
|
||||
return n;
|
||||
}
|
||||
|
||||
final int maxDistance = calculateMaxDistance(m);
|
||||
|
||||
if (maxDistance < Math.abs(m-n)) {
|
||||
//just adding the characters of m to n or vice-versa results in
|
||||
//too many edits
|
||||
//for example "pre" length is 3 and "prefixes" length is 8. We can see that
|
||||
//given this optimal circumstance, the edit distance cannot be less than 5.
|
||||
//which is 8-3 or more precisely Math.abs(3-8).
|
||||
//if our maximum edit distance is 4, then we can discard this word
|
||||
//without looking at it.
|
||||
return Integer.MIN_VALUE;
|
||||
}
|
||||
|
||||
// init matrix d
|
||||
for (int i = 0; i <=n; ++i) {
|
||||
p[i] = i;
|
||||
}
|
||||
|
||||
// start computing edit distance
|
||||
for (int j = 1; j<=m; ++j) { // iterates through target
|
||||
int bestPossibleEditDistance = m;
|
||||
final int t_j = target[offset+j-1]; // jth character of t
|
||||
d[0] = j;
|
||||
|
||||
for (int i=1; i<=n; ++i) { // iterates through text
|
||||
// minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
|
||||
if (t_j != text[i-1]) {
|
||||
d[i] = Math.min(Math.min(d[i-1], p[i]), p[i-1]) + 1;
|
||||
} else {
|
||||
d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]);
|
||||
}
|
||||
bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i]);
|
||||
}
|
||||
|
||||
//After calculating row i, the best possible edit distance
|
||||
//can be found by found by finding the smallest value in a given column.
|
||||
//If the bestPossibleEditDistance is greater than the max distance, abort.
|
||||
|
||||
if (j > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater
|
||||
//the closest the target can be to the text is just too far away.
|
||||
//this target is leaving the party early.
|
||||
return Integer.MIN_VALUE;
|
||||
}
|
||||
|
||||
// copy current distance counts to 'previous row' distance counts: swap p and d
|
||||
int _d[] = p;
|
||||
p = d;
|
||||
d = _d;
|
||||
}
|
||||
|
||||
// our last action in the above loop was to switch d and p, so p now
|
||||
// actually has the most recent cost counts
|
||||
|
||||
return p[n];
|
||||
}
|
||||
|
||||
private float calcSimilarity(int edits, int m, int n){
|
||||
// this will return less than 0.0 when the edit distance is
|
||||
// greater than the number of characters in the shorter word.
|
||||
// but this was the formula that was previously used in FuzzyTermEnum,
|
||||
// so it has not been changed (even though minimumSimilarity must be
|
||||
// greater than 0.0)
|
||||
|
||||
return 1.0f - ((float)edits / (float) (realPrefixLength + Math.min(n, m)));
|
||||
}
|
||||
|
||||
/**
|
||||
* The max Distance is the maximum Levenshtein distance for the text
|
||||
* compared to some other value that results in score that is
|
||||
* better than the minimum similarity.
|
||||
* @param m the length of the "other value"
|
||||
* @return the maximum levenshtein distance that we care about
|
||||
*/
|
||||
private int calculateMaxDistance(int m) {
|
||||
return raw ? maxEdits : Math.min(maxEdits,
|
||||
(int)((1-minSimilarity) * (Math.min(text.length, m) + realPrefixLength)));
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -77,7 +77,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
  //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
  public void testClosestEditDistanceMatchComesFirst() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("smith", "name", 0.3f, 1);
    flt.addTerms("smith", "name", 2, 1);
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
    searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -94,7 +94,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
  //Test multiple input words are having variants produced
  public void testMultiWord() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("jonathin smoth", "name", 0.3f, 1);
    flt.addTerms("jonathin smoth", "name", 2, 1);
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
    searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -110,8 +110,8 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
  // LUCENE-4809
  public void testNonExistingField() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("jonathin smoth", "name", 0.3f, 1);
    flt.addTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
    flt.addTerms("jonathin smoth", "name", 2, 1);
    flt.addTerms("jonathin smoth", "this field does not exist", 2, 1);
    // don't fail here just because the field doesn't exits
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
@@ -129,7 +129,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
  //Test bug found when first query word does not match anything
  public void testNoMatchFirstWordBug() throws Throwable {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
    flt.addTerms("fernando smith", "name", 0.3f, 1);
    flt.addTerms("fernando smith", "name", 2, 1);
    Query q = flt.rewrite(searcher.getIndexReader());
    HashSet<Term> queryTerms = new HashSet<>();
    searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -144,9 +144,9 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
  public void testFuzzyLikeThisQueryEquals() {
    Analyzer analyzer = new MockAnalyzer(random());
    FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
    fltq1.addTerms("javi", "subject", 0.5f, 2);
    fltq1.addTerms("javi", "subject", 2, 2);
    FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
    fltq2.addTerms("javi", "subject", 0.5f, 2);
    fltq2.addTerms("javi", "subject", 2, 2);
    assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
        fltq2);
  }
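Taken together, the updated calls show the new contract: the third argument is now an integer edit distance rather than a fractional similarity. A condensed end-to-end sketch mirroring these tests (the analyzer and searcher are assumed to exist already):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;

class FuzzyLikeThisSketch {
  static TopDocs fuzzyLikeThis(Analyzer analyzer, IndexSearcher searcher) throws IOException {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer); // keep at most 10 expanded terms
    flt.addTerms("jonathin smoth", "name", 2, 1);                  // maxEdits=2 (was 0.3f), prefixLength=1
    Query rewritten = flt.rewrite(searcher.getIndexReader());      // expands to the best fuzzy variants
    return searcher.search(rewritten, 10);
  }
}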
@ -1,487 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.sandbox.queries;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
* Tests {@link SlowFuzzyQuery}.
|
||||
*
|
||||
*/
|
||||
public class TestSlowFuzzyQuery extends LuceneTestCase {
|
||||
|
||||
public void testFuzziness() throws Exception {
|
||||
//every test with SlowFuzzyQuery.defaultMinSimilarity
|
||||
//is exercising the Automaton, not the brute force linear method
|
||||
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("aaaaa", writer);
|
||||
addDoc("aaaab", writer);
|
||||
addDoc("aaabb", writer);
|
||||
addDoc("aabbb", writer);
|
||||
addDoc("abbbb", writer);
|
||||
addDoc("bbbbb", writer);
|
||||
addDoc("ddddd", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
|
||||
// same with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
// test scoring
|
||||
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals("3 documents should match", 3, hits.length);
|
||||
List<String> order = Arrays.asList("bbbbb","abbbb","aabbb");
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
final String term = searcher.doc(hits[i].doc).get("field");
|
||||
//System.out.println(hits[i].score);
|
||||
assertEquals(order.get(i), term);
|
||||
}
|
||||
|
||||
// test pq size by supplying maxExpansions=2
|
||||
// This query would normally return 3 documents, because 3 terms match (see above):
|
||||
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals("only 2 documents should match", 2, hits.length);
|
||||
order = Arrays.asList("bbbbb","abbbb");
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
final String term = searcher.doc(hits[i].doc).get("field");
|
||||
//System.out.println(hits[i].score);
|
||||
assertEquals(order.get(i), term);
|
||||
}
|
||||
|
||||
// not similar enough:
|
||||
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// query identical to a word in the index:
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
// default allows for up to two edits:
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
||||
|
||||
// query similar to a word in the index:
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
||||
|
||||
// now with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
||||
|
||||
// now with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
|
||||
// different field = no match:
|
||||
query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testFuzzinessLong2() throws Exception {
|
||||
//Lucene-5033
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("abcdef", writer);
|
||||
addDoc("segment", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
SlowFuzzyQuery query;
|
||||
|
||||
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testFuzzinessLong() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("aaaaaaa", writer);
|
||||
addDoc("segment", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
SlowFuzzyQuery query;
|
||||
// not similar enough:
|
||||
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
|
||||
// in testDefaultFuzziness so a bigger difference is allowed:
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
||||
|
||||
// now with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// no match, more than half of the characters is wrong:
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// now with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// "student" and "stellent" are indeed similar to "segment" by default:
|
||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
// now with prefix
|
||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// "student" doesn't match anymore thanks to increased minimum similarity:
|
||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
|
||||
});
|
||||
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
|
||||
});
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* MultiTermQuery provides (via attribute) information about which values
|
||||
* must be competitive to enter the priority queue.
|
||||
*
|
||||
* SlowFuzzyQuery optimizes itself around this information, if the attribute
|
||||
* is not implemented correctly, there will be problems!
|
||||
*/
|
||||
public void testTieBreaker() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("a123456", writer);
|
||||
addDoc("c123456", writer);
|
||||
addDoc("d123456", writer);
|
||||
addDoc("e123456", writer);
|
||||
|
||||
Directory directory2 = newDirectory();
|
||||
RandomIndexWriter writer2 = new RandomIndexWriter(random(), directory2);
|
||||
addDoc("a123456", writer2);
|
||||
addDoc("b123456", writer2);
|
||||
addDoc("b123456", writer2);
|
||||
addDoc("b123456", writer2);
|
||||
addDoc("c123456", writer2);
|
||||
addDoc("f123456", writer2);
|
||||
|
||||
IndexReader ir1 = writer.getReader();
|
||||
IndexReader ir2 = writer2.getReader();
|
||||
|
||||
MultiReader mr = new MultiReader(ir1, ir2);
|
||||
IndexSearcher searcher = newSearcher(mr);
|
||||
SlowFuzzyQuery fq = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
|
||||
TopDocs docs = searcher.search(fq, 2);
|
||||
assertEquals(5, docs.totalHits); // 5 docs, from the a and b's
|
||||
mr.close();
|
||||
ir1.close();
|
||||
ir2.close();
|
||||
writer.close();
|
||||
writer2.close();
|
||||
directory.close();
|
||||
directory2.close();
|
||||
}
|
||||
|
||||
public void testTokenLengthOpt() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("12345678911", writer);
|
||||
addDoc("segment", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
Query query;
|
||||
// term not over 10 chars, so optimization shortcuts
|
||||
query = new SlowFuzzyQuery(new Term("field", "1234569"), 0.9f);
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// 10 chars, so no optimization
|
||||
query = new SlowFuzzyQuery(new Term("field", "1234567891"), 0.9f);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
// over 10 chars, so no optimization
|
||||
query = new SlowFuzzyQuery(new Term("field", "12345678911"), 0.9f);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
|
||||
// over 10 chars, no match
|
||||
query = new SlowFuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(0, hits.length);
|
||||
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
|
||||
public void testBoostOnlyRewrite() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
||||
addDoc("Lucene", writer);
|
||||
addDoc("Lucene", writer);
|
||||
addDoc("Lucenne", writer);
|
||||
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
writer.close();
|
||||
|
||||
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene"));
|
||||
query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
|
||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals(3, hits.length);
|
||||
// normally, 'Lucenne' would be the first result as IDF will skew the score.
|
||||
assertEquals("Lucene", reader.document(hits[0].doc).get("field"));
|
||||
assertEquals("Lucene", reader.document(hits[1].doc).get("field"));
|
||||
assertEquals("Lucenne", reader.document(hits[2].doc).get("field"));
|
||||
reader.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testGiga() throws Exception {
|
||||
|
||||
Directory index = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), index);
|
||||
|
||||
addDoc("Lucene in Action", w);
|
||||
addDoc("Lucene for Dummies", w);
|
||||
|
||||
//addDoc("Giga", w);
|
||||
addDoc("Giga byte", w);
|
||||
|
||||
addDoc("ManagingGigabytesManagingGigabyte", w);
|
||||
addDoc("ManagingGigabytesManagingGigabytes", w);
|
||||
|
||||
addDoc("The Art of Computer Science", w);
|
||||
addDoc("J. K. Rowling", w);
|
||||
addDoc("JK Rowling", w);
|
||||
addDoc("Joanne K Roling", w);
|
||||
addDoc("Bruce Willis", w);
|
||||
addDoc("Willis bruce", w);
|
||||
addDoc("Brute willis", w);
|
||||
addDoc("B. willis", w);
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
|
||||
Query q = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);
|
||||
|
||||
// 3. search
|
||||
IndexSearcher searcher = newSearcher(r);
|
||||
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("Giga byte", searcher.doc(hits[0].doc).get("field"));
|
||||
r.close();
|
||||
index.close();
|
||||
}
|
||||
|
||||
public void testDistanceAsEditsSearching() throws Exception {
|
||||
Directory index = newDirectory();
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), index);
|
||||
addDoc("foobar", w);
|
||||
addDoc("test", w);
|
||||
addDoc("working", w);
|
||||
IndexReader reader = w.getReader();
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
w.close();
|
||||
|
||||
SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
|
||||
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(1, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
|
||||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
||||
hits = searcher.search(q, 10).scoreDocs;
|
||||
assertEquals(2, hits.length);
|
||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
||||
|
||||
reader.close();
|
||||
index.close();
|
||||
}
|
||||
|
||||
private void addDoc(String text, RandomIndexWriter writer) throws IOException {
|
||||
Document doc = new Document();
|
||||
doc.add(newTextField("field", text, Field.Store.YES));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
}
|
|
@@ -415,40 +415,42 @@ public class DirectSpellChecker {
    BoostAttribute boostAtt =
        e.attributes().addAttribute(BoostAttribute.class);
    while ((candidateTerm = e.next()) != null) {
      final float boost = boostAtt.getBoost();
      // For FuzzyQuery, boost is the score:
      float score = boostAtt.getBoost();
      // ignore uncompetitive hits
      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
      if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
        continue;
      }

      // ignore exact match of the same term
      if (queryTerm.bytesEquals(candidateTerm))
      if (queryTerm.bytesEquals(candidateTerm)) {
        continue;
      }

      int df = e.docFreq();

      // check docFreq if required
      if (df <= docfreq)
      if (df <= docfreq) {
        continue;
      }

      final float score;
      final String termAsString;
      if (distance == INTERNAL_LEVENSHTEIN) {
        // delay creating strings until the end
        termAsString = null;
        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
        score = boost / e.getScaleFactor() + e.getMinSimilarity();
      } else {
        spare.copyUTF8Bytes(candidateTerm);
        termAsString = spare.toString();
        score = distance.getDistance(term.text(), termAsString);
      }

      if (score < accuracy)
      if (score < accuracy) {
        continue;
      }

      // add new entry in PQ
      st.term = BytesRef.deepCopyOf(candidateTerm);
      st.boost = boost;
      st.boost = score;
      st.docfreq = df;
      st.termAsString = termAsString;
      st.score = score;

@@ -77,6 +77,15 @@ prefix, then you will now get an error as these options are incompatible with nu

New Features
----------------------
* SOLR-5725: facet.method=enum can bypass exact counts calculation with facet.exists=true, it just returns 1 for
  terms which exists in result docset. (Alexey Kozhemiakin, Sebastian Koziel, Radoslaw Zielinski via Mikhail Khludnev)

* SOLR-9127: Excel workbook (.xlsx) response writer. use 'wt=xlsx' (Tony Moriarty, noble)

* SOLR-9469: JettySolrRunner now has the option of restarting using a different
  port (Alan Woodward)

* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble)

Bug Fixes
----------------------
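The SOLR-9127 entry above describes the 'wt=xlsx' response writer that the rest of this commit implements. Purely as an illustration, here is a minimal Java sketch that saves a query result as an Excel workbook over plain HTTP; the host, port, collection name and output file (localhost:8983, techproducts, results.xlsx) are assumptions for the example, not part of the change.

    import java.io.InputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.nio.file.StandardCopyOption;

    public class XlsxExportSketch {
      public static void main(String[] args) throws Exception {
        // Assumed Solr location and collection; adjust for your setup.
        URL url = new URL("http://localhost:8983/solr/techproducts/select?q=*:*&wt=xlsx");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (InputStream in = conn.getInputStream()) {
          // The response body is an .xlsx workbook; save it to disk.
          Files.copy(in, Paths.get("results.xlsx"), StandardCopyOption.REPLACE_EXISTING);
        } finally {
          conn.disconnect();
        }
      }
    }
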
@@ -103,12 +112,19 @@ Bug Fixes

* SOLR-9461: DELETENODE, REPLACENODE should pass down the 'async' param to subcommands (shalin, noble)

* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble )

* SOLR-9444: Fix path usage for cloud backup/restore. (Hrishikesh Gadre, Uwe Schindler, Varun Thacker)

* SOLR-9381: Snitch for freedisk uses '/' instead of 'coreRootDirectory' (Tim Owen, noble)

* SOLR-9488: Shard split can fail to write commit data on shutdown/restart causing replicas to recover
  without replicating the index. This can cause data loss. (shalin)

* SOLR-9490: Fixed bugs in BoolField that caused it to erroneously return "false" for all docs depending
  on usage (Colvin Cowie, Dan Fox, hossman)

* SOLR-9438: Shard split can be marked successful and sub-shard states switched to 'active' even when
  one or more sub-shards replicas do not recover due to the leader crashing or restarting between the time
  the replicas are created and before they can recover. This can cause data loss. (shalin)

Optimizations
----------------------
@@ -135,6 +151,27 @@ Other Changes

* SOLR-9406: SolrSuggester should selectively register close hook (Gethin James, Joel Bernstein)

* SOLR-8961: Add a test module for solr-test-framework (Alan Woodward)

* SOLR-9474: MiniSolrCloudCluster will not reuse ports by default when
  restarting its JettySolrRunners (Alan Woodward)

* SOLR-9498: Remove HDFS properties from DIH solrconfig.xml, as started in SOLR-6943 (Alexandre Rafalovitch)

* SOLR-9365: Reduce noise in solr logs during graceful shutdown. (Cao Manh Dat via shalin)

==================  6.2.1 ==================

Bug Fixes
----------------------

* SOLR-9494: Use of {!collapse} sometimes doesn't correctly return true for Collector.needsScores(), especially when the
  query was cached. This can cause an exception when 'q' is a SpanQuery or potentially others. (David Smiley)

* SOLR-9408: Fix TreeMergeOutputFormat to add timestamp metadata to a commit. SolrCloud replication relies on this.
  (Jessica Cheng Mallet via Varun Thacker)


==================  6.2.0 ==================

Versions of Major Components
@@ -1168,6 +1205,23 @@ Other Changes
* SOLR-8904: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil. Obsolete methods were removed.
  (David Smiley)

=======================  5.5.3 =======================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

Versions of Major Components
---------------------
Apache Tika 1.13
Carrot2 3.12.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.6
Jetty 9.3.8.v20160314


(No Changes)


=======================  5.5.2 =======================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@ -0,0 +1,414 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.extraction;
|
||||
|
||||
import java.io.CharArrayWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Font;
|
||||
import org.apache.poi.ss.usermodel.IndexedColors;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.BasicResultContext;
|
||||
import org.apache.solr.response.RawResponseWriter;
|
||||
import org.apache.solr.response.ResultContext;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.StrField;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.ReturnFields;
|
||||
|
||||
public class XLSXResponseWriter extends RawResponseWriter {
|
||||
|
||||
@Override
|
||||
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
|
||||
// throw away arraywriter just to satisfy super requirements; we're grabbing
|
||||
// all writes before they go to it anyway
|
||||
XLSXWriter w = new XLSXWriter(new CharArrayWriter(), req, rsp);
|
||||
|
||||
LinkedHashMap<String,String> reqNamesMap = new LinkedHashMap<>();
|
||||
LinkedHashMap<String,Integer> reqWidthsMap = new LinkedHashMap<>();
|
||||
|
||||
Iterator<String> paramNamesIter = req.getParams().getParameterNamesIterator();
|
||||
while (paramNamesIter.hasNext()) {
|
||||
String nextParam = paramNamesIter.next();
|
||||
if (nextParam.startsWith("colname.")) {
|
||||
String field = nextParam.substring("colname.".length());
|
||||
reqNamesMap.put(field, req.getParams().get(nextParam));
|
||||
} else if (nextParam.startsWith("colwidth.")) {
|
||||
String field = nextParam.substring("colwidth.".length());
|
||||
reqWidthsMap.put(field, req.getParams().getInt(nextParam));
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
w.writeResponse(out, reqNamesMap, reqWidthsMap);
|
||||
} finally {
|
||||
w.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
|
||||
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
||||
}
|
||||
}
|
||||
|
||||
class XLSXWriter extends TextResponseWriter {
|
||||
|
||||
SolrQueryRequest req;
|
||||
SolrQueryResponse rsp;
|
||||
|
||||
class SerialWriteWorkbook {
|
||||
SXSSFWorkbook swb;
|
||||
Sheet sh;
|
||||
|
||||
XSSFCellStyle headerStyle;
|
||||
int rowIndex;
|
||||
Row curRow;
|
||||
int cellIndex;
|
||||
|
||||
SerialWriteWorkbook() {
|
||||
this.swb = new SXSSFWorkbook(100);
|
||||
this.sh = this.swb.createSheet();
|
||||
|
||||
this.rowIndex = 0;
|
||||
|
||||
this.headerStyle = (XSSFCellStyle)swb.createCellStyle();
|
||||
this.headerStyle.setFillBackgroundColor(IndexedColors.BLACK.getIndex());
|
||||
//solid fill
|
||||
this.headerStyle.setFillPattern((short)1);
|
||||
Font headerFont = swb.createFont();
|
||||
headerFont.setFontHeightInPoints((short)14);
|
||||
headerFont.setBoldweight(Font.BOLDWEIGHT_BOLD);
|
||||
headerFont.setColor(IndexedColors.WHITE.getIndex());
|
||||
this.headerStyle.setFont(headerFont);
|
||||
}
|
||||
|
||||
void addRow() {
|
||||
curRow = sh.createRow(rowIndex++);
|
||||
cellIndex = 0;
|
||||
}
|
||||
|
||||
void setHeaderRow() {
|
||||
curRow.setHeightInPoints((short)21);
|
||||
}
|
||||
|
||||
//sets last created cell to have header style
|
||||
void setHeaderCell() {
|
||||
curRow.getCell(cellIndex - 1).setCellStyle(this.headerStyle);
|
||||
}
|
||||
|
||||
//set the width of the most recently created column
|
||||
void setColWidth(int charWidth) {
|
||||
//width in poi is units of 1/256th of a character width for some reason
|
||||
this.sh.setColumnWidth(cellIndex - 1, 256*charWidth);
|
||||
}
|
||||
|
||||
void writeCell(String value) {
|
||||
Cell cell = curRow.createCell(cellIndex++);
|
||||
cell.setCellValue(value);
|
||||
}
|
||||
|
||||
void flush(OutputStream out) {
|
||||
try {
|
||||
swb.write(out);
|
||||
} catch (IOException e) {
|
||||
StringWriter sw = new StringWriter();
|
||||
e.printStackTrace(new PrintWriter(sw));
|
||||
String stacktrace = sw.toString();
|
||||
}finally {
|
||||
swb.dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SerialWriteWorkbook wb = new SerialWriteWorkbook();
|
||||
|
||||
static class XLField {
|
||||
String name;
|
||||
SchemaField sf;
|
||||
}
|
||||
|
||||
private Map<String,XLField> xlFields = new LinkedHashMap<String,XLField>();
|
||||
|
||||
public XLSXWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp){
|
||||
super(writer, req, rsp);
|
||||
this.req = req;
|
||||
this.rsp = rsp;
|
||||
}
|
||||
|
||||
public void writeResponse(OutputStream out, LinkedHashMap<String, String> colNamesMap,
|
||||
LinkedHashMap<String, Integer> colWidthsMap) throws IOException {
|
||||
SolrParams params = req.getParams();
|
||||
|
||||
Collection<String> fields = returnFields.getRequestedFieldNames();
|
||||
Object responseObj = rsp.getValues().get("response");
|
||||
boolean returnOnlyStored = false;
|
||||
if (fields==null||returnFields.hasPatternMatching()) {
|
||||
if (responseObj instanceof SolrDocumentList) {
|
||||
// get the list of fields from the SolrDocumentList
|
||||
if(fields==null) {
|
||||
fields = new LinkedHashSet<String>();
|
||||
}
|
||||
for (SolrDocument sdoc: (SolrDocumentList)responseObj) {
|
||||
fields.addAll(sdoc.getFieldNames());
|
||||
}
|
||||
} else {
|
||||
// get the list of fields from the index
|
||||
Iterable<String> all = req.getSearcher().getFieldNames();
|
||||
if (fields == null) {
|
||||
fields = Sets.newHashSet(all);
|
||||
} else {
|
||||
Iterables.addAll(fields, all);
|
||||
}
|
||||
}
|
||||
if (returnFields.wantsScore()) {
|
||||
fields.add("score");
|
||||
} else {
|
||||
fields.remove("score");
|
||||
}
|
||||
returnOnlyStored = true;
|
||||
}
|
||||
|
||||
for (String field : fields) {
|
||||
if (!returnFields.wantsField(field)) {
|
||||
continue;
|
||||
}
|
||||
if (field.equals("score")) {
|
||||
XLField xlField = new XLField();
|
||||
xlField.name = "score";
|
||||
xlFields.put("score", xlField);
|
||||
continue;
|
||||
}
|
||||
|
||||
SchemaField sf = schema.getFieldOrNull(field);
|
||||
if (sf == null) {
|
||||
FieldType ft = new StrField();
|
||||
sf = new SchemaField(field, ft);
|
||||
}
|
||||
|
||||
// Return only stored fields, unless an explicit field list is specified
|
||||
if (returnOnlyStored && sf != null && !sf.stored()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
XLField xlField = new XLField();
|
||||
xlField.name = field;
|
||||
xlField.sf = sf;
|
||||
xlFields.put(field, xlField);
|
||||
}
|
||||
|
||||
|
||||
|
||||
wb.addRow();
|
||||
//write header
|
||||
for (XLField xlField : xlFields.values()) {
|
||||
String printName = xlField.name;
|
||||
int colWidth = 14;
|
||||
|
||||
String niceName = colNamesMap.get(xlField.name);
|
||||
if (niceName != null) {
|
||||
printName = niceName;
|
||||
}
|
||||
|
||||
Integer niceWidth = colWidthsMap.get(xlField.name);
|
||||
if (niceWidth != null) {
|
||||
colWidth = niceWidth.intValue();
|
||||
}
|
||||
|
||||
writeStr(xlField.name, printName, false);
|
||||
wb.setColWidth(colWidth);
|
||||
wb.setHeaderCell();
|
||||
}
|
||||
wb.setHeaderRow();
|
||||
wb.addRow();
|
||||
|
||||
if (responseObj instanceof ResultContext) {
|
||||
writeDocuments(null, (ResultContext)responseObj );
|
||||
}
|
||||
else if (responseObj instanceof DocList) {
|
||||
ResultContext ctx = new BasicResultContext((DocList)responseObj, returnFields, null, null, req);
|
||||
writeDocuments(null, ctx );
|
||||
} else if (responseObj instanceof SolrDocumentList) {
|
||||
writeSolrDocumentList(null, (SolrDocumentList)responseObj, returnFields );
|
||||
}
|
||||
|
||||
wb.flush(out);
|
||||
wb = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeNamedList(String name, NamedList val) throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeStartDocumentList(String name,
|
||||
long start, int size, long numFound, Float maxScore) throws IOException
|
||||
{
|
||||
// nothing
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeEndDocumentList() throws IOException
|
||||
{
|
||||
// nothing
|
||||
}
|
||||
|
||||
//NOTE: a document cannot currently contain another document
|
||||
List tmpList;
|
||||
@Override
|
||||
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx ) throws IOException {
|
||||
if (tmpList == null) {
|
||||
tmpList = new ArrayList(1);
|
||||
tmpList.add(null);
|
||||
}
|
||||
|
||||
for (XLField xlField : xlFields.values()) {
|
||||
Object val = doc.getFieldValue(xlField.name);
|
||||
int nVals = val instanceof Collection ? ((Collection)val).size() : (val==null ? 0 : 1);
|
||||
if (nVals == 0) {
|
||||
writeNull(xlField.name);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((xlField.sf != null && xlField.sf.multiValued()) || nVals > 1) {
|
||||
Collection values;
|
||||
// normalize to a collection
|
||||
if (val instanceof Collection) {
|
||||
values = (Collection)val;
|
||||
} else {
|
||||
tmpList.set(0, val);
|
||||
values = tmpList;
|
||||
}
|
||||
|
||||
writeArray(xlField.name, values.iterator());
|
||||
|
||||
} else {
|
||||
// normalize to first value
|
||||
if (val instanceof Collection) {
|
||||
Collection values = (Collection)val;
|
||||
val = values.iterator().next();
|
||||
}
|
||||
writeVal(xlField.name, val);
|
||||
}
|
||||
}
|
||||
wb.addRow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeArray(String name, Iterator val) throws IOException {
|
||||
StringBuffer output = new StringBuffer();
|
||||
while (val.hasNext()) {
|
||||
Object v = val.next();
|
||||
if (v instanceof IndexableField) {
|
||||
IndexableField f = (IndexableField)v;
|
||||
if (v instanceof Date) {
|
||||
output.append(((Date) val).toInstant().toString() + "; ");
|
||||
} else {
|
||||
output.append(f.stringValue() + "; ");
|
||||
}
|
||||
} else {
|
||||
output.append(v.toString() + "; ");
|
||||
}
|
||||
}
|
||||
if (output.length() > 0) {
|
||||
output.deleteCharAt(output.length()-1);
|
||||
output.deleteCharAt(output.length()-1);
|
||||
}
|
||||
writeStr(name, output.toString(), false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeNull(String name) throws IOException {
|
||||
wb.writeCell("");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeInt(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeLong(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeBool(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeFloat(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeDouble(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeDate(String name, Date val) throws IOException {
|
||||
writeDate(name, val.toInstant().toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeDate(String name, String val) throws IOException {
|
||||
wb.writeCell(val);
|
||||
}
|
||||
}
|
|
@@ -415,6 +415,7 @@
    -->
    <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
    <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
    <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
    <dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
    <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
    <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
@@ -422,6 +423,7 @@
    <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
    <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
    <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
    <dynamicField name="*_dt1" type="date" indexed="true" stored="true" multiValued="false"/>

    <dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
    <dynamicField name="*_sS" type="string" indexed="false" stored="true"/>

@ -0,0 +1,257 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.extraction;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.time.Instant;
|
||||
import java.util.Date;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.QueryResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.response.RawResponseWriter;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestXLSXResponseWriter extends SolrTestCaseJ4 {
|
||||
|
||||
private static XLSXResponseWriter writerXlsx;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
System.setProperty("enable.update.log", "false");
|
||||
initCore("solrconfig.xml","schema.xml",getFile("extraction/solr").getAbsolutePath());
|
||||
createIndex();
|
||||
//find a reference to the default response writer so we can redirect its output later
|
||||
SolrCore testCore = h.getCore();
|
||||
QueryResponseWriter writer = testCore.getQueryResponseWriter("xlsx");
|
||||
if (writer instanceof XLSXResponseWriter) {
|
||||
writerXlsx = (XLSXResponseWriter) testCore.getQueryResponseWriter("xlsx");
|
||||
} else {
|
||||
throw new Exception("XLSXResponseWriter not registered with solr core");
|
||||
}
|
||||
}
|
||||
|
||||
public static void createIndex() {
|
||||
assertU(adoc("id","1", "foo_i","-1", "foo_s","hi", "foo_l","12345678987654321", "foo_b","false", "foo_f","1.414","foo_d","-1.0E300","foo_dt1","2000-01-02T03:04:05Z"));
|
||||
assertU(adoc("id","2", "v_ss","hi", "v_ss","there", "v2_ss","nice", "v2_ss","output", "shouldbeunstored","foo"));
|
||||
assertU(adoc("id","3", "shouldbeunstored","foo"));
|
||||
assertU(adoc("id","4", "foo_s1","foo"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void cleanupWriter() throws Exception {
|
||||
writerXlsx = null;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStructuredDataViaBaseWriters() throws IOException, Exception {
|
||||
SolrQueryResponse rsp = new SolrQueryResponse();
|
||||
// Don't send a ContentStream back, this will fall back to the configured base writer.
|
||||
// But abuse the CONTENT key to ensure writer is also checking type
|
||||
rsp.add(RawResponseWriter.CONTENT, "test");
|
||||
rsp.add("foo", "bar");
|
||||
|
||||
SolrQueryRequest r = req();
|
||||
|
||||
// check Content-Type
|
||||
assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", writerXlsx.getContentType(r, rsp));
|
||||
|
||||
// test our basic types,and that fields come back in the requested order
|
||||
XSSFSheet resultSheet = getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1"));
|
||||
|
||||
assertEquals("id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1\n1,hi,-1,12345678987654321,F,1.414,-1.0E300,2000-01-02T03:04:05Z\n"
|
||||
, getStringFromSheet(resultSheet));
|
||||
|
||||
resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "fl","id,score,foo_s"));
|
||||
// test retrieving score
|
||||
assertEquals("id,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));
|
||||
|
||||
resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "colname.id", "I.D.", "colwidth.id", "10",
|
||||
"fl","id,score,foo_s"));
|
||||
// test override colname/width
|
||||
assertEquals("I.D.,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));
|
||||
// test colwidth (value returned is in 256ths of a character as per excel standard)
|
||||
assertEquals(10*256, resultSheet.getColumnWidth(0));
|
||||
|
||||
resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,v_ss"));
|
||||
// test multivalued
|
||||
assertEquals("id,v_ss\n2,hi; there\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// test retrieving fields from index
|
||||
resultSheet = getWSResultForQuery(req("q","*:*", "wt","xslx", "fl","*,score"));
|
||||
String result = getStringFromSheet(resultSheet);
|
||||
for (String field : "id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss,score".split(",")) {
|
||||
assertTrue(result.indexOf(field) >= 0);
|
||||
}
|
||||
|
||||
// test null values
|
||||
resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,foo_s,v_ss"));
|
||||
assertEquals("id,foo_s,v_ss\n2,,hi; there\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// now test SolrDocumentList
|
||||
SolrDocument d = new SolrDocument();
|
||||
SolrDocument d1 = d;
|
||||
d.addField("id","1");
|
||||
d.addField("foo_i",-1);
|
||||
d.addField("foo_s","hi");
|
||||
d.addField("foo_l","12345678987654321L");
|
||||
d.addField("foo_b",false);
|
||||
d.addField("foo_f",1.414f);
|
||||
d.addField("foo_d",-1.0E300);
|
||||
d.addField("foo_dt1", new Date(Instant.parse("2000-01-02T03:04:05Z").toEpochMilli()));
|
||||
d.addField("score", "2.718");
|
||||
|
||||
d = new SolrDocument();
|
||||
SolrDocument d2 = d;
|
||||
d.addField("id","2");
|
||||
d.addField("v_ss","hi");
|
||||
d.addField("v_ss","there");
|
||||
d.addField("v2_ss","nice");
|
||||
d.addField("v2_ss","output");
|
||||
d.addField("score", "89.83");
|
||||
d.addField("shouldbeunstored","foo");
|
||||
|
||||
SolrDocumentList sdl = new SolrDocumentList();
|
||||
sdl.add(d1);
|
||||
sdl.add(d2);
|
||||
|
||||
SolrQueryRequest req = req("q","*:*");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.addResponse(sdl);
|
||||
|
||||
rsp.setReturnFields( new SolrReturnFields("id,foo_s", req) );
|
||||
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("id,foo_s\n1,hi\n2,\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// try scores
|
||||
rsp.setReturnFields( new SolrReturnFields("id,score,foo_s", req) );
|
||||
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("id,score,foo_s\n1,2.718,hi\n2,89.83,\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// get field values from docs... should be ordered and not include score unless requested
|
||||
rsp.setReturnFields( new SolrReturnFields("*", req) );
|
||||
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss\n" +
|
||||
"1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z,,\n" +
|
||||
"2,,,,,,,,hi; there,nice; output\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// get field values and scores - just check that the scores are there... we don't guarantee where
|
||||
rsp.setReturnFields( new SolrReturnFields("*,score", req) );
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
String s = getStringFromSheet(resultSheet);
|
||||
assertTrue(s.indexOf("score") >=0 && s.indexOf("2.718") > 0 && s.indexOf("89.83") > 0 );
|
||||
|
||||
// Test field globs
|
||||
rsp.setReturnFields( new SolrReturnFields("id,foo*", req) );
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1\n" +
|
||||
"1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z\n" +
|
||||
"2,,,,,,,\n", getStringFromSheet(resultSheet));
|
||||
|
||||
rsp.setReturnFields( new SolrReturnFields("id,*_d*", req) );
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("id,foo_d,foo_dt1\n" +
|
||||
"1,-1.0E300,2000-01-02T03:04:05Z\n" +
|
||||
"2,,\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// Test function queries
|
||||
rsp.setReturnFields( new SolrReturnFields("sum(1,1),id,exists(foo_s1),div(9,1),foo_f", req) );
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("sum(1,1),id,exists(foo_s1),div(9,1),foo_f\n" +
|
||||
",1,,,1.414\n" +
|
||||
",2,,,\n", getStringFromSheet(resultSheet));
|
||||
|
||||
// Test transformers
|
||||
rsp.setReturnFields( new SolrReturnFields("mydocid:[docid],[explain]", req) );
|
||||
resultSheet = getWSResultForQuery(req, rsp);
|
||||
assertEquals("mydocid,[explain]\n" +
|
||||
",\n" +
|
||||
",\n", getStringFromSheet(resultSheet));
|
||||
|
||||
req.close();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPseudoFields() throws Exception {
|
||||
// Use Pseudo Field
|
||||
SolrQueryRequest req = req("q","id:1", "wt","xlsx", "fl","XXX:id,foo_s");
|
||||
XSSFSheet resultSheet = getWSResultForQuery(req);
|
||||
assertEquals("XXX,foo_s\n1,hi\n", getStringFromSheet(resultSheet));
|
||||
|
||||
String txt = getStringFromSheet(getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","XXX:id,YYY:[docid],FOO:foo_s")));
|
||||
String[] lines = txt.split("\n");
|
||||
assertEquals(2, lines.length);
|
||||
assertEquals("XXX,YYY,FOO", lines[0] );
|
||||
assertEquals("1,0,hi", lines[1] );
|
||||
|
||||
//assertions specific to multiple pseudofields functions like abs, div, exists, etc.. (SOLR-5423)
|
||||
String funcText = getStringFromSheet(getWSResultForQuery(req("q","*", "wt","xlsx", "fl","XXX:id,YYY:exists(foo_s1)")));
|
||||
String[] funcLines = funcText.split("\n");
|
||||
assertEquals(5, funcLines.length);
|
||||
assertEquals("XXX,YYY", funcLines[0] );
|
||||
assertEquals("1,false", funcLines[1] );
|
||||
assertEquals("3,false", funcLines[3] );
|
||||
}
|
||||
|
||||
// returns first worksheet as XLSXResponseWriter only returns one sheet
|
||||
private XSSFSheet getWSResultForQuery(SolrQueryRequest req) throws IOException, Exception {
|
||||
SolrQueryResponse rsp = h.queryAndResponse("standard", req);
|
||||
return getWSResultForQuery(req, rsp);
|
||||
}
|
||||
|
||||
private XSSFSheet getWSResultForQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, Exception {
|
||||
ByteArrayOutputStream xmlBout = new ByteArrayOutputStream();
|
||||
writerXlsx.write(xmlBout, req, rsp);
|
||||
XSSFWorkbook output = new XSSFWorkbook(new ByteArrayInputStream(xmlBout.toByteArray()));
|
||||
XSSFSheet sheet = output.getSheetAt(0);
|
||||
req.close();
|
||||
output.close();
|
||||
return sheet;
|
||||
}
|
||||
|
||||
private String getStringFromSheet(XSSFSheet sheet) {
|
||||
StringBuilder output = new StringBuilder();
|
||||
for (Row row: sheet) {
|
||||
for (Cell cell: row) {
|
||||
output.append(cell.getStringCellValue());
|
||||
output.append(",");
|
||||
}
|
||||
output.setLength(output.length() - 1);
|
||||
output.append("\n");
|
||||
}
|
||||
return output.toString();
|
||||
}
|
||||
}
|
|
@@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import com.google.common.base.Preconditions;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@@ -40,12 +41,11 @@ import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.misc.IndexMergeTool;
import org.apache.lucene.store.Directory;
import org.apache.solr.store.hdfs.HdfsDirectory;
import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

/**
 * See {@link IndexMergeTool}.
 */
@@ -151,7 +151,7 @@ public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable>
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        timer = new RTimer();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments);
          writer.forceMerge(maxSegments);
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
@@ -161,6 +161,10 @@ public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable>
        }
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {}ms", maxSegments, timer.getTime());

        // Set Solr's commit data so the created index is usable by SolrCloud. E.g. Currently SolrCloud relies on
        // commitTimeMSec in the commit data to do replication.
        SolrIndexWriter.setCommitData(writer);

        timer = new RTimer();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();

@@ -307,9 +307,24 @@ public class JettySolrRunner {
  /**
   * Start the Jetty server
   *
   * If the server has been started before, it will restart using the same port
   *
   * @throws Exception if an error occurs on startup
   */
  public void start() throws Exception {
    start(true);
  }

  /**
   * Start the Jetty server
   *
   * @param reusePort when true, will start up on the same port as used by any
   *                  previous runs of this JettySolrRunner. If false, will use
   *                  the port specified by the server's JettyConfig.
   *
   * @throws Exception if an error occurs on startup
   */
  public void start(boolean reusePort) throws Exception {
    // Do not let Jetty/Solr pollute the MDC for this thread
    Map<String, String> prevContext = MDC.getCopyOfContextMap();
    MDC.clear();
@@ -317,7 +332,8 @@ public class JettySolrRunner {
    // if started before, make a new server
    if (startedBefore) {
      waitOnSolr = false;
      init(lastPort);
      int port = reusePort ? lastPort : this.config.port;
      init(port);
    } else {
      startedBefore = true;
    }

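The new start(boolean reusePort) overload shown above is what SOLR-9469 exposes: passing false makes a previously started runner bind to the port from its JettyConfig instead of the port it last used. A minimal, hypothetical sketch (not part of this commit) of a test helper that bounces an already constructed runner onto its configured port:

    import org.apache.solr.client.solrj.embedded.JettySolrRunner;

    // Hypothetical helper, not part of this commit.
    final class JettyRestartHelper {
      /** Restart an already constructed runner on the port from its JettyConfig. */
      static void restartOnConfiguredPort(JettySolrRunner jetty) throws Exception {
        jetty.stop();        // shut the running server down
        jetty.start(false);  // reusePort=false -> init(this.config.port), per the change above
      }
    }
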
@@ -79,7 +79,7 @@ public class DeleteShardCmd implements Cmd {
    // TODO: Add check for range gaps on Slice deletion
    final Slice.State state = slice.getState();
    if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY
        || state == Slice.State.CONSTRUCTION)) {
        || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state
          + ". Only non-active (or custom-hashed) slices can be deleted.");
    }

@@ -288,8 +288,12 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
    String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
    ActionThrottle lt;
    try (SolrCore core = cc.getCore(coreName)) {
      if (core == null) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
      if (core == null ) {
        if (cc.isShutDown()) {
          return;
        } else {
          throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
        }
      }
      MDCLoggingContext.setCore(core);
      lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
@@ -325,9 +329,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
    try (SolrCore core = cc.getCore(coreName)) {

      if (core == null) {
        cancelElection();
        throw new SolrException(ErrorCode.SERVER_ERROR,
            "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
        if (!zkController.getCoreContainer().isShutDown()) {
          cancelElection();
          throw new SolrException(ErrorCode.SERVER_ERROR,
              "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
        } else {
          return;
        }
      }

      // should I be leader?

@@ -347,7 +347,9 @@ public class LeaderElector {
          // am I the next leader?
          checkIfIamLeader(context, true);
        } catch (Exception e) {
          log.warn("", e);
          if (!zkClient.isClosed()) {
            log.warn("", e);
          }
        }
      }
    }

@@ -176,7 +176,9 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
    try {
      prioritizer.prioritizeOverseerNodes(myId);
    } catch (Exception e) {
      log.error("Unable to prioritize overseer ", e);
      if (!zkStateReader.getZkClient().isClosed()) {
        log.error("Unable to prioritize overseer ", e);
      }
    }

    // TODO: Make maxThreads configurable.

@@ -582,7 +582,7 @@ public class RecoveryStrategy extends Thread implements Closeable {
    prepCmd.setCheckLive(true);
    prepCmd.setOnlyIfLeader(true);
    final Slice.State state = slice.getState();
    if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY) {
    if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
      prepCmd.setOnlyIfLeaderActive(true);
    }
    HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);

@@ -47,6 +47,7 @@ import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.util.TestInjection;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -126,6 +127,13 @@ public class SplitShardCmd implements Cmd {
      Thread.currentThread().interrupt();
    }

    // let's record the ephemeralOwner of the parent leader node
    Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
    if (leaderZnodeStat == null) {
      // we just got to know the leader but its live node is gone already!
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
    }

    DocRouter.Range range = parentSlice.getRange();
    if (range == null) {
      range = new PlainIdRouter().fullRange();
@@ -253,6 +261,8 @@ public class SplitShardCmd implements Cmd {
        propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
        propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
        propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
        propMap.put("shard_parent_node", parentShardLeader.getNodeName());
        propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
        DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
        inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));

@@ -420,6 +430,32 @@ public class SplitShardCmd implements Cmd {

      assert TestInjection.injectSplitFailureBeforeReplicaCreation();

      long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
      // compare against the ephemeralOwner of the parent leader node
      leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
      if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
        // put sub-shards in recovery_failed state
        DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
        Map<String, Object> propMap = new HashMap<>();
        propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
        for (String subSlice : subSlices) {
          propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
        }
        propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
        ZkNodeProps m = new ZkNodeProps(propMap);
        inQueue.offer(Utils.toJSON(m));

        if (leaderZnodeStat == null) {
          // the leader is not live anymore, fail the split!
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
        } else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
          // there's a new leader, fail the split!
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
              "The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from "
                  + ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
        }
      }

      // we must set the slice state into recovery before actually creating the replica cores
      // this ensures that the logic inside Overseer to update sub-shard state to 'active'
      // always gets a chance to execute. See SOLR-7673

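The guard added above records the parent leader's ZooKeeper ephemeralOwner (its session id) before the sub-shard replicas are created and re-reads it afterwards; if the live node is gone or is owned by a different session, the sub-shards are flipped to RECOVERY_FAILED and the split is aborted. A minimal sketch of just that comparison, using only the ZooKeeper Stat type (the helper class and method names are hypothetical, not part of this commit):

    import org.apache.zookeeper.data.Stat;

    final class LeaderSessionCheck {
      // True only if the leader's live node still exists and is still owned by the
      // ZooKeeper session that was recorded when the split started.
      static boolean sameLeaderSession(long recordedEphemeralOwner, Stat currentLiveNodeStat) {
        return currentLiveNodeStat != null
            && currentLiveNodeStat.getEphemeralOwner() == recordedEphemeralOwner;
      }
    }
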
@@ -59,11 +59,19 @@ public class CollectionMutator {
      String shardRange = message.getStr(ZkStateReader.SHARD_RANGE_PROP);
      String shardState = message.getStr(ZkStateReader.SHARD_STATE_PROP);
      String shardParent = message.getStr(ZkStateReader.SHARD_PARENT_PROP);
      String shardParentZkSession = message.getStr("shard_parent_zk_session");
      String shardParentNode = message.getStr("shard_parent_node");
      sliceProps.put(Slice.RANGE, shardRange);
      sliceProps.put(ZkStateReader.STATE_PROP, shardState);
      if (shardParent != null) {
        sliceProps.put(Slice.PARENT, shardParent);
      }
      if (shardParentZkSession != null) {
        sliceProps.put("shard_parent_zk_session", shardParentZkSession);
      }
      if (shardParentNode != null) {
        sliceProps.put("shard_parent_node", shardParentNode);
      }
      collection = updateSlice(collectionName, collection, new Slice(shardId, replicas, sliceProps));
      return new ZkWriteCommand(collectionName, collection);
    } else {

@ -38,6 +38,7 @@ import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.Utils;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -403,19 +404,57 @@ public class ReplicaMutator {
}
if (allActive) {
// hurray, all sub shard replicas are active
log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE. Preparing to switch shard states.", sliceName);
log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE.", sliceName);
String parentSliceName = (String) sliceProps.remove(Slice.PARENT);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(parentSliceName, Slice.State.INACTIVE.toString());
propMap.put(sliceName, Slice.State.ACTIVE.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.ACTIVE.toString());
// now lets see if the parent leader is still the same or else there's a chance of data loss
// see SOLR-9438 for details
String shardParentZkSession = (String) sliceProps.remove("shard_parent_zk_session");
String shardParentNode = (String) sliceProps.remove("shard_parent_node");
boolean isLeaderSame = true;
if (shardParentNode != null && shardParentZkSession != null) {
log.info("Checking whether sub-shard leader node is still the same one at {} with ZK session id {}", shardParentNode, shardParentZkSession);
try {
Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE
+ "/" + shardParentNode, null, true);
if (leaderZnodeStat == null) {
log.error("The shard leader node: {} is not live anymore!", shardParentNode);
isLeaderSame = false;
} else if (leaderZnodeStat.getEphemeralOwner() != Long.parseLong(shardParentZkSession)) {
log.error("The zk session id for shard leader node: {} has changed from {} to {}",
shardParentNode, shardParentZkSession, leaderZnodeStat.getEphemeralOwner());
isLeaderSame = false;
}
} catch (Exception e) {
log.warn("Error occurred while checking if parent shard node is still live with the same zk session id. " +
"We cannot switch shard states at this time.", e);
return collection; // we aren't going to make any changes right now
}
}
if (isLeaderSame) {
log.info("Sub-shard leader node is still the same one at {} with ZK session id {}. Preparing to switch shard states.", shardParentNode, shardParentZkSession);
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(parentSliceName, Slice.State.INACTIVE.toString());
propMap.put(sliceName, Slice.State.ACTIVE.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.ACTIVE.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
} else {
// we must mark the shard split as failed by switching sub-shards to recovery_failed state
Map<String, Object> propMap = new HashMap<>();
propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
propMap.put(sliceName, Slice.State.RECOVERY_FAILED.toString());
for (Slice subShardSlice : subShardSlices) {
propMap.put(subShardSlice.getName(), Slice.State.RECOVERY_FAILED.toString());
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
}
propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
ZkNodeProps m = new ZkNodeProps(propMap);
return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
}
}
}
@ -164,9 +164,10 @@ public class SliceMutator {
log.info("Update shard state " + key + " to " + message.getStr(key));
Map<String, Object> props = slice.shallowCopy();
if (Slice.State.getState((String) props.get(ZkStateReader.STATE_PROP)) == Slice.State.RECOVERY
&& Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
if (Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
props.remove(Slice.PARENT);
props.remove("shard_parent_node");
props.remove("shard_parent_zk_session");
}
props.put(ZkStateReader.STATE_PROP, message.getStr(key));
Slice newSlice = new Slice(slice.getName(), slice.getReplicasCopy(), props);
@ -2215,6 +2215,12 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
m.put("smile", new SmileResponseWriter());
m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
DEFAULT_RESPONSE_WRITERS = Collections.unmodifiableMap(m);
try {
m.put("xlsx",
(QueryResponseWriter) Class.forName("org.apache.solr.handler.extraction.XLSXResponseWriter").newInstance());
} catch (Exception e) {
//don't worry; solrcell contrib not in class path
}
}
private static BinaryResponseWriter getFileStreamWriter() {

@ -2237,7 +2243,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
}
public interface RawWriter {
public void write(OutputStream os) throws IOException ;
void write(OutputStream os) throws IOException ;
}
/** Configure the query response writers. There will always be a default writer; additional
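The SolrCore hunk above registers the "xlsx" response writer reflectively and simply swallows the exception when the extraction contrib is not on the classpath. A small sketch of that optional-registration pattern follows; the class name used here is hypothetical, only the Class.forName/ignore-on-failure shape matches the patch.

import java.util.HashMap;
import java.util.Map;

// Sketch of optional, reflection-based registration: if the named class is not on
// the classpath, the feature is silently skipped instead of failing startup.
// "com.example.OptionalWriter" is a made-up name for illustration.
public final class OptionalRegistration {
  public static void main(String[] args) {
    Map<String, Object> writers = new HashMap<>();
    try {
      writers.put("optional", Class.forName("com.example.OptionalWriter").newInstance());
    } catch (Exception e) {
      // don't worry; the optional contrib is simply not on the classpath
    }
    System.out.println("registered writers: " + writers.keySet());
  }
}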
@ -308,6 +308,9 @@ public class IndexFetcher {
long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
long latestGeneration = (Long) response.get(GENERATION);
LOG.info("Master's generation: " + latestGeneration);
LOG.info("Master's version: " + latestVersion);
// TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
if (commit == null) {

@ -326,6 +329,7 @@ public class IndexFetcher {
}
}
LOG.info("Slave's generation: " + commit.getGeneration());
if (latestVersion == 0L) {
if (forceReplication && commit.getGeneration() != 0) {

@ -353,8 +357,6 @@ public class IndexFetcher {
successfulInstall = true;
return true;
}
LOG.info("Master's generation: " + latestGeneration);
LOG.info("Slave's generation: " + commit.getGeneration());
LOG.info("Starting replication process");
// get the list of files first
fetchFileList(latestGeneration);
@ -1265,7 +1265,14 @@ public class FacetComponent extends SearchComponent {
if (facetFs != null) {
for (String field : facetFs) {
DistribFieldFacet ff = new DistribFieldFacet(rb, field);
final DistribFieldFacet ff;
if (params.getFieldBool(field, FacetParams.FACET_EXISTS, false)) {
// cap facet count by 1 with this method
ff = new DistribFacetExistsField(rb, field);
} else {
ff = new DistribFieldFacet(rb, field);
}
facets.put(ff.getKey(), ff);
}
}

@ -1469,7 +1476,7 @@ public class FacetComponent extends SearchComponent {
sfc.termNum = termNum++;
counts.put(name, sfc);
}
sfc.count += count;
incCount(sfc, count);
terms.set(sfc.termNum);
last = count;
}

@ -1485,6 +1492,10 @@ public class FacetComponent extends SearchComponent {
missingMax[shardNum] = last;
counted[shardNum] = terms;
}
protected void incCount(ShardFacetCount sfc, long count) {
sfc.count += count;
}
public ShardFacetCount[] getLexSorted() {
ShardFacetCount[] arr

@ -1530,7 +1541,7 @@ public class FacetComponent extends SearchComponent {
}
}
}
/**
* <b>This API is experimental and subject to change</b>
*/

@ -1547,4 +1558,18 @@ public class FacetComponent extends SearchComponent {
}
}
private static final class DistribFacetExistsField extends DistribFieldFacet {
private DistribFacetExistsField(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
SimpleFacets.checkMincountOnExists(field, minCount);
}
@Override
protected void incCount(ShardFacetCount sfc, long count) {
if (count>0) {
sfc.count = 1;
}
}
}
}
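DistribFacetExistsField overrides incCount so that any positive shard contribution is capped at 1, which is how facet.exists turns per-shard term counts into a "does this value occur" flag during distributed merging. A tiny stand-alone sketch of that merge rule follows (hypothetical class, not the Solr type).

import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of the facet.exists merge rule: a regular merge sums shard counts,
// the "exists" variant caps every non-zero contribution at 1.
public final class ExistsMergeSketch {
  static void mergeSum(Map<String, Long> acc, String term, long shardCount) {
    acc.merge(term, shardCount, Long::sum);
  }

  static void mergeExists(Map<String, Long> acc, String term, long shardCount) {
    if (shardCount > 0) {
      acc.put(term, 1L); // cap at 1: we only care whether the term occurs at all
    } else {
      acc.putIfAbsent(term, 0L);
    }
  }

  public static void main(String[] args) {
    Map<String, Long> sum = new LinkedHashMap<>(), exists = new LinkedHashMap<>();
    long[] shardCounts = {3, 0, 5};
    for (long c : shardCounts) {
      mergeSum(sum, "foo", c);
      mergeExists(exists, "foo", c);
    }
    System.out.println(sum);    // {foo=8}
    System.out.println(exists); // {foo=1}
  }
}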
@ -406,7 +406,8 @@ public class SimpleFacets {
String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
NamedList<Integer> counts;
SchemaField sf = searcher.getSchema().getField(field);
FieldType ft = sf.getType();

@ -422,13 +423,15 @@ public class SimpleFacets {
requestedMethod = FacetMethod.FC;
} else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
requestedMethod = FacetMethod.UIF;
}else{
} else {
requestedMethod=null;
}
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
FacetMethod appliedFacetMethod = selectFacetMethod(sf, requestedMethod, mincount);
FacetMethod appliedFacetMethod = selectFacetMethod(field,
sf, requestedMethod, mincount,
exists);
RTimer timer = null;
if (fdebug != null) {

@ -446,7 +449,8 @@ public class SimpleFacets {
switch (appliedFacetMethod) {
case ENUM:
assert TrieField.getMainValuePrefix(ft) == null;
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase, params);
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase,
exists);
break;
case FCS:
assert !multiToken;

@ -538,6 +542,29 @@ public class SimpleFacets {
return counts;
}
/**
* @param existsRequested facet.exists=true is passed for the given field
* */
static FacetMethod selectFacetMethod(String fieldName,
SchemaField field, FacetMethod method, Integer mincount,
boolean existsRequested) {
if (existsRequested) {
checkMincountOnExists(fieldName, mincount);
if (method == null) {
method = FacetMethod.ENUM;
}
}
final FacetMethod facetMethod = selectFacetMethod(field, method, mincount);
if (existsRequested && facetMethod!=FacetMethod.ENUM) {
throw new SolrException (ErrorCode.BAD_REQUEST,
FacetParams.FACET_EXISTS + "=true is requested, but "+
FacetParams.FACET_METHOD+"="+FacetParams.FACET_METHOD_enum+ " can't be used with "+fieldName
);
}
return facetMethod;
}
/**
* This method will force the appropriate facet method even if the user provided a different one as a request parameter
*
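The new selectFacetMethod overload validates the facet.exists combination: it requires mincount of at most 1 and only works with the enum method (or no explicit method, which it then forces to enum). Purely as an illustration of a request that exercises this path, here is a SolrJ-style query sketch; the field name is made up, and it assumes solr-solrj is on the classpath.

import org.apache.solr.client.solrj.SolrQuery;

// Sketch of a request exercising the facet.exists code path added above.
// Field and values are illustrative; the query is only built and printed here.
public final class FacetExistsQueryExample {
  public static void main(String[] args) {
    SolrQuery q = new SolrQuery("*:*");
    q.setRows(0);
    q.setFacet(true);
    q.addFacetField("category_s");      // hypothetical string field
    q.set("facet.exists", "true");      // cap counts at 1: "does the value occur?"
    q.set("facet.method", "enum");      // facet.exists is only valid with the enum method
    q.set("facet.mincount", "1");       // mincount > 1 would be rejected with BAD_REQUEST
    System.out.println(q);              // prints the encoded request parameters
  }
}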
@ -811,7 +838,8 @@ public class SimpleFacets {
|
|||
* @see FacetParams#FACET_ZEROS
|
||||
* @see FacetParams#FACET_MISSING
|
||||
*/
|
||||
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase, SolrParams params)
|
||||
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing,
|
||||
String sort, String prefix, String contains, boolean ignoreCase, boolean intersectsCheck)
|
||||
throws IOException {
|
||||
|
||||
/* :TODO: potential optimization...
|
||||
|
@ -901,7 +929,11 @@ public class SimpleFacets {
|
|||
deState.postingsEnum = postingsEnum;
|
||||
}
|
||||
|
||||
c = searcher.numDocs(docs, deState);
|
||||
if (intersectsCheck) {
|
||||
c = searcher.intersects(docs, deState) ? 1 : 0;
|
||||
} else {
|
||||
c = searcher.numDocs(docs, deState);
|
||||
}
|
||||
|
||||
postingsEnum = deState.postingsEnum;
|
||||
} else {
|
||||
|
@ -916,19 +948,33 @@ public class SimpleFacets {
|
|||
if (postingsEnum instanceof MultiPostingsEnum) {
|
||||
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
|
||||
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
|
||||
|
||||
SEGMENTS_LOOP:
|
||||
for (int subindex = 0; subindex < numSubs; subindex++) {
|
||||
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
|
||||
if (sub.postingsEnum == null) continue;
|
||||
int base = sub.slice.start;
|
||||
int docid;
|
||||
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (fastForRandomSet.exists(docid + base)) c++;
|
||||
if (fastForRandomSet.exists(docid + base)) {
|
||||
c++;
|
||||
if (intersectsCheck) {
|
||||
assert c==1;
|
||||
break SEGMENTS_LOOP;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
int docid;
|
||||
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (fastForRandomSet.exists(docid)) c++;
|
||||
if (fastForRandomSet.exists(docid)) {
|
||||
c++;
|
||||
if (intersectsCheck) {
|
||||
assert c==1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -969,6 +1015,15 @@ public class SimpleFacets {
|
|||
return res;
|
||||
}
|
||||
|
||||
public static void checkMincountOnExists(String fieldName, int mincount) {
|
||||
if (mincount > 1) {
|
||||
throw new SolrException (ErrorCode.BAD_REQUEST,
|
||||
FacetParams.FACET_MINCOUNT + "="+mincount+" exceed 1 that's not supported with " +
|
||||
FacetParams.FACET_EXISTS + "=true for " + fieldName
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple key=>val pair whose natural order is such that
|
||||
* <b>higher</b> vals come before lower vals.
|
||||
|
|
|
@ -19,7 +19,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
@ -128,11 +128,13 @@ public class BoolField extends PrimitiveFieldType {
@Override
public String toExternal(IndexableField f) {
if (f.binaryValue() == null) {
return null;
if (null != f.binaryValue()) {
return indexedToReadable(f.binaryValue().utf8ToString());
}
return indexedToReadable(f.binaryValue().utf8ToString());
if (null != f.stringValue()) {
return indexedToReadable(f.stringValue());
}
return null;
}
@Override
@ -517,6 +517,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
@Override public boolean needsScores() { return true; }
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
this.contexts[context.ord] = context;
|
||||
|
@ -726,6 +728,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
|
||||
}
|
||||
|
||||
@Override public boolean needsScores() { return true; }
|
||||
|
||||
@Override
|
||||
protected void doSetNextReader(LeafReaderContext context) throws IOException {
|
||||
this.contexts[context.ord] = context;
|
||||
|
@ -909,6 +913,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
@Override public boolean needsScores() { return needsScores || super.needsScores(); }
|
||||
|
||||
public void setScorer(Scorer scorer) {
|
||||
this.collapseStrategy.setScorer(scorer);
|
||||
}
|
||||
|
@ -1069,6 +1075,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
}
|
||||
}
|
||||
|
||||
@Override public boolean needsScores() { return needsScores || super.needsScores(); }
|
||||
|
||||
public void setScorer(Scorer scorer) {
|
||||
this.collapseStrategy.setScorer(scorer);
|
||||
}
|
||||
|
@ -1686,7 +1694,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
private float[] ordVals;
|
||||
private Map rcontext;
|
||||
private final CollapseScore collapseScore = new CollapseScore();
|
||||
private final boolean cscore;
|
||||
private float score;
|
||||
|
||||
public OrdValueSourceStrategy(int maxDoc,
|
||||
|
@ -1714,7 +1721,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
Arrays.fill(ordVals, Float.MAX_VALUE);
|
||||
}
|
||||
|
||||
this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
|
||||
collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
|
||||
|
||||
if(this.needsScores) {
|
||||
this.scores = new float[ords.length];
|
||||
|
@ -1735,7 +1742,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
this.boostDocs.add(globalDoc);
|
||||
}
|
||||
|
||||
if(needsScores || cscore) {
|
||||
if (needsScores) {
|
||||
this.score = scorer.score();
|
||||
this.collapseScore.score = score;
|
||||
}
|
||||
|
@ -2208,7 +2215,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
private FunctionValues functionValues;
|
||||
private Map rcontext;
|
||||
private final CollapseScore collapseScore = new CollapseScore();
|
||||
private final boolean cscore;
|
||||
private float score;
|
||||
private int index=-1;
|
||||
|
||||
|
@ -2240,7 +2246,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
comp = new MinFloatComp();
|
||||
}
|
||||
|
||||
this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
|
||||
collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
|
||||
|
||||
if(needsScores) {
|
||||
this.scores = new float[size];
|
||||
|
@ -2263,7 +2269,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
return;
|
||||
}
|
||||
|
||||
if(needsScores || cscore) {
|
||||
if (needsScores) {
|
||||
this.score = scorer.score();
|
||||
this.collapseScore.score = score;
|
||||
}
|
||||
|
|
|
@ -2285,6 +2285,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return all.andNotSize(positiveA.union(positiveB));
}
/** @lucene.internal */
public boolean intersects(DocSet a, DocsEnumState deState) throws IOException {
return a.intersects(getDocSet(deState));
}
/**
* Takes a list of document IDs, and returns an array of Documents containing all of the stored fields.
*/
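The new SolrIndexSearcher.intersects(DocSet, DocsEnumState) lets the ENUM faceting path ask "do these sets share at least one document?" instead of computing a full intersection count, so facet.exists can stop at the first match. A minimal sketch of that early-exit idea over plain bit sets (not the Solr DocSet API) follows.

import java.util.BitSet;

// Sketch: checking whether two doc-id sets intersect can stop at the first common
// document, whereas counting the intersection must visit every candidate.
public final class IntersectsSketch {
  static boolean intersects(BitSet a, BitSet b) {
    BitSet smaller = a.cardinality() <= b.cardinality() ? a : b;
    BitSet larger = smaller == a ? b : a;
    for (int doc = smaller.nextSetBit(0); doc >= 0; doc = smaller.nextSetBit(doc + 1)) {
      if (larger.get(doc)) {
        return true; // early exit on the first shared doc id
      }
    }
    return false;
  }

  public static void main(String[] args) {
    BitSet docs = new BitSet(), termDocs = new BitSet();
    docs.set(3); docs.set(7); docs.set(9);
    termDocs.set(7);
    System.out.println(intersects(docs, termDocs)); // true, found at doc 7
  }
}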
@ -20,10 +20,8 @@ import java.io.IOException;
|
|||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.atomic.LongAdder;
|
||||
|
@ -47,7 +45,6 @@ import org.apache.solr.common.SolrException;
|
|||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.SolrConfig.UpdateHandlerInfo;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
|
@ -516,16 +513,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
|||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
|
||||
" but currently suspiciously used for replication as well")
|
||||
private void setCommitData(IndexWriter iw) {
|
||||
final Map<String,String> commitData = new HashMap<>();
|
||||
commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY,
|
||||
String.valueOf(System.currentTimeMillis()));
|
||||
iw.setLiveCommitData(commitData.entrySet());
|
||||
}
|
||||
|
||||
|
||||
public void prepareCommit(CommitUpdateCommand cmd) throws IOException {
|
||||
|
||||
boolean error=true;
|
||||
|
@ -534,7 +522,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
|||
log.info("start "+cmd);
|
||||
RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
|
||||
try {
|
||||
setCommitData(iw.get());
|
||||
SolrIndexWriter.setCommitData(iw.get());
|
||||
iw.get().prepareCommit();
|
||||
} finally {
|
||||
iw.decref();
|
||||
|
@ -615,7 +603,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
|||
// SolrCore.verbose("writer.commit() start writer=",writer);
|
||||
|
||||
if (writer.hasUncommittedChanges()) {
|
||||
setCommitData(writer);
|
||||
SolrIndexWriter.setCommitData(writer);
|
||||
writer.commit();
|
||||
} else {
|
||||
log.info("No uncommitted changes. Skipping IW.commit.");
|
||||
|
@ -800,7 +788,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
|
|||
}
|
||||
|
||||
// todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
|
||||
setCommitData(writer);
|
||||
SolrIndexWriter.setCommitData(writer);
|
||||
writer.commit();
|
||||
|
||||
synchronized (solrCoreState.getUpdateLock()) {
|
||||
|
|
|
@ -22,12 +22,12 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.CodecReader;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterCodecReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.SlowCodecReaderWrapper;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
|
@ -134,6 +134,11 @@ public class SolrIndexSplitter {
|
|||
CodecReader subReader = SlowCodecReaderWrapper.wrap(leaves.get(segmentNumber).reader());
|
||||
iw.addIndexes(new LiveDocsReader(subReader, segmentDocSets.get(segmentNumber)[partitionNumber]));
|
||||
}
|
||||
// we commit explicitly instead of sending a CommitUpdateCommand through the processor chain
|
||||
// because the sub-shard cores will just ignore such a commit because the update log is not
|
||||
// in active state at this time.
|
||||
SolrIndexWriter.setCommitData(iw);
|
||||
iw.commit();
|
||||
success = true;
|
||||
} finally {
|
||||
if (iwRef != null) {
|
||||
|
@ -151,8 +156,6 @@ public class SolrIndexSplitter {
|
|||
|
||||
}
|
||||
|
||||
|
||||
|
||||
FixedBitSet[] split(LeafReaderContext readerContext) throws IOException {
|
||||
LeafReader reader = readerContext.reader();
|
||||
FixedBitSet[] docSets = new FixedBitSet[numPieces];
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.solr.update;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
|
@ -27,8 +29,9 @@ import org.apache.lucene.index.IndexWriterConfig;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.solr.common.util.IOUtils;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.core.DirectoryFactory;
|
||||
import org.apache.solr.core.DirectoryFactory.DirContext;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -86,7 +89,16 @@ public class SolrIndexWriter extends IndexWriter {
this.directory = directory;
numOpens.incrementAndGet();
}
@SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
" but currently suspiciously used for replication as well")
public static void setCommitData(IndexWriter iw) {
log.info("Calling setCommitData with IW:" + iw.toString());
final Map<String,String> commitData = new HashMap<>();
commitData.put(COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
iw.setLiveCommitData(commitData.entrySet());
}
private void setDirectoryFactory(DirectoryFactory factory) {
this.directoryFactory = factory;
}
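setCommitData was moved to SolrIndexWriter as a static helper so that DirectUpdateHandler2 and SolrIndexSplitter can stamp every commit with a commitTimeMSec entry via IndexWriter.setLiveCommitData. The sketch below shows the same pattern against a bare Lucene IndexWriter; it assumes a Lucene version of this era where setLiveCommitData and RAMDirectory exist, and the key name here is illustrative.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

// Sketch of stamping commit user-data with a timestamp, mirroring the static
// setCommitData helper above (not the Solr class itself).
public final class CommitDataExample {
  static void setCommitData(IndexWriter iw) {
    final Map<String, String> commitData = new HashMap<>();
    commitData.put("commitTimeMSec", String.valueOf(System.currentTimeMillis()));
    iw.setLiveCommitData(commitData.entrySet());
  }

  public static void main(String[] args) throws Exception {
    try (IndexWriter iw = new IndexWriter(new RAMDirectory(),
        new IndexWriterConfig(new StandardAnalyzer()))) {
      setCommitData(iw);
      iw.commit(); // the user data is recorded on this commit point
    }
  }
}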
@ -1169,12 +1169,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
|
|||
if (upShards.contains(s)) {
|
||||
// this is no longer true if there was a query timeout on an up shard
|
||||
// assertTrue("Expected to find numFound in the up shard info",info.get("numFound") != null);
|
||||
assertTrue("Expected to find shardAddress in the up shard info",info.get("shardAddress") != null);
|
||||
assertTrue("Expected to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
|
||||
}
|
||||
else {
|
||||
assertEquals("Expected to find the "+SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY+" header set if a shard is down",
|
||||
Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
|
||||
assertTrue("Expected to find error in the down shard info",info.get("error") != null);
|
||||
assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,22 +16,39 @@
|
|||
*/
|
||||
package org.apache.solr;
|
||||
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.noggit.JSONUtil;
|
||||
import org.noggit.ObjectBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.*;
|
||||
|
||||
@Slow
|
||||
public class TestRandomFaceting extends SolrTestCaseJ4 {
|
||||
|
||||
private static final Pattern trieFields = Pattern.compile(".*_t.");
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
public static final String FOO_STRING_FIELD = "foo_s1";
|
||||
|
@ -80,6 +97,21 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
|
|||
types.add(new FldType("missing_ss",new IRange(0,0), new SVal('a','b',1,1)));
|
||||
|
||||
// TODO: doubles, multi-floats, ints with precisionStep>0, booleans
|
||||
types.add(new FldType("small_tf",ZERO_ONE, new FVal(-4,5)));
|
||||
assert trieFields.matcher("small_tf").matches();
|
||||
assert !trieFields.matcher("small_f").matches();
|
||||
|
||||
types.add(new FldType("foo_ti",ZERO_ONE, new IRange(-2,indexSize)));
|
||||
assert trieFields.matcher("foo_ti").matches();
|
||||
assert !trieFields.matcher("foo_i").matches();
|
||||
|
||||
types.add(new FldType("bool_b",ZERO_ONE, new Vals(){
|
||||
@Override
|
||||
public Comparable get() {
|
||||
return random().nextBoolean();
|
||||
}
|
||||
|
||||
}));
|
||||
}
|
||||
|
||||
void addMoreDocs(int ndocs) throws Exception {
|
||||
|
@ -144,8 +176,8 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
|
||||
List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc"});
|
||||
List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs"});
|
||||
List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc", null});
|
||||
List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs", null});
|
||||
|
||||
|
||||
void doFacetTests(FldType ftype) throws Exception {
|
||||
|
@ -154,10 +186,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
|
|||
Random rand = random();
|
||||
boolean validate = validateResponses;
|
||||
ModifiableSolrParams params = params("facet","true", "wt","json", "indent","true", "omitHeader","true");
|
||||
params.add("q","*:*", "rows","0"); // TODO: select subsets
|
||||
params.add("q","*:*"); // TODO: select subsets
|
||||
params.add("rows","0");
|
||||
|
||||
|
||||
SchemaField sf = req.getSchema().getField(ftype.fname);
|
||||
boolean multiValued = sf.getType().multiValuedFieldCache();
|
||||
|
||||
|
@ -198,6 +229,10 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
|
|||
params.add("facet.missing", "true");
|
||||
}
|
||||
|
||||
if (rand.nextBoolean()) {
|
||||
params.add("facet.enum.cache.minDf",""+ rand.nextInt(indexSize));
|
||||
}
|
||||
|
||||
// TODO: randomly add other facet params
|
||||
String key = ftype.fname;
|
||||
String facet_field = ftype.fname;
|
||||
|
@ -210,45 +245,207 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
|
|||
List<String> methods = multiValued ? multiValuedMethods : singleValuedMethods;
|
||||
List<String> responses = new ArrayList<>(methods.size());
|
||||
for (String method : methods) {
|
||||
// params.add("facet.field", "{!key="+method+"}" + ftype.fname);
|
||||
// TODO: allow method to be passed on local params?
|
||||
|
||||
params.set("facet.method", method);
|
||||
|
||||
// if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
|
||||
|
||||
String strResponse = h.query(req(params));
|
||||
// Object realResponse = ObjectBuilder.fromJSON(strResponse);
|
||||
// System.out.println(strResponse);
|
||||
|
||||
responses.add(strResponse);
|
||||
for (boolean exists : new boolean [] {false, true}) {
|
||||
// params.add("facet.field", "{!key="+method+"}" + ftype.fname);
|
||||
// TODO: allow method to be passed on local params?
|
||||
if (method!=null) {
|
||||
params.set("facet.method", method);
|
||||
} else {
|
||||
params.remove("facet.method");
|
||||
}
|
||||
|
||||
params.set("facet.exists", ""+exists);
|
||||
if (!exists && rand.nextBoolean()) {
|
||||
params.remove("facet.exists");
|
||||
}
|
||||
|
||||
// if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
|
||||
if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation
|
||||
if (exists && params.getInt("facet.mincount", 0)>1) {
|
||||
assertQEx("no mincount on facet.exists",
|
||||
rand.nextBoolean() ? "facet.exists":"facet.mincount",
|
||||
req(params), ErrorCode.BAD_REQUEST);
|
||||
continue;
|
||||
}
|
||||
// facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS
|
||||
final boolean notEnum = method != null && !method.equals("enum");
|
||||
final boolean trieField = trieFields.matcher(ftype.fname).matches();
|
||||
if ((notEnum || trieField) && exists) {
|
||||
assertQEx("facet.exists only when enum or ommitted",
|
||||
"facet.exists", req(params), ErrorCode.BAD_REQUEST);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
String strResponse = h.query(req(params));
|
||||
responses.add(strResponse);
|
||||
|
||||
if (responses.size()>1) {
|
||||
validateResponse(responses.get(0), strResponse, params, method, methods);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
String strResponse = h.query(req(params));
|
||||
Object realResponse = ObjectBuilder.fromJSON(strResponse);
|
||||
**/
|
||||
|
||||
if (validate) {
|
||||
for (int i=1; i<methods.size(); i++) {
|
||||
String err = JSONTestUtil.match("/", responses.get(i), responses.get(0), 0.0);
|
||||
if (err != null) {
|
||||
log.error("ERROR: mismatch facet response: " + err +
|
||||
"\n expected =" + responses.get(0) +
|
||||
"\n response = " + responses.get(i) +
|
||||
"\n request = " + params
|
||||
);
|
||||
fail(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} finally {
|
||||
req.close();
|
||||
}
|
||||
}
|
||||
private void validateResponse(String expected, String actual, ModifiableSolrParams params, String method,
|
||||
List<String> methods) throws Exception {
|
||||
if (params.getBool("facet.exists", false)) {
|
||||
if (isSortByCount(params)) { // it's challenged with facet.sort=count
|
||||
expected = getExpectationForSortByCount(params, methods);// that requires to recalculate expactation
|
||||
} else { // facet.sort=index
|
||||
expected = capFacetCountsTo1(expected);
|
||||
}
|
||||
}
|
||||
|
||||
String err = JSONTestUtil.match("/", actual, expected, 0.0);
|
||||
if (err != null) {
|
||||
log.error("ERROR: mismatch facet response: " + err +
|
||||
"\n expected =" + expected +
|
||||
"\n response = " + actual +
|
||||
"\n request = " + params
|
||||
);
|
||||
fail(err);
|
||||
}
|
||||
}
|
||||
|
||||
/** if facet.exists=true with facet.sort=counts,
|
||||
* it should return all values with 1 hits ordered by label index
|
||||
* then all vals with 0 , and then missing count with null label,
|
||||
* in the implementation below they are called three stratas
|
||||
* */
|
||||
private String getExpectationForSortByCount( ModifiableSolrParams params, List<String> methods) throws Exception {
|
||||
String indexSortedResponse = getIndexSortedAllFacetValues(params, methods);
|
||||
|
||||
return transformFacetFields(indexSortedResponse, e -> {
|
||||
List<Object> facetSortedByIndex = (List<Object>) e.getValue();
|
||||
Map<Integer,List<Object>> stratas = new HashMap<Integer,List<Object>>(){
|
||||
@Override // poor man multimap, I won't do that anymore, I swear.
|
||||
public List<Object> get(Object key) {
|
||||
if (!containsKey(key)) {
|
||||
put((Integer) key, new ArrayList<>());
|
||||
}
|
||||
return super.get(key);
|
||||
}
|
||||
};
|
||||
|
||||
for (Iterator iterator = facetSortedByIndex.iterator(); iterator.hasNext();) {
|
||||
Object label = (Object) iterator.next();
|
||||
Long count = (Long) iterator.next();
|
||||
final Integer strata;
|
||||
if (label==null) { // missing (here "stratas" seems like overengineering )
|
||||
strata = null;
|
||||
}else {
|
||||
if (count>0) {
|
||||
count = 1L; // capping here
|
||||
strata = 1; // non-zero count become zero
|
||||
} else {
|
||||
strata = 0; // zero-count
|
||||
}
|
||||
}
|
||||
final List<Object> facet = stratas.get(strata);
|
||||
facet.add(label);
|
||||
facet.add(count);
|
||||
}
|
||||
List stratified =new ArrayList<>();
|
||||
for(Integer s : new Integer[]{1, 0}) { // non-zero capped to one goes first, zeroes go then
|
||||
stratified.addAll(stratas.get(s));
|
||||
}// cropping them now
|
||||
int offset=params.getInt("facet.offset", 0) * 2;
|
||||
int end = offset + params.getInt("facet.limit", 100) * 2 ;
|
||||
int fromIndex = offset > stratified.size() ? stratified.size() : offset;
|
||||
stratified = stratified.subList(fromIndex,
|
||||
end > stratified.size() ? stratified.size() : end);
|
||||
|
||||
if (params.getInt("facet.limit", 100)>0) { /// limit=0 omits even miss count
|
||||
stratified.addAll(stratas.get(null));
|
||||
}
|
||||
facetSortedByIndex.clear();
|
||||
facetSortedByIndex.addAll(stratified);
|
||||
});
|
||||
}
|
||||
|
||||
private String getIndexSortedAllFacetValues(ModifiableSolrParams in, List<String> methods) throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams(in);
|
||||
params.set("facet.sort", "index");
|
||||
String goodOldMethod = methods.get(random().nextInt( methods.size()));
|
||||
params.set("facet.method", goodOldMethod);
|
||||
params.set("facet.exists", "false");
|
||||
if (random().nextBoolean()) {
|
||||
params.remove("facet.exists");
|
||||
}
|
||||
params.set("facet.limit",-1);
|
||||
params.set("facet.offset",0);
|
||||
final String query;
|
||||
SolrQueryRequest req = null;
|
||||
try {
|
||||
req = req(params);
|
||||
query = h.query(req);
|
||||
} finally {
|
||||
req.close();
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
private boolean isSortByCount(ModifiableSolrParams in) {
|
||||
boolean sortIsCount;
|
||||
String sortParam = in.get("facet.sort");
|
||||
sortIsCount = "count".equals(sortParam) || (sortParam==null && in.getInt("facet.limit",100)>0);
|
||||
return sortIsCount;
|
||||
}
|
||||
|
||||
/*
|
||||
* {
|
||||
"response":{"numFound":6,"start":0,"docs":[]
|
||||
},
|
||||
"facet_counts":{
|
||||
"facet_queries":{},
|
||||
"facet_fields":{
|
||||
"foo_i":[
|
||||
"6",2,
|
||||
"2",1,
|
||||
"3",1]},
|
||||
"facet_ranges":{},
|
||||
"facet_intervals":{},
|
||||
"facet_heatmaps":{}}}
|
||||
* */
|
||||
@SuppressWarnings({"rawtypes", "unchecked"})
|
||||
private String capFacetCountsTo1(String expected) throws IOException {
|
||||
return transformFacetFields(expected, e -> {
|
||||
List<Object> facetValues = (List<Object>) e.getValue();
|
||||
for (ListIterator iterator = facetValues.listIterator(); iterator.hasNext();) {
|
||||
Object value = iterator.next();
|
||||
Long count = (Long) iterator.next();
|
||||
if (value!=null && count > 1) {
|
||||
iterator.set(1);
|
||||
}
|
||||
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private String transformFacetFields(String expected, Consumer<Map.Entry<Object,Object>> consumer) throws IOException {
|
||||
Object json = ObjectBuilder.fromJSON(expected);
|
||||
Map facet_fields = getFacetFieldMap(json);
|
||||
Set entries = facet_fields.entrySet();
|
||||
for (Object facetTuples : entries) { //despite there should be only one field
|
||||
Entry entry = (Entry)facetTuples;
|
||||
consumer.accept(entry);
|
||||
}
|
||||
return JSONUtil.toJSON(json);
|
||||
}
|
||||
|
||||
private Map getFacetFieldMap(Object json) {
|
||||
Object facet_counts = ((Map)json).get("facet_counts");
|
||||
Map facet_fields = (Map) ((Map)facet_counts).get("facet_fields");
|
||||
return facet_fields;
|
||||
}
|
||||
}
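getExpectationForSortByCount above rebuilds the expected facet.sort=count output from an index-sorted response: non-zero counts are capped to 1 and come first, zero counts follow, and the missing-count entry (null label) stays last. A compact sketch of that re-stratification over a flat [label, count, label, count, ...] list follows, in plain Java rather than the test's noggit structures.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Sketch of the "strata" transform used by the test's expectation builder:
// cap every positive count at 1, emit capped entries first, then zero-count
// entries, then the trailing missing-count entry (null label), preserving
// index order inside each stratum. The facet.limit cropping is not modeled.
public final class ExistsExpectationSketch {
  static List<Object> restratify(List<Object> flat) {
    List<Object> ones = new ArrayList<>(), zeroes = new ArrayList<>(), missing = new ArrayList<>();
    for (int i = 0; i < flat.size(); i += 2) {
      Object label = flat.get(i);
      long count = ((Number) flat.get(i + 1)).longValue();
      List<Object> target = (label == null) ? missing : (count > 0 ? ones : zeroes);
      target.add(label);
      target.add(label == null ? count : Math.min(count, 1L));
    }
    List<Object> out = new ArrayList<>(ones);
    out.addAll(zeroes);
    out.addAll(missing);
    return out;
  }

  public static void main(String[] args) {
    List<Object> indexSorted = Arrays.asList("a", 0L, "b", 3L, "c", 1L, null, 2L);
    System.out.println(restratify(indexSorted)); // [b, 1, c, 1, a, 0, null, 2]
  }
}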
|
||||
|
||||
|
||||
|
|
|
@ -25,20 +25,28 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.Slow;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.CoreAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.QueryRequest;
|
||||
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
|
||||
import org.apache.solr.client.solrj.response.CoreAdminResponse;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.CollectionStateWatcher;
|
||||
import org.apache.solr.common.cloud.CompositeIdRouter;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.DocRouter;
|
||||
|
@ -56,6 +64,7 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
|
||||
|
||||
|
@ -71,6 +80,12 @@ public class ShardSplitTest extends BasicDistributedZkTest {
|
|||
schemaString = "schema15.xml"; // we need a string id
|
||||
}
|
||||
|
||||
@Override
|
||||
public void distribSetUp() throws Exception {
|
||||
super.distribSetUp();
|
||||
useFactory(null);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() throws Exception {
|
||||
|
||||
|
@ -91,6 +106,146 @@ public class ShardSplitTest extends BasicDistributedZkTest {
|
|||
//waitForThingsToLevelOut(15);
|
||||
}
|
||||
|
||||
/*
|
||||
Creates a collection with replicationFactor=1, splits a shard. Restarts the sub-shard leader node.
|
||||
Add a replica. Ensure count matches in leader and replica.
|
||||
*/
|
||||
public void testSplitStaticIndexReplication() throws Exception {
|
||||
waitForThingsToLevelOut(15);
|
||||
|
||||
DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
Replica replica = defCol.getReplicas().get(0);
|
||||
String nodeName = replica.getNodeName();
|
||||
|
||||
String collectionName = "testSplitStaticIndexReplication";
|
||||
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
|
||||
create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
|
||||
create.setCreateNodeSet(nodeName); // we want to create the leader on a fixed node so that we know which one to restart later
|
||||
create.process(cloudClient);
|
||||
try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
|
||||
client.setDefaultCollection(collectionName);
|
||||
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
|
||||
try {
|
||||
thread.start();
|
||||
Thread.sleep(1000); // give the indexer sometime to do its work
|
||||
thread.safeStop();
|
||||
thread.join();
|
||||
client.commit();
|
||||
controlClient.commit();
|
||||
|
||||
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
|
||||
splitShard.setShardName(SHARD1);
|
||||
String asyncId = splitShard.processAsync(client);
|
||||
RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
|
||||
if (state == RequestStatusState.COMPLETED) {
|
||||
waitForRecoveriesToFinish(collectionName, true);
|
||||
// let's wait to see parent shard become inactive
|
||||
CountDownLatch latch = new CountDownLatch(1);
|
||||
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
|
||||
@Override
|
||||
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||
Slice parent = collectionState.getSlice(SHARD1);
|
||||
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||
if (slice10 != null && slice11 != null &&
|
||||
parent.getState() == Slice.State.INACTIVE &&
|
||||
slice10.getState() == Slice.State.ACTIVE &&
|
||||
slice11.getState() == Slice.State.ACTIVE) {
|
||||
latch.countDown();
|
||||
return true; // removes the watch
|
||||
}
|
||||
return false;
|
||||
}
|
||||
});
|
||||
latch.await(1, TimeUnit.MINUTES);
|
||||
if (latch.getCount() != 0) {
|
||||
// sanity check
|
||||
fail("Sub-shards did not become active even after waiting for 1 minute");
|
||||
}
|
||||
|
||||
int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
|
||||
|
||||
// restart the sub-shard leader node
|
||||
boolean restarted = false;
|
||||
for (JettySolrRunner jetty : jettys) {
|
||||
int port = jetty.getBaseUrl().getPort();
|
||||
if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
|
||||
ChaosMonkey.kill(jetty);
|
||||
ChaosMonkey.start(jetty);
|
||||
restarted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!restarted) {
|
||||
// sanity check
|
||||
fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
|
||||
}
|
||||
|
||||
// add a new replica for the sub-shard
|
||||
CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
|
||||
// use control client because less chances of it being the node being restarted
|
||||
// this is to avoid flakiness of test because of NoHttpResponseExceptions
|
||||
String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
|
||||
try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
|
||||
state = addReplica.processAndWait(control, 30);
|
||||
}
|
||||
if (state == RequestStatusState.COMPLETED) {
|
||||
CountDownLatch newReplicaLatch = new CountDownLatch(1);
|
||||
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
|
||||
@Override
|
||||
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||
if (liveNodes.size() != liveNodeCount) {
|
||||
return false;
|
||||
}
|
||||
Slice slice = collectionState.getSlice(SHARD1_0);
|
||||
if (slice.getReplicas().size() == 2) {
|
||||
if (!slice.getReplicas().stream().anyMatch(r -> r.getState() == Replica.State.RECOVERING)) {
|
||||
// we see replicas and none of them are recovering
|
||||
newReplicaLatch.countDown();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
});
|
||||
newReplicaLatch.await(30, TimeUnit.SECONDS);
|
||||
// check consistency of sub-shard replica explicitly because checkShardConsistency methods doesn't
|
||||
// handle new shards/replica so well.
|
||||
ClusterState clusterState = client.getZkStateReader().getClusterState();
|
||||
DocCollection collection = clusterState.getCollection(collectionName);
|
||||
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
|
||||
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
|
||||
} else {
|
||||
fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds!. Saw state = " + state.getKey());
|
||||
}
|
||||
} else {
|
||||
fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
|
||||
}
|
||||
} finally {
|
||||
thread.safeStop();
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int assertConsistentReplicas(Slice shard) throws SolrServerException, IOException {
|
||||
long numFound = Long.MIN_VALUE;
|
||||
int count = 0;
|
||||
for (Replica replica : shard.getReplicas()) {
|
||||
HttpSolrClient client = new HttpSolrClient.Builder(replica.getCoreUrl())
|
||||
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build();
|
||||
QueryResponse response = client.query(new SolrQuery("q", "*:*", "distrib", "false"));
|
||||
log.info("Found numFound={} on replica: {}", response.getResults().getNumFound(), replica.getCoreUrl());
|
||||
if (numFound == Long.MIN_VALUE) {
|
||||
numFound = response.getResults().getNumFound();
|
||||
} else {
|
||||
assertEquals("Shard " + shard.getName() + " replicas do not have same number of documents", numFound, response.getResults().getNumFound());
|
||||
}
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to test that we can split a shard when a previous split event
|
||||
* left sub-shards in construction or recovery state.
|
||||
|
@ -143,6 +298,218 @@ public class ShardSplitTest extends BasicDistributedZkTest {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitWithChaosMonkey() throws Exception {
|
||||
waitForThingsToLevelOut(15);
|
||||
|
||||
List<StoppableIndexingThread> indexers = new ArrayList<>();
|
||||
try {
|
||||
for (int i = 0; i < 1; i++) {
|
||||
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, cloudClient, String.valueOf(i), true);
|
||||
indexers.add(thread);
|
||||
thread.start();
|
||||
}
|
||||
Thread.sleep(1000); // give the indexers some time to do their work
|
||||
} catch (Exception e) {
|
||||
log.error("Error in test", e);
|
||||
} finally {
|
||||
for (StoppableIndexingThread indexer : indexers) {
|
||||
indexer.safeStop();
|
||||
indexer.join();
|
||||
}
|
||||
}
|
||||
|
||||
cloudClient.commit();
|
||||
controlClient.commit();
|
||||
|
||||
AtomicBoolean stop = new AtomicBoolean();
|
||||
AtomicBoolean killed = new AtomicBoolean(false);
|
||||
Runnable monkey = new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
|
||||
zkStateReader.registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
|
||||
@Override
|
||||
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||
if (stop.get()) {
|
||||
return true; // abort and remove the watch
|
||||
}
|
||||
Slice slice = collectionState.getSlice(SHARD1_0);
|
||||
if (slice != null && slice.getReplicas().size() > 1) {
|
||||
// ensure that only one watcher invocation thread can kill!
|
||||
if (killed.compareAndSet(false, true)) {
|
||||
log.info("Monkey thread found 2 replicas for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
|
||||
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
|
||||
try {
|
||||
Thread.sleep(1000 + random().nextInt(500));
|
||||
ChaosMonkey.kill(cjetty);
|
||||
stop.set(true);
|
||||
return true;
|
||||
} catch (Exception e) {
|
||||
log.error("Monkey unable to kill jetty at port " + cjetty.jetty.getLocalPort(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info("Monkey thread found only one replica for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
|
||||
return false;
|
||||
}
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
Thread monkeyThread = null;
|
||||
/*
|
||||
somehow the cluster state object inside this zk state reader has static copy of the collection which is never updated
|
||||
so any call to waitForRecoveriesToFinish just keeps looping until timeout.
|
||||
We workaround by explicitly registering the collection as an interesting one so that it is watched by ZkStateReader
|
||||
see SOLR-9440. Todo remove this hack after SOLR-9440 is fixed.
|
||||
*/
|
||||
cloudClient.getZkStateReader().registerCore(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
|
||||
monkeyThread = new Thread(monkey);
|
||||
monkeyThread.start();
|
||||
try {
|
||||
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
splitShard.setShardName(SHARD1);
|
||||
String asyncId = splitShard.processAsync(cloudClient);
|
||||
RequestStatusState splitStatus = null;
|
||||
try {
|
||||
splitStatus = CollectionAdminRequest.requestStatus(asyncId).waitFor(cloudClient, 120);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to get request status, maybe because the overseer node was shutdown by monkey", e);
|
||||
}
|
||||
|
||||
// we don't care if the split failed because we are injecting faults and it is likely
|
||||
// that the split has failed but in any case we want to assert that all docs that got
|
||||
// indexed are available in SolrCloud and if the split succeeded then all replicas of the sub-shard
|
||||
// must be consistent (i.e. have same numdocs)
|
||||
|
||||
log.info("Shard split request state is COMPLETED");
|
||||
stop.set(true);
|
||||
monkeyThread.join();
|
||||
Set<String> addFails = new HashSet<>();
|
||||
Set<String> deleteFails = new HashSet<>();
|
||||
for (StoppableIndexingThread indexer : indexers) {
|
||||
addFails.addAll(indexer.getAddFails());
|
||||
deleteFails.addAll(indexer.getDeleteFails());
|
||||
}
|
||||
|
||||
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
|
||||
log.info("Starting shard1 leader jetty at port {}", cjetty.jetty.getLocalPort());
|
||||
ChaosMonkey.start(cjetty.jetty);
|
||||
cloudClient.getZkStateReader().forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
log.info("Current collection state: {}", printClusterStateInfo(AbstractDistribZkTestBase.DEFAULT_COLLECTION));
|
||||
|
||||
boolean replicaCreationsFailed = false;
|
||||
if (splitStatus == RequestStatusState.FAILED) {
|
||||
// either one or more replica creation failed (because it may have been created on the same parent shard leader node)
|
||||
// or the split may have failed while trying to soft-commit *after* all replicas have been created
|
||||
// the latter counts as a successful switch even if the API doesn't say so
|
||||
// so we must find a way to distinguish between the two
|
||||
// an easy way to do that is to look at the sub-shard replicas and check if the replica core actually exists
|
||||
// instead of existing solely inside the cluster state
|
||||
DocCollection collectionState = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||
if (slice10 != null && slice11 != null) {
|
||||
for (Replica replica : slice10) {
|
||||
if (!doesReplicaCoreExist(replica)) {
|
||||
replicaCreationsFailed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (Replica replica : slice11) {
|
||||
if (!doesReplicaCoreExist(replica)) {
|
||||
replicaCreationsFailed = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// true if sub-shard states switch to 'active' eventually
|
||||
AtomicBoolean areSubShardsActive = new AtomicBoolean(false);
|
||||
|
||||
if (!replicaCreationsFailed) {
|
||||
// all sub-shard replicas were created successfully so all cores must recover eventually
|
||||
waitForRecoveriesToFinish(AbstractDistribZkTestBase.DEFAULT_COLLECTION, true);
|
||||
// let's wait for the overseer to switch shard states
|
||||
CountDownLatch latch = new CountDownLatch(1);
|
||||
cloudClient.getZkStateReader().registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
|
||||
@Override
|
||||
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||
Slice parent = collectionState.getSlice(SHARD1);
|
||||
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||
if (slice10 != null && slice11 != null &&
|
||||
parent.getState() == Slice.State.INACTIVE &&
|
||||
slice10.getState() == Slice.State.ACTIVE &&
|
||||
slice11.getState() == Slice.State.ACTIVE) {
|
||||
areSubShardsActive.set(true);
|
||||
latch.countDown();
|
||||
return true; // removes the watch
|
||||
} else if (slice10 != null && slice11 != null &&
|
||||
parent.getState() == Slice.State.ACTIVE &&
|
||||
slice10.getState() == Slice.State.RECOVERY_FAILED &&
|
||||
slice11.getState() == Slice.State.RECOVERY_FAILED) {
|
||||
areSubShardsActive.set(false);
|
||||
latch.countDown();
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
latch.await(2, TimeUnit.MINUTES);
|
||||
|
||||
if (latch.getCount() != 0) {
|
||||
// sanity check
|
||||
fail("We think that split was successful but sub-shard states were not updated even after 2 minutes.");
|
||||
}
|
||||
}
|
||||
|
||||
cloudClient.commit(); // for visibility of results on sub-shards
|
||||
|
||||
checkShardConsistency(true, true, addFails, deleteFails);
|
||||
long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
|
||||
// ensure we have added more than 0 docs
|
||||
long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
|
||||
assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
|
||||
assertEquals("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", ctrlDocs, cloudClientDocs);
|
||||
|
||||
// check consistency of sub-shard replicas explicitly because the checkShardConsistency method doesn't
// handle new shards/replicas well
|
||||
if (areSubShardsActive.get()) {
|
||||
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
|
||||
DocCollection collection = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
|
||||
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
|
||||
numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_1));
|
||||
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_1", 2, numReplicasChecked);
|
||||
}
|
||||
} finally {
|
||||
stop.set(true);
|
||||
monkeyThread.join();
|
||||
}
|
||||
}
|
||||
|
||||
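// Checks whether the replica's core physically exists on its node by asking the CoreAdmin API for
// the core's status; a replica that exists only in cluster state yields no (or an empty) status entry.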
private boolean doesReplicaCoreExist(Replica replica) throws IOException {
|
||||
try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getStr(BASE_URL_PROP))
|
||||
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build()) {
|
||||
String coreName = replica.getCoreName();
|
||||
try {
|
||||
CoreAdminResponse status = CoreAdminRequest.getStatus(coreName, client);
|
||||
if (status.getCoreStatus(coreName) == null || status.getCoreStatus(coreName).size() == 0) {
|
||||
return false;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("Error gettting core status of replica " + replica + ". Perhaps it does not exist!", e);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSplitShardWithRule() throws Exception {
|
||||
waitForThingsToLevelOut(15);
|
||||
@ -17,7 +17,6 @@
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
|
@ -27,10 +26,8 @@ import java.util.HashMap;
|
|||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
|
@ -235,70 +232,6 @@ public class TestMiniSolrCloudCluster extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testErrorsInStartup() throws Exception {
|
||||
|
||||
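// startJettySolrRunner is overridden to fail for exactly one node; cluster construction is expected
// to fail, shut the started nodes down, and surface the node failure as a suppressed exception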
AtomicInteger jettyIndex = new AtomicInteger();
|
||||
|
||||
MiniSolrCloudCluster cluster = null;
|
||||
try {
|
||||
cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
|
||||
@Override
|
||||
public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
|
||||
if (jettyIndex.incrementAndGet() != 2)
|
||||
return super.startJettySolrRunner(name, context, config);
|
||||
throw new IOException("Fake exception on startup!");
|
||||
}
|
||||
};
|
||||
fail("Expected an exception to be thrown from MiniSolrCloudCluster");
|
||||
}
|
||||
catch (Exception e) {
|
||||
assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
|
||||
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
|
||||
assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
|
||||
}
|
||||
finally {
|
||||
if (cluster != null)
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testErrorsInShutdown() throws Exception {
|
||||
|
||||
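// stopJettySolrRunner is overridden to throw for one node; shutdown() is still expected to stop the
// remaining nodes and rethrow the failure as a suppressed exception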
AtomicInteger jettyIndex = new AtomicInteger();
|
||||
|
||||
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
|
||||
@Override
|
||||
protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
|
||||
JettySolrRunner j = super.stopJettySolrRunner(jetty);
|
||||
if (jettyIndex.incrementAndGet() == 2)
|
||||
throw new IOException("Fake IOException on shutdown!");
|
||||
return j;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
cluster.shutdown();
|
||||
fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
|
||||
}
|
||||
catch (Exception e) {
|
||||
assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
|
||||
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
|
||||
assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtraFilters() throws Exception {
|
||||
Builder jettyConfig = JettyConfig.builder();
|
||||
jettyConfig.waitForLoadingCoresToFinish(null);
|
||||
jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
|
||||
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCollectionCreateWithoutCoresThenDelete() throws Exception {
|
||||
|
||||
@ -1,207 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.embedded.JettyConfig;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.core.CoreDescriptor;
|
||||
import org.apache.solr.index.TieredMergePolicyFactory;
|
||||
import org.apache.solr.util.RevertDefaultThreadHandlerRule;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.RuleChain;
|
||||
import org.junit.rules.TestRule;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
|
||||
public class TestMiniSolrCloudClusterBase extends LuceneTestCase {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
protected int NUM_SERVERS = 5;
|
||||
protected int NUM_SHARDS = 2;
|
||||
protected int REPLICATION_FACTOR = 2;
|
||||
|
||||
public TestMiniSolrCloudClusterBase () {
|
||||
NUM_SERVERS = 5;
|
||||
NUM_SHARDS = 2;
|
||||
REPLICATION_FACTOR = 2;
|
||||
}
|
||||
|
||||
@Rule
|
||||
public TestRule solrTestRules = RuleChain
|
||||
.outerRule(new SystemPropertiesRestoreRule());
|
||||
|
||||
@ClassRule
|
||||
public static TestRule solrClassRules = RuleChain.outerRule(
|
||||
new SystemPropertiesRestoreRule()).around(
|
||||
new RevertDefaultThreadHandlerRule());
|
||||
|
||||
@Test
|
||||
public void testBasics() throws Exception {
|
||||
final String collectionName = "testSolrCloudCollection";
|
||||
testCollectionCreateSearchDelete(collectionName);
|
||||
}
|
||||
|
||||
private MiniSolrCloudCluster createMiniSolrCloudCluster() throws Exception {
|
||||
JettyConfig.Builder jettyConfig = JettyConfig.builder();
|
||||
jettyConfig.waitForLoadingCoresToFinish(null);
|
||||
return new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
|
||||
}
|
||||
|
||||
private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId) throws Exception {
|
||||
String configName = "solrCloudCollectionConfig";
|
||||
File configDir = new File(SolrTestCaseJ4.TEST_HOME() + File.separator + "collection1" + File.separator + "conf");
|
||||
miniCluster.uploadConfigDir(configDir, configName);
|
||||
|
||||
Map<String, String> collectionProperties = new HashMap<>();
|
||||
collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml");
|
||||
collectionProperties.put("solr.tests.maxBufferedDocs", "100000");
|
||||
collectionProperties.put("solr.tests.ramBufferSizeMB", "100");
|
||||
// use non-test classes so RandomizedRunner isn't necessary
|
||||
if (random().nextBoolean()) {
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName());
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true");
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false");
|
||||
} else {
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName());
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true");
|
||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false");
|
||||
}
|
||||
collectionProperties.put("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
|
||||
collectionProperties.put("solr.directoryFactory", "solr.RAMDirectoryFactory");
|
||||
|
||||
miniCluster.createCollection(collectionName, NUM_SHARDS, REPLICATION_FACTOR, configName, createNodeSet, asyncId, collectionProperties);
|
||||
}
|
||||
|
||||
protected void testCollectionCreateSearchDelete(String collectionName) throws Exception {
|
||||
|
||||
MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
|
||||
|
||||
final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
|
||||
|
||||
try {
|
||||
assertNotNull(miniCluster.getZkServer());
|
||||
List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
|
||||
assertEquals(NUM_SERVERS, jettys.size());
|
||||
for (JettySolrRunner jetty : jettys) {
|
||||
assertTrue(jetty.isRunning());
|
||||
}
|
||||
|
||||
// shut down a server
|
||||
JettySolrRunner stoppedServer = miniCluster.stopJettySolrRunner(0);
|
||||
assertTrue(stoppedServer.isStopped());
|
||||
assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
|
||||
|
||||
// create a server
|
||||
JettySolrRunner startedServer = miniCluster.startJettySolrRunner();
|
||||
assertTrue(startedServer.isRunning());
|
||||
assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());
|
||||
|
||||
// create collection
|
||||
final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
|
||||
createCollection(miniCluster, collectionName, null, asyncId);
|
||||
if (asyncId != null) {
|
||||
final RequestStatusState state = AbstractFullDistribZkTestBase.getRequestStateAfterCompletion(asyncId, 330,
|
||||
cloudSolrClient);
|
||||
assertSame("did not see async createCollection completion", RequestStatusState.COMPLETED, state);
|
||||
}
|
||||
|
||||
try (SolrZkClient zkClient = new SolrZkClient
|
||||
(miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
|
||||
ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
|
||||
zkStateReader.createClusterStateWatchersAndUpdate();
|
||||
AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
|
||||
|
||||
// modify/query collection
|
||||
cloudSolrClient.setDefaultCollection(collectionName);
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.setField("id", "1");
|
||||
cloudSolrClient.add(doc);
|
||||
cloudSolrClient.commit();
|
||||
SolrQuery query = new SolrQuery();
|
||||
query.setQuery("*:*");
|
||||
QueryResponse rsp = cloudSolrClient.query(query);
|
||||
assertEquals(1, rsp.getResults().getNumFound());
|
||||
|
||||
// remove a server not hosting any replicas
|
||||
zkStateReader.forceUpdateCollection(collectionName);
|
||||
ClusterState clusterState = zkStateReader.getClusterState();
|
||||
HashMap<String, JettySolrRunner> jettyMap = new HashMap<String, JettySolrRunner>();
|
||||
for (JettySolrRunner jetty : miniCluster.getJettySolrRunners()) {
|
||||
String key = jetty.getBaseUrl().toString().substring((jetty.getBaseUrl().getProtocol() + "://").length());
|
||||
jettyMap.put(key, jetty);
|
||||
}
|
||||
Collection<Slice> slices = clusterState.getSlices(collectionName);
|
||||
// track the servers not hosting any replicas
|
||||
for (Slice slice : slices) {
|
||||
jettyMap.remove(slice.getLeader().getNodeName().replace("_solr", "/solr"));
|
||||
for (Replica replica : slice.getReplicas()) {
|
||||
jettyMap.remove(replica.getNodeName().replace("_solr", "/solr"));
|
||||
}
|
||||
}
|
||||
assertTrue("Expected to find a node without a replica", jettyMap.size() > 0);
|
||||
JettySolrRunner jettyToStop = jettyMap.entrySet().iterator().next().getValue();
|
||||
jettys = miniCluster.getJettySolrRunners();
|
||||
for (int i = 0; i < jettys.size(); ++i) {
|
||||
if (jettys.get(i).equals(jettyToStop)) {
|
||||
miniCluster.stopJettySolrRunner(i);
|
||||
assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
|
||||
}
|
||||
}
|
||||
|
||||
// now restore the original state so that this method can be called multiple times
|
||||
|
||||
// re-create a server (to restore original NUM_SERVERS count)
|
||||
startedServer = miniCluster.startJettySolrRunner();
|
||||
assertTrue(startedServer.isRunning());
|
||||
assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());
|
||||
|
||||
doExtraTests(miniCluster, zkClient, zkStateReader,cloudSolrClient, collectionName);
|
||||
}
|
||||
}
|
||||
finally {
|
||||
miniCluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader, CloudSolrClient cloudSolrClient,
|
||||
String defaultCollName) throws Exception { /*do nothing*/ }
|
||||
|
||||
}
|
|
@ -127,20 +127,6 @@ public class TestMiniSolrCloudClusterKerberos extends TestMiniSolrCloudCluster {
|
|||
public void testCollectionCreateSearchDelete() throws Exception {
|
||||
super.testCollectionCreateSearchDelete();
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
|
||||
@Test
|
||||
@Override
|
||||
public void testErrorsInShutdown() throws Exception {
|
||||
super.testErrorsInShutdown();
|
||||
}
|
||||
|
||||
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
|
||||
@Test
|
||||
@Override
|
||||
public void testErrorsInStartup() throws Exception {
|
||||
super.testErrorsInStartup();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tearDown() throws Exception {
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.TreeMap;
|
|||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
|
@ -57,16 +58,18 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
|
|||
private static SolrClient solrClient;
|
||||
|
||||
private static String getUsersFirstGroup() throws Exception {
|
||||
org.apache.hadoop.security.Groups hGroups =
|
||||
new org.apache.hadoop.security.Groups(new Configuration());
|
||||
String group = "*"; // accept any group if a group can't be found
|
||||
try {
|
||||
List<String> g = hGroups.getGroups(System.getProperty("user.name"));
|
||||
if (g != null && g.size() > 0) {
|
||||
group = g.get(0);
|
||||
if (!Constants.WINDOWS) { // does not work on Windows!
|
||||
org.apache.hadoop.security.Groups hGroups =
|
||||
new org.apache.hadoop.security.Groups(new Configuration());
|
||||
try {
|
||||
List<String> g = hGroups.getGroups(System.getProperty("user.name"));
|
||||
if (g != null && g.size() > 0) {
|
||||
group = g.get(0);
|
||||
}
|
||||
} catch (NullPointerException npe) {
|
||||
// if user/group doesn't exist on test box
|
||||
}
|
||||
} catch (NullPointerException npe) {
|
||||
// if user/group doesn't exist on test box
|
||||
}
|
||||
return group;
|
||||
}
|
||||
|
@ -92,6 +95,8 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
|
|||
|
||||
@BeforeClass
|
||||
public static void startup() throws Exception {
|
||||
assumeFalse("Hadoop does not work on Windows", Constants.WINDOWS);
|
||||
|
||||
System.setProperty("authenticationPlugin", HttpParamDelegationTokenPlugin.class.getName());
|
||||
System.setProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED, "true");
|
||||
|
||||
|
@ -151,7 +156,9 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
|
|||
miniCluster.shutdown();
|
||||
}
|
||||
miniCluster = null;
|
||||
solrClient.close();
|
||||
if (solrClient != null) {
|
||||
solrClient.close();
|
||||
}
|
||||
solrClient = null;
|
||||
System.clearProperty("authenticationPlugin");
|
||||
System.clearProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED);
|
||||
@ -1,113 +0,0 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.core;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Properties;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.handler.IndexFetcher;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TestRule;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class TestArbitraryIndexDir extends AbstractSolrTestCase {
|
||||
|
||||
@Rule
|
||||
public TestRule testRules = new SystemPropertiesRestoreRule();
|
||||
|
||||
// TODO: fix this test to not require FSDirectory
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() {
|
||||
// this test wants to start solr, and then open a separate indexwriter of its own on the same dir.
|
||||
System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
|
||||
System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
initCore("solrconfig.xml", "schema12.xml");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLoadNewIndexDir() throws IOException, ParserConfigurationException, SAXException {
|
||||
//add a doc in original index dir
|
||||
assertU(adoc("id", String.valueOf(1),
|
||||
"name", "name"+String.valueOf(1)));
|
||||
//create a new index dir and index.properties file
|
||||
File idxprops = new File(h.getCore().getDataDir() + IndexFetcher.INDEX_PROPERTIES);
|
||||
Properties p = new Properties();
|
||||
File newDir = new File(h.getCore().getDataDir() + "index_temp");
|
||||
newDir.mkdirs();
|
||||
p.put("index", newDir.getName());
|
||||
Writer os = null;
|
||||
try {
|
||||
os = new OutputStreamWriter(new FileOutputStream(idxprops), StandardCharsets.UTF_8);
|
||||
p.store(os, "index properties");
|
||||
} catch (Exception e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
"Unable to write " + IndexFetcher.INDEX_PROPERTIES, e);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(os);
|
||||
}
|
||||
|
||||
//add a doc in the new index dir
|
||||
Directory dir = newFSDirectory(newDir.toPath());
|
||||
IndexWriter iw = new IndexWriter(
|
||||
dir,
|
||||
new IndexWriterConfig(new StandardAnalyzer())
|
||||
);
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("id", "2", Field.Store.YES));
|
||||
doc.add(new TextField("name", "name2", Field.Store.YES));
|
||||
iw.addDocument(doc);
|
||||
iw.commit();
|
||||
iw.close();
|
||||
|
||||
//commit will cause searcher to open with the new index dir
|
||||
assertU(commit());
h.getCoreContainer().reload(h.getCore().getName());
|
||||
//new index dir contains just 1 doc.
|
||||
assertQ("return doc with id 2",
|
||||
req("id:2"),
|
||||
"*[count(//doc)=1]"
|
||||
);
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -278,11 +278,11 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.LowerCaseFilter");
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.StopFilter");
|
||||
tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
|
||||
assertEquals("Query has only one token", 1, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
|
||||
|
@ -311,7 +311,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.LowerCaseFilter");
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 6 tokens", 6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
|
||||
|
@ -320,7 +320,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
|
|||
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.StopFilter");
|
||||
tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
|
||||
assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
|
||||
|
|
|
@ -209,7 +209,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 10);
|
||||
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
|
||||
|
@ -222,7 +222,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
|
||||
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
|
||||
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.StopFilter");
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
|
||||
assertEquals(tokenList.size(), 8);
|
||||
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
|
||||
|
@ -258,12 +258,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.LowerCaseFilter");
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
|
||||
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.StopFilter");
|
||||
tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
|
||||
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
|
||||
assertEquals(2, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
|
||||
|
@ -416,7 +416,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
|
|||
assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
|
||||
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
|
||||
assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
|
||||
tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
|
||||
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
|
||||
assertEquals(6, tokenList.size());
|
||||
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));
|
||||
@ -0,0 +1,236 @@
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.solr.BaseDistributedSearchTestCase;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.response.FacetField;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.junit.Before;
|
||||
|
||||
public class DistributedFacetExistsSmallTest extends BaseDistributedSearchTestCase {
|
||||
|
||||
public static final String FLD = "t_s";
|
||||
private int maxId;
|
||||
|
||||
public DistributedFacetExistsSmallTest() {
|
||||
}
|
||||
|
||||
@Before
|
||||
public void prepareIndex() throws Exception {
|
||||
del("*:*");
|
||||
|
||||
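// index a small set of docs with term frequencies between 1 and 3 per prefix (A*, B*, C*) and randomly
// spaced ids; with facet.exists=true every bucket is expected to report a count of 1 regardless of frequency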
final Random rnd = random();
|
||||
index(id, maxId=rnd.nextInt(5), FLD, "AAA");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "B");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
|
||||
index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
|
||||
|
||||
final SolrClient shard0 = clients.get(0);
|
||||
// enabling the next line is expected to make the test fail
|
||||
//shard0.add(sdoc("id", 13, FLD, "DDD"));
|
||||
commit();
|
||||
|
||||
handle.clear();
|
||||
handle.put("QTime", SKIPVAL);
|
||||
handle.put("timestamp", SKIPVAL);
|
||||
handle.put("maxScore", SKIPVAL);
|
||||
handle.put("_version_", SKIPVAL);
|
||||
}
|
||||
|
||||
@ShardsFixed(num=4)
|
||||
public void test() throws Exception{
|
||||
checkBasicRequest();
|
||||
checkWithMinCountEqOne();
|
||||
checkWithSortCount();
|
||||
checkWithMethodSetPerField();
|
||||
|
||||
{
|
||||
// run the same request without facet.exists to make sure plain enum faceting doesn't trigger an NPE
|
||||
final ModifiableSolrParams params = buildParams();
|
||||
params.remove("facet.exists");
|
||||
QueryResponse rsp = query(params);
|
||||
}
|
||||
|
||||
checkRandomParams();
|
||||
|
||||
checkInvalidMincount();
|
||||
}
|
||||
|
||||
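// randomly perturbs q, facet.offset, facet.limit, facet.sort, facet.prefix, facet.missing and
// facet.mincount (valid values only) and compares the distributed response against the control index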
private void checkRandomParams() throws Exception {
|
||||
final ModifiableSolrParams params = buildParams();
|
||||
Random rand = random();
|
||||
|
||||
if (rand.nextBoolean()) {
|
||||
int from;
|
||||
params.set("q", "["+(from = rand.nextInt(maxId/2))+
|
||||
" TO "+((from-1)+(rand.nextInt(maxId)))+"]");
|
||||
}
|
||||
|
||||
int offset = 0;
|
||||
int indexSize = 6;
|
||||
if (rand.nextInt(100) < 20) {
|
||||
if (rand.nextBoolean()) {
|
||||
offset = rand.nextInt(100) < 10 ? rand.nextInt(indexSize *2) : rand.nextInt(indexSize/3+1);
|
||||
}
|
||||
params.add("facet.offset", Integer.toString(offset));
|
||||
}
|
||||
|
||||
int limit = 100;
|
||||
if (rand.nextInt(100) < 20) {
|
||||
if (rand.nextBoolean()) {
|
||||
limit = rand.nextInt(100) < 10 ? rand.nextInt(indexSize/2+1) : rand.nextInt(indexSize*2);
|
||||
}
|
||||
params.add("facet.limit", Integer.toString(limit));
|
||||
}
|
||||
|
||||
if (rand.nextBoolean()) {
|
||||
params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 20) {
|
||||
final String[] prefixes = new String[] {"A","B","C"};
|
||||
params.add("facet.prefix", prefixes[rand.nextInt(prefixes.length)]);
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 20) {
|
||||
params.add("facet.missing", "true");
|
||||
}
|
||||
|
||||
if (rand.nextInt(100) < 20) { // assign only valid values (0 or 1)
|
||||
params.add("facet.mincount", rand.nextBoolean() ? "0": "1" );
|
||||
}
|
||||
|
||||
final boolean shardRespondsWithMissingEvenLimitIsZero =
|
||||
params.getBool("facet.missing", false) && params.getInt("facet.limit", 100)==0;
|
||||
// skip the missing-count check; cloud behaves differently from non-distributed search here
|
||||
if (shardRespondsWithMissingEvenLimitIsZero ) {
|
||||
handle.put(null, SKIP);
|
||||
}
|
||||
query(params);
|
||||
if (shardRespondsWithMissingEvenLimitIsZero ) {
|
||||
handle.remove(null);
|
||||
}
|
||||
}
|
||||
|
||||
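// facet.exists only reports whether a term occurs, so a mincount greater than 1 can never be satisfied;
// both the distributed and the single-node request are expected to fail with BAD_REQUEST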
private void checkInvalidMincount() throws SolrServerException, IOException {
|
||||
final ModifiableSolrParams params = buildParams();
|
||||
if (random().nextBoolean()) {
|
||||
params.remove("facet.exists");
|
||||
params.set("f."+FLD+".facet.exists","true");
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
params.set("facet.mincount", ""+(2+random().nextInt(100)) );
|
||||
} else {
|
||||
params.set("f."+FLD+".facet.mincount", ""+(2+random().nextInt(100)) );
|
||||
}
|
||||
|
||||
try {
|
||||
if (random().nextBoolean()) {
|
||||
setDistributedParams(params);
|
||||
queryServer(params);
|
||||
} else {
|
||||
params.set("distrib", "false");
|
||||
controlClient.query(params);
|
||||
}
|
||||
fail();
|
||||
} catch(SolrException e) { // check that distr and single index search fail the same
|
||||
assertEquals(e.code(), ErrorCode.BAD_REQUEST.code);
|
||||
assertTrue(e.getMessage().contains("facet.exists"));
|
||||
assertTrue(e.getMessage().contains("facet.mincount"));
|
||||
assertTrue(e.getMessage().contains(FLD));
|
||||
}
|
||||
}
|
||||
|
||||
private void checkBasicRequest() throws Exception {
|
||||
final ModifiableSolrParams params = buildParams();
|
||||
QueryResponse rsp = query(params);
|
||||
assertResponse(rsp);
|
||||
}
|
||||
|
||||
private void checkWithMinCountEqOne() throws Exception {
|
||||
final ModifiableSolrParams params = buildParams("facet.mincount","1");
|
||||
QueryResponse rsp = query(params);
|
||||
assertResponse(rsp);
|
||||
}
|
||||
|
||||
private void checkWithSortCount() throws Exception {
|
||||
final ModifiableSolrParams params = buildParams("facet.sort","count");
|
||||
QueryResponse rsp = query(params);
|
||||
assertResponse(rsp);
|
||||
}
|
||||
|
||||
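// the per-field form f.<field>.facet.exists=true is expected to behave exactly like the global parameter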
private void checkWithMethodSetPerField() throws Exception {
|
||||
final ModifiableSolrParams params = buildParams("f." + FLD + ".facet.exists", "true");
|
||||
params.remove("facet.exists");
|
||||
QueryResponse rsp = query(params);
|
||||
assertResponse(rsp);
|
||||
}
|
||||
|
||||
private ModifiableSolrParams buildParams(String... additionalParams) {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
||||
params.add("q", "*:*");
|
||||
params.add("rows", "0");
|
||||
//params.add("debugQuery", "true");
|
||||
params.add("facet", "true");
|
||||
params.add("sort", "id asc");
|
||||
|
||||
if(random().nextBoolean()){
|
||||
params.add("facet.method", "enum");
|
||||
}
|
||||
|
||||
params.add("facet.exists", "true");
|
||||
params.add("facet.field", FLD);
|
||||
for(int i = 0; i < additionalParams.length;) {
|
||||
params.add(additionalParams[i++], additionalParams[i++]);
|
||||
}
|
||||
return params;
|
||||
}
|
||||
|
||||
private void assertResponse(QueryResponse rsp) {
|
||||
final FacetField facetField = rsp.getFacetField(FLD);
|
||||
|
||||
assertThat(facetField.getValueCount(), is(6));
|
||||
final List<FacetField.Count> counts = facetField.getValues();
|
||||
for (FacetField.Count count : counts) {
|
||||
assertThat("Count for: " + count.getName(), count.getCount(), is(1L));
|
||||
}
|
||||
assertThat(counts.get(0).getName(), is("AAA"));
|
||||
assertThat(counts.get(1).getName(), is("B"));
|
||||
assertThat(counts.get(2).getName(), is("BB"));
|
||||
}
|
||||
}
|
|
@ -38,7 +38,6 @@ import org.apache.solr.response.SolrQueryResponse;
|
|||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.util.TimeZoneUtils;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
import org.noggit.ObjectBuilder;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -494,11 +493,9 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
|
||||
ModifiableSolrParams params = params("q","*:*", "rows","0", "facet","true", "facet.field","{!key=myalias}"+field);
|
||||
|
||||
String[] methods = {null, "fc","enum","fcs", "uif"
|
||||
};
|
||||
String[] methods = {null, "fc","enum","fcs", "uif"};
|
||||
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
|
||||
methods = new String[]{null, "fc","enum", "uif"
|
||||
};
|
||||
methods = new String[]{null, "fc","enum", "uif"};
|
||||
}
|
||||
|
||||
prefixes = prefixes==null ? new String[]{null} : prefixes;
|
||||
|
@ -2017,6 +2014,49 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "3");
|
||||
doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "100");
|
||||
doFacetPrefix("t_s", null, "", "facet.method", "fc");
|
||||
doFacetExistsPrefix("t_s", null, "");
|
||||
doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "3");
|
||||
doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "100");
|
||||
}
|
||||
|
||||
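// since facet.exists caps every count at 1, a facet.mincount greater than 1 is contradictory and must be
// rejected with BAD_REQUEST, whether specified globally or per field; overriding back to exists=false or
// to a mincount of 0/1 per field makes the request valid again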
@Test
|
||||
public void testFacetExistsShouldThrowExceptionForMincountGreaterThanOne () throws Exception {
|
||||
final String f = "t_s";
|
||||
final List<String> msg = Arrays.asList("facet.mincount", "facet.exists", f);
|
||||
Collections.shuffle(msg, random());
|
||||
assertQEx("checking global method or per field", msg.get(0),
|
||||
req("q", "id:[* TO *]"
|
||||
,"indent","on"
|
||||
,"facet","true"
|
||||
, random().nextBoolean() ? "facet.exists": "f."+f+".facet.exists", "true"
|
||||
,"facet.field", f
|
||||
, random().nextBoolean() ? "facet.mincount" : "f."+f+".facet.mincount" ,
|
||||
"" + (2+random().nextInt(Integer.MAX_VALUE-2))
|
||||
)
|
||||
, ErrorCode.BAD_REQUEST);
|
||||
|
||||
assertQ("overriding per field",
|
||||
req("q", "id:[* TO *]"
|
||||
,"indent","on"
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"f."+f+".facet.exists", "false"
|
||||
,"facet.field", f
|
||||
,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
|
||||
),
|
||||
"//lst[@name='facet_fields']/lst[@name='"+f+"']");
|
||||
|
||||
assertQ("overriding per field",
|
||||
req("q", "id:[* TO *]"
|
||||
,"indent","on"
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", f
|
||||
,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
|
||||
,"f."+f+".facet.mincount", random().nextBoolean() ? "0":"1"
|
||||
),
|
||||
"//lst[@name='facet_fields']/lst[@name='"+f+"']");
|
||||
|
||||
}
|
||||
|
||||
static void indexFacetPrefixSingleValued() {
|
||||
|
@ -2037,7 +2077,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
@Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
|
||||
//@Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
|
||||
public void testFacetContainsUif() {
|
||||
doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "uif");
|
||||
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "uif", "facet.contains", "Ast");
|
||||
|
@ -2063,6 +2103,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "aSt", "facet.contains.ignoreCase", "true");
|
||||
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "asT", "facet.contains.ignoreCase", "true");
|
||||
doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "aST", "facet.contains.ignoreCase", "true");
|
||||
doFacetExistsPrefix("contains_s1", null, "Astra", "facet.contains", "Ast");
|
||||
}
|
||||
|
||||
static void indexFacetPrefix(String idPrefix, String f, String termSuffix, String g) {
|
||||
|
@ -2313,6 +2354,239 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
);
|
||||
}
|
||||
|
||||
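// exercises facet.exists=true combined with facet.prefix across offset/limit/sort variations;
// every bucket that is returned is expected to have a count of exactly 1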
public void doFacetExistsPrefix(String f, String local, String termSuffix, String... params) {
|
||||
String indent="on";
|
||||
String pre = "//lst[@name='"+f+"']";
|
||||
String lf = local==null ? f : local+f;
|
||||
|
||||
assertQ("test field facet.method",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent", indent
|
||||
,"facet", "true"
|
||||
,"f."+lf+".facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount", "0"
|
||||
,"facet.offset", "0"
|
||||
,"facet.limit", "100"
|
||||
,"facet.sort", "count"
|
||||
,"facet.prefix", "B"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
|
||||
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix middle, exact match first term",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","B"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
|
||||
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix middle, exact match first term, unsorted",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","index"
|
||||
,"facet.prefix","B"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
|
||||
,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix middle, paging",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","1"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","B"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
|
||||
,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='BBB"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix middle, paging",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","1"
|
||||
,"facet.limit","1"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","B"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
|
||||
,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix end, not exact match",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","C"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
|
||||
,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix end, exact match",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","CC"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
|
||||
,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
|
||||
,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix past end",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","X"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix past end",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","1"
|
||||
,"facet.limit","-1"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","X"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix at start, exact match",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","AAA"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
|
||||
,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
|
||||
);
|
||||
assertQ("test facet.prefix at Start, not exact match",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","AA"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
|
||||
,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
|
||||
);
|
||||
assertQ("test facet.prefix before start",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","0"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","999"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
|
||||
);
|
||||
|
||||
assertQ("test facet.prefix before start",
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","0"
|
||||
,"facet.offset","2"
|
||||
,"facet.limit","100"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","999"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
|
||||
);
|
||||
|
||||
// test offset beyond what is collected internally in queue
|
||||
assertQ(
|
||||
req(params, "q", "id:[* TO *]"
|
||||
,"indent",indent
|
||||
,"facet","true"
|
||||
,"facet.exists", "true"
|
||||
,"facet.field", lf
|
||||
,"facet.mincount","1"
|
||||
,"facet.offset","5"
|
||||
,"facet.limit","10"
|
||||
,"facet.sort","count"
|
||||
,"facet.prefix","CC"
|
||||
)
|
||||
,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
|
||||
);
|
||||
}
|
||||
|
||||
public void doFacetContains(String f, String g, String termSuffix, String contains, String groupContains, String... params) {
|
||||
String indent="on";
|
||||
String pre = "//lst[@name='"+f+"']";
|
||||
|
|
|
@ -263,6 +263,23 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
@Test // https://issues.apache.org/jira/browse/SOLR-9494
|
||||
public void testNeedsScoreBugFixed() throws Exception {
|
||||
String[] doc = {"id","1", "group_s", "xyz", "text_ws", "hello xxx world"};
|
||||
assertU(adoc(doc));
|
||||
assertU(commit());
|
||||
|
||||
ModifiableSolrParams params = params(
|
||||
"q", "{!surround df=text_ws} 2W(hello, world)", // a SpanQuery that matches
|
||||
"fq", "{!collapse field=group_s}", // collapse on some field
|
||||
// note: rows= whatever; doesn't matter
|
||||
"facet", "true", // facet on something
|
||||
"facet.field", "group_s"
|
||||
);
|
||||
assertQ(req(params));
|
||||
assertQ(req(params)); // fails *second* time!
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeBoost() throws Exception {
|
||||
|
||||
|
|
|
@ -40,20 +40,16 @@ import org.apache.http.message.BasicHeader;
|
|||
import org.apache.http.util.EntityUtils;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpClientUtil;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
|
||||
import org.apache.solr.client.solrj.request.GenericSolrRequest;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.cloud.MiniSolrCloudCluster;
|
||||
import org.apache.solr.cloud.TestMiniSolrCloudClusterBase;
|
||||
import org.apache.solr.cloud.SolrCloudTestCase;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.Base64;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
|
@ -61,50 +57,50 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.util.SolrCLI;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.nio.charset.StandardCharsets.UTF_8;
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.SolrTestCaseJ4.getHttpSolrClient;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
|
||||
|
||||
public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
||||
public class BasicAuthIntegrationTest extends SolrCloudTestCase {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
@Override
|
||||
protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader,
|
||||
CloudSolrClient cloudSolrClient, String defaultCollName) throws Exception {
|
||||
private static final String COLLECTION = "authCollection";
|
||||
|
||||
@BeforeClass
|
||||
public static void setupCluster() throws Exception {
|
||||
configureCluster(3)
|
||||
.addConfig("conf", configset("cloud-minimal"))
|
||||
.configure();
|
||||
|
||||
CollectionAdminRequest.createCollection(COLLECTION, "conf", 3, 1).process(cluster.getSolrClient());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasicAuth() throws Exception {
|
||||
|
||||
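// uploads a BasicAuth security.json to ZooKeeper, restarts a random node to verify the plugin survives
// a restart, and then checks that unauthenticated security edits are rejected with a 401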
String authcPrefix = "/admin/authentication";
|
||||
String authzPrefix = "/admin/authorization";
|
||||
|
||||
String old = cloudSolrClient.getDefaultCollection();
|
||||
cloudSolrClient.setDefaultCollection(null);
|
||||
|
||||
NamedList<Object> rsp;
|
||||
HttpClient cl = null;
|
||||
try {
|
||||
cl = HttpClientUtil.createClient(null);
|
||||
String baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
|
||||
|
||||
JettySolrRunner randomJetty = cluster.getRandomJetty(random());
|
||||
String baseUrl = randomJetty.getBaseUrl().toString();
|
||||
verifySecurityStatus(cl, baseUrl + authcPrefix, "/errorMessages", null, 20);
|
||||
zkClient.setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
|
||||
zkClient().setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
|
||||
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
|
||||
|
||||
boolean found = false;
|
||||
for (JettySolrRunner jettySolrRunner : miniCluster.getJettySolrRunners()) {
|
||||
if(baseUrl.contains(String.valueOf(jettySolrRunner.getLocalPort()))){
|
||||
found = true;
|
||||
jettySolrRunner.stop();
|
||||
jettySolrRunner.start();
|
||||
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assertTrue("No server found to restart , looking for : "+baseUrl , found);
|
||||
randomJetty.stop();
|
||||
randomJetty.start(false);
|
||||
baseUrl = randomJetty.getBaseUrl().toString();
|
||||
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
|
||||
|
||||
String command = "{\n" +
|
||||
"'set-user': {'harry':'HarryIsCool'}\n" +
|
||||
|
@@ -112,11 +108,12 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
|
||||
GenericSolrRequest genericReq = new GenericSolrRequest(SolrRequest.METHOD.POST, authcPrefix, new ModifiableSolrParams());
|
||||
genericReq.setContentStreams(Collections.singletonList(new ContentStreamBase.ByteArrayStream(command.getBytes(UTF_8), "")));
|
||||
try {
|
||||
cloudSolrClient.request(genericReq);
|
||||
fail("Should have failed with a 401");
|
||||
} catch (HttpSolrClient.RemoteSolrException e) {
|
||||
}
|
||||
|
||||
HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class, () -> {
|
||||
cluster.getSolrClient().request(genericReq);
|
||||
});
|
||||
assertEquals(401, exp.code());
|
||||
|
||||
command = "{\n" +
|
||||
"'set-user': {'harry':'HarryIsUberCool'}\n" +
|
||||
"}";
|
||||
|
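The hunk above replaces a try/fail/catch block with the expectThrows idiom from LuceneTestCase. As a standalone illustration, not part of this commit, a minimal test using that idiom on a plain runtime exception might look like this:

import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;

public class ExpectThrowsExample extends LuceneTestCase {
  @Test
  public void testExpectThrows() {
    // expectThrows runs the lambda, asserts the expected exception type, and returns it.
    IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
      throw new IllegalStateException("boom");
    });
    assertEquals("boom", e.getMessage());
  }
}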
@@ -130,7 +127,8 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
int statusCode = r.getStatusLine().getStatusCode();
|
||||
Utils.consumeFully(r.getEntity());
|
||||
assertEquals("proper_cred sent, but access denied", 200, statusCode);
|
||||
baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
|
||||
|
||||
baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
|
||||
|
||||
verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/credentials/harry", NOT_NULL_PREDICATE, 20);
|
||||
command = "{\n" +
|
||||
|
@@ -139,7 +137,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
|
||||
executeCommand(baseUrl + authzPrefix, cl,command, "solr", "SolrRocks");
|
||||
|
||||
baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
|
||||
baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
|
||||
verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/user-role/harry", NOT_NULL_PREDICATE, 20);
|
||||
|
||||
executeCommand(baseUrl + authzPrefix, cl, Utils.toJSONString(singletonMap("set-permission", Utils.makeMap
|
||||
|
@@ -153,7 +151,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
("name", "collection-admin-edit", "role", "admin"))), "harry", "HarryIsUberCool" );
|
||||
verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[2]/name", "collection-admin-edit", 20);
|
||||
|
||||
CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(defaultCollName);
|
||||
CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(COLLECTION);
|
||||
|
||||
try (HttpSolrClient solrClient = getHttpSolrClient(baseUrl)) {
|
||||
try {
|
||||
|
@@ -170,18 +168,17 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
|
||||
}
|
||||
}
|
||||
cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
|
||||
cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
|
||||
.setBasicAuthCredentials("harry", "HarryIsUberCool"));
|
||||
|
||||
try {
|
||||
cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
|
||||
cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
|
||||
.setBasicAuthCredentials("harry", "Cool12345"));
|
||||
fail("This should not succeed");
|
||||
} catch (HttpSolrClient.RemoteSolrException e) {
|
||||
|
||||
}
|
||||
|
||||
cloudSolrClient.setDefaultCollection(old);
|
||||
executeCommand(baseUrl + authzPrefix, cl,"{set-permission : { name : update , role : admin}}", "harry", "HarryIsUberCool");
|
||||
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
@@ -190,7 +187,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
|
|||
update.setBasicAuthCredentials("harry","HarryIsUberCool");
|
||||
update.add(doc);
|
||||
update.setCommitWithin(100);
|
||||
cloudSolrClient.request(update);
|
||||
cluster.getSolrClient().request(update, COLLECTION);
|
||||
|
||||
|
||||
executeCommand(baseUrl + authcPrefix, cl, "{set-property : { blockUnknown: true}}", "harry", "HarryIsUberCool");
|
||||
|
|
|
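For context, a minimal sketch, not part of this commit, of how a SolrJ client could authenticate against the BasicAuthPlugin that this test configures; the base URL, collection name, and credentials below are illustrative assumptions taken from the test.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.QueryRequest;
import org.apache.solr.client.solrj.response.QueryResponse;

public class BasicAuthQueryExample {
  public static void main(String[] args) throws Exception {
    // Assumed setup: Solr at localhost:8983, collection "authCollection",
    // BasicAuthPlugin enabled with the user harry/HarryIsUberCool.
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr").build()) {
      QueryRequest req = new QueryRequest(new SolrQuery("*:*"));
      // Credentials are attached per request, mirroring setBasicAuthCredentials(...) above.
      req.setBasicAuthCredentials("harry", "HarryIsUberCool");
      QueryResponse rsp = req.process(client, "authCollection");
      System.out.println("numFound=" + rsp.getResults().getNumFound());
    }
  }
}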
@@ -116,23 +116,7 @@
|
|||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
||||
|
||||
|
||||
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
|
||||
otherwise they will be ignored. If you don't plan on using hdfs,
|
||||
you can safely remove this section. -->
|
||||
<!-- The root directory that collection data should be written to. -->
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<!-- The hadoop configuration files to use for the hdfs client. -->
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<!-- Enable/Disable the hdfs cache. -->
|
||||
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
|
||||
<!-- Enable/Disable using one global cache for all SolrCores.
|
||||
The settings used will be from the first HdfsDirectoryFactory created. -->
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
|
||||
|
||||
</directoryFactory>
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
|
|
|
@@ -119,23 +119,7 @@
|
|||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
||||
|
||||
|
||||
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
|
||||
otherwise they will be ignored. If you don't plan on using hdfs,
|
||||
you can safely remove this section. -->
|
||||
<!-- The root directory that collection data should be written to. -->
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<!-- The hadoop configuration files to use for the hdfs client. -->
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<!-- Enable/Disable the hdfs cache. -->
|
||||
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
|
||||
<!-- Enable/Disable using one global cache for all SolrCores.
|
||||
The settings used will be from the first HdfsDirectoryFactory created. -->
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
|
||||
|
||||
</directoryFactory>
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
|
|
|
@@ -116,23 +116,7 @@
|
|||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
||||
|
||||
|
||||
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
|
||||
otherwise they will be ignored. If you don't plan on using hdfs,
|
||||
you can safely remove this section. -->
|
||||
<!-- The root directory that collection data should be written to. -->
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<!-- The hadoop configuration files to use for the hdfs client. -->
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<!-- Enable/Disable the hdfs cache. -->
|
||||
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
|
||||
<!-- Enable/Disable using one global cache for all SolrCores.
|
||||
The settings used will be from the first HdfsDirectoryFactory created. -->
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
|
||||
|
||||
</directoryFactory>
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
|
|
|
@@ -116,23 +116,7 @@
|
|||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
||||
|
||||
|
||||
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
|
||||
otherwise they will be ignored. If you don't plan on using hdfs,
|
||||
you can safely remove this section. -->
|
||||
<!-- The root directory that collection data should be written to. -->
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<!-- The hadoop configuration files to use for the hdfs client. -->
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<!-- Enable/Disable the hdfs cache. -->
|
||||
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
|
||||
<!-- Enable/Disable using one global cache for all SolrCores.
|
||||
The settings used will be from the first HdfsDirectoryFactory created. -->
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
|
||||
|
||||
</directoryFactory>
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
|
|
|
@@ -117,23 +117,7 @@
|
|||
persistent, and doesn't work with replication.
|
||||
-->
|
||||
<directoryFactory name="DirectoryFactory"
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
|
||||
|
||||
|
||||
<!-- These will be used if you are using the solr.HdfsDirectoryFactory,
|
||||
otherwise they will be ignored. If you don't plan on using hdfs,
|
||||
you can safely remove this section. -->
|
||||
<!-- The root directory that collection data should be written to. -->
|
||||
<str name="solr.hdfs.home">${solr.hdfs.home:}</str>
|
||||
<!-- The hadoop configuration files to use for the hdfs client. -->
|
||||
<str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
|
||||
<!-- Enable/Disable the hdfs cache. -->
|
||||
<str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
|
||||
<!-- Enable/Disable using one global cache for all SolrCores.
|
||||
The settings used will be from the first HdfsDirectoryFactory created. -->
|
||||
<str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
|
||||
|
||||
</directoryFactory>
|
||||
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>
|
||||
|
||||
<!-- The CodecFactory for defining the format of the inverted index.
|
||||
The default implementation is SchemaCodecFactory, which is the official Lucene
|
||||
|
|
|
@@ -81,7 +81,16 @@ public class Slice extends ZkNodeProps implements Iterable<Replica> {
|
|||
* shard in that state still receives update requests from the parent shard
|
||||
* leader, however does not participate in distributed search.
|
||||
*/
|
||||
RECOVERY;
|
||||
RECOVERY,
|
||||
|
||||
/**
|
||||
* Sub-shards of a split shard are put in that state when the split is deemed failed
|
||||
* by the overseer even though all replicas are active because either the leader node is
|
||||
* no longer live or has a different ephemeral owner (zk session id). Such conditions can potentially
|
||||
* lead to data loss. See SOLR-9438 for details. A shard in that state will neither receive
|
||||
* update requests from the parent shard leader, nor participate in distributed search.
|
||||
*/
|
||||
RECOVERY_FAILED;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
|
|
|
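As a hedged illustration of the new enum constant, not taken from this commit, client code inspecting cluster state could detect a failed split roughly like this; the collection handling is an assumption:

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;

public class SliceStateCheck {
  /** Returns true if any shard of the collection is marked RECOVERY_FAILED. */
  public static boolean hasFailedSplit(ClusterState clusterState, String collectionName) {
    DocCollection coll = clusterState.getCollection(collectionName);
    for (Slice slice : coll.getSlices()) {
      if (slice.getState() == Slice.State.RECOVERY_FAILED) {
        return true;
      }
    }
    return false;
  }
}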
@@ -32,6 +32,7 @@ import java.nio.charset.StandardCharsets;
|
|||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.RejectedExecutionException;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
@@ -263,7 +264,14 @@ public class SolrZkClient implements Closeable {
|
|||
@Override
|
||||
public void process(final WatchedEvent event) {
|
||||
log.debug("Submitting job to respond to event " + event);
|
||||
zkCallbackExecutor.submit(() -> watcher.process(event));
|
||||
try {
|
||||
zkCallbackExecutor.submit(() -> watcher.process(event));
|
||||
} catch (RejectedExecutionException e) {
|
||||
// If not a graceful shutdown
|
||||
if (!isClosed()) {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
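The SolrZkClient change above submits watcher callbacks to an executor and tolerates RejectedExecutionException once the client is closed. A generic, self-contained sketch of that pattern; the names are illustrative, not the actual SolrZkClient fields:

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;

public class CallbackDispatcher implements AutoCloseable {
  private final ExecutorService executor = Executors.newSingleThreadExecutor();
  private volatile boolean closed = false;

  public void dispatch(Runnable callback) {
    try {
      executor.submit(callback);
    } catch (RejectedExecutionException e) {
      // Only rethrow if we were not shut down on purpose.
      if (!closed) {
        throw e;
      }
    }
  }

  @Override
  public void close() {
    closed = true;
    executor.shutdown();
  }
}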
@@ -185,6 +185,14 @@ public interface FacetParams {
|
|||
* only use the filterCache for terms with a df >= to this parameter.
|
||||
*/
|
||||
public static final String FACET_ENUM_CACHE_MINDF = FACET + ".enum.cache.minDf";
|
||||
|
||||
/**
|
||||
* A boolean parameter that caps the facet counts at 1.
|
||||
* With this set, a returned count will only be 0 or 1.
|
||||
* For apps that don't need the count, this should be an optimization
|
||||
*/
|
||||
public static final String FACET_EXISTS = FACET+".exists";
|
||||
|
||||
/**
|
||||
* Any field whose terms the user wants to enumerate over for
|
||||
* Facet Constraint Counts (multi-value)
|
||||
|
|
|
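A hedged SolrJ sketch, not part of this commit, showing how the new facet.exists parameter could be set on a query; the client, collection, and field names are assumptions:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.FacetParams;

public class FacetExistsExample {
  public static long queryWithFacetExists(SolrClient client, String collection) throws Exception {
    SolrQuery query = new SolrQuery("*:*");
    query.setFacet(true);
    query.addFacetField("category");
    // Cap facet counts at 1: "does at least one document exist for this term?"
    query.set(FacetParams.FACET_EXISTS, true);
    QueryResponse rsp = client.query(collection, query);
    return rsp.getResults().getNumFound();
  }
}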
@@ -182,12 +182,15 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
|
|||
// test a second query, test making a copy of the main query
|
||||
SolrQuery query2 = query.getCopy();
|
||||
query2.addFilterQuery("inStock:true");
|
||||
Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
|
||||
response = client.query( query2 );
|
||||
Assert.assertEquals(1, query2.getFilterQueries().length);
|
||||
Assert.assertEquals(0, response.getStatus());
|
||||
Assert.assertEquals(2, response.getResults().getNumFound() );
|
||||
Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
|
||||
|
||||
for (SolrDocument outDoc : response.getResults()) {
|
||||
assertEquals(true, outDoc.getFieldValue("inStock"));
|
||||
}
|
||||
|
||||
// sanity check round tripping of params...
|
||||
query = new SolrQuery("foo");
|
||||
query.addFilterQuery("{!field f=inStock}true");
|
||||
|
|
|
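The test above relies on SolrQuery.getCopy() producing an independent parameter set. A small standalone illustration; the query strings are arbitrary examples:

import org.apache.solr.client.solrj.SolrQuery;

public class QueryCopyExample {
  public static void main(String[] args) {
    SolrQuery query = new SolrQuery("name:ipod");
    SolrQuery query2 = query.getCopy();
    query2.addFilterQuery("inStock:true");

    // The copy owns its own filter queries; the original stays untouched.
    System.out.println(query.getFilterQueries());          // null
    System.out.println(query2.getFilterQueries().length);  // 1
  }
}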
@@ -175,33 +175,36 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
|
||||
List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
|
||||
List<Object> matchObj = generateAllDataTypes();
|
||||
|
||||
assertEquals(unmarshaledObj.size(), matchObj.size());
|
||||
for(int i=0; i < unmarshaledObj.size(); i++) {
|
||||
|
||||
if(unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
|
||||
byte[] b1 = (byte[]) unmarshaledObj.get(i);
|
||||
byte[] b2 = (byte[]) matchObj.get(i);
|
||||
assertTrue(Arrays.equals(b1, b2));
|
||||
} else if(unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument ) {
|
||||
assertTrue(compareSolrDocument(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if(unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList ) {
|
||||
assertTrue(compareSolrDocumentList(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if(unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
|
||||
assertTrue(compareSolrInputDocument(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if(unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
|
||||
assertTrue(assertSolrInputFieldEquals(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else {
|
||||
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
|
||||
}
|
||||
|
||||
}
|
||||
compareObjects(unmarshaledObj, matchObj);
|
||||
} catch (IOException e) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void compareObjects(List unmarshaledObj, List matchObj) {
|
||||
assertEquals(unmarshaledObj.size(), matchObj.size());
|
||||
for (int i = 0; i < unmarshaledObj.size(); i++) {
|
||||
|
||||
if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
|
||||
byte[] b1 = (byte[]) unmarshaledObj.get(i);
|
||||
byte[] b2 = (byte[]) matchObj.get(i);
|
||||
assertTrue(Arrays.equals(b1, b2));
|
||||
} else if (unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument) {
|
||||
assertTrue(compareSolrDocument(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if (unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList) {
|
||||
assertTrue(compareSolrDocumentList(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if (unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
|
||||
assertTrue(compareSolrInputDocument(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else if (unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
|
||||
assertTrue(assertSolrInputFieldEquals(unmarshaledObj.get(i), matchObj.get(i)));
|
||||
} else {
|
||||
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBackCompatForSolrDocumentWithChildDocs() throws IOException {
|
||||
JavaBinCodec javabin = new JavaBinCodec(){
|
||||
|
@@ -267,14 +270,33 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testResponseChildDocuments() throws IOException {
|
||||
public void testAllTypes() throws IOException {
|
||||
List<Object> obj = generateAllDataTypes();
|
||||
compareObjects(
|
||||
(List) getObject(getBytes(obj)),
|
||||
(List) obj
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
private static Object serializeAndDeserialize(Object o) throws IOException {
|
||||
return getObject(getBytes(o));
|
||||
}
|
||||
private static byte[] getBytes(Object o) throws IOException {
|
||||
JavaBinCodec javabin = new JavaBinCodec();
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
javabin.marshal(generateSolrDocumentWithChildDocs(), baos);
|
||||
javabin.marshal(o, baos);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
SolrDocument result = (SolrDocument) javabin.unmarshal(new ByteArrayInputStream(baos.toByteArray()));
|
||||
private static Object getObject(byte[] bytes) throws IOException {
|
||||
return new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testResponseChildDocuments() throws IOException {
|
||||
SolrDocument result = (SolrDocument) serializeAndDeserialize(generateSolrDocumentWithChildDocs());
|
||||
assertEquals(2, result.size());
|
||||
assertEquals("1", result.getFieldValue("id"));
|
||||
assertEquals("parentDocument", result.getFieldValue("subject"));
|
||||
|
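The refactoring above centralizes marshalling into getBytes/getObject helpers. A minimal, self-contained JavaBin round trip in the same spirit; the document values are arbitrary examples:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.JavaBinCodec;

public class JavaBinRoundTrip {
  public static void main(String[] args) throws Exception {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "1");
    doc.addField("subject", "parentDocument");

    // Marshal to bytes...
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    new JavaBinCodec().marshal(doc, baos);

    // ...and unmarshal back.
    Object roundTripped = new JavaBinCodec().unmarshal(new ByteArrayInputStream(baos.toByteArray()));
    System.out.println(roundTripped);
  }
}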
@@ -305,13 +327,11 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testStringCaching() throws Exception {
|
||||
Map<String, Object> m = Utils.makeMap("key1", "val1", "key2", "val2");
|
||||
byte[] b1 = getBytes(m);//copy 1
|
||||
byte[] b2 = getBytes(m);//copy 2
|
||||
Map m1 = (Map) getObject(b1);
|
||||
Map m2 = (Map) getObject(b1);
|
||||
|
||||
ByteArrayOutputStream os1 = new ByteArrayOutputStream();
|
||||
new JavaBinCodec().marshal(m, os1);
|
||||
Map m1 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os1.toByteArray()));
|
||||
ByteArrayOutputStream os2 = new ByteArrayOutputStream();
|
||||
new JavaBinCodec().marshal(m, os2);
|
||||
Map m2 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os2.toByteArray()));
|
||||
List l1 = new ArrayList<>(m1.keySet());
|
||||
List l2 = new ArrayList<>(m2.keySet());
|
||||
|
||||
|
@@ -346,8 +366,8 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
});
|
||||
|
||||
|
||||
m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os1.toByteArray()));
|
||||
m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os2.toByteArray()));
|
||||
m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b1));
|
||||
m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b2));
|
||||
l1 = new ArrayList<>(m1.keySet());
|
||||
l2 = new ArrayList<>(m2.keySet());
|
||||
assertTrue(l1.get(0).equals(l2.get(0)));
|
||||
|
@@ -359,26 +379,19 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
public void genBinaryFiles() throws IOException {
|
||||
JavaBinCodec javabin = new JavaBinCodec();
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
|
||||
|
||||
Object data = generateAllDataTypes();
|
||||
|
||||
javabin.marshal(data, os);
|
||||
byte[] out = os.toByteArray();
|
||||
byte[] out = getBytes(data);
|
||||
FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
|
||||
BufferedOutputStream bos = new BufferedOutputStream(fs);
|
||||
bos.write(out);
|
||||
bos.close();
|
||||
|
||||
//Binary file with child documents
|
||||
javabin = new JavaBinCodec();
|
||||
SolrDocument sdoc = generateSolrDocumentWithChildDocs();
|
||||
os = new ByteArrayOutputStream();
|
||||
javabin.marshal(sdoc, os);
|
||||
fs = new FileOutputStream(new File(BIN_FILE_LOCATION_CHILD_DOCS));
|
||||
bos = new BufferedOutputStream(fs);
|
||||
bos.write(os.toByteArray());
|
||||
bos.write(getBytes(sdoc));
|
||||
bos.close();
|
||||
|
||||
}
|
||||
|
@@ -553,12 +566,7 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
|
|||
sdoc.put("some_boolean", ""+r.nextBoolean());
|
||||
sdoc.put("another_boolean", ""+r.nextBoolean());
|
||||
|
||||
|
||||
JavaBinCodec javabin = new JavaBinCodec();
|
||||
ByteArrayOutputStream os = new ByteArrayOutputStream();
|
||||
javabin.marshal(sdoc, os);
|
||||
os.toByteArray();
|
||||
buffers[bufnum] = os.toByteArray();
|
||||
buffers[bufnum] = getBytes(sdoc);
|
||||
}
|
||||
|
||||
int ret = 0;
|
||||
|
|
|
@@ -51,9 +51,6 @@
|
|||
|
||||
<!-- redefine the clover setup, because we dont want to run clover for the test-framework -->
|
||||
<target name="-clover.setup" if="run.clover"/>
|
||||
|
||||
<!-- redefine the test compilation, so it's just a no-op -->
|
||||
<target name="compile-test"/>
|
||||
|
||||
<!-- redefine the forbidden apis for tests, as we check ourselves -->
|
||||
<target name="-check-forbidden-tests" depends="-init-forbidden-apis,compile-core">
|
||||
|
|
|
@@ -374,7 +374,7 @@ public class MiniSolrCloudCluster {
|
|||
* @throws Exception on error
|
||||
*/
|
||||
public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception {
|
||||
jetty.start();
|
||||
jetty.start(false);
|
||||
jettys.add(jetty);
|
||||
return jetty;
|
||||
}
|
||||
|
|
|
@@ -27,6 +27,7 @@ import java.util.List;
|
|||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.embedded.JettyConfig;
|
||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
|
||||
|
@@ -143,6 +144,10 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
|
|||
/** The cluster */
|
||||
protected static MiniSolrCloudCluster cluster;
|
||||
|
||||
protected SolrZkClient zkClient() {
|
||||
return cluster.getSolrClient().getZkStateReader().getZkClient();
|
||||
}
|
||||
|
||||
/**
|
||||
* Call this to configure a cluster of n nodes.
|
||||
*
|
||||
|
|
|
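A hedged sketch, not part of this commit, of how a SolrCloudTestCase subclass might combine configureCluster(...) with the new zkClient() helper; the config name and JSON payload are assumptions:

import static java.nio.charset.StandardCharsets.UTF_8;

import org.apache.solr.cloud.SolrCloudTestCase;
import org.junit.BeforeClass;
import org.junit.Test;

public class ZkClientHelperExampleTest extends SolrCloudTestCase {

  @BeforeClass
  public static void setupCluster() throws Exception {
    configureCluster(1)
        .addConfig("conf", configset("cloud-minimal"))
        .configure();
  }

  @Test
  public void testWriteSecurityJson() throws Exception {
    String securityJson = "{\"authentication\":{\"class\":\"solr.BasicAuthPlugin\"}}";
    // The helper hides cluster.getSolrClient().getZkStateReader().getZkClient().
    zkClient().setData("/security.json", securityJson.getBytes(UTF_8), true);
  }
}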
@@ -0,0 +1,11 @@
|
|||
# Logging level
|
||||
log4j.rootLogger=INFO, CONSOLE
|
||||
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.CONSOLE.Target=System.err
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
|
||||
log4j.logger.org.apache.zookeeper=WARN
|
||||
log4j.logger.org.apache.hadoop=WARN
|
||||
log4j.logger.org.apache.directory=WARN
|
||||
log4j.logger.org.apache.solr.hadoop=INFO
|
|
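The properties file above configures log4j for these tests; the test classes themselves obtain their logger via slf4j and MethodHandles, as BasicAuthIntegrationTest does. A standalone illustration of that declaration pattern:

import java.lang.invoke.MethodHandles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingExample {
  // Deriving the logger name from the enclosing class keeps copy/paste safe.
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static void main(String[] args) {
    log.info("Logging configured by the log4j.properties shown above");
  }
}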
@@ -0,0 +1,66 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.client.solrj.embedded.JettyConfig;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.hamcrest.core.IsNot.not;
|
||||
|
||||
public class JettySolrRunnerTest extends SolrTestCaseJ4 {
|
||||
|
||||
@Test
|
||||
public void testRestartPorts() throws Exception {
|
||||
|
||||
Path solrHome = createTempDir();
|
||||
Files.write(solrHome.resolve("solr.xml"), MiniSolrCloudCluster.DEFAULT_CLOUD_SOLR_XML.getBytes(Charset.defaultCharset()));
|
||||
|
||||
JettyConfig config = JettyConfig.builder().build();
|
||||
|
||||
JettySolrRunner jetty = new JettySolrRunner(solrHome.toString(), config);
|
||||
try {
|
||||
jetty.start();
|
||||
|
||||
URL url = jetty.getBaseUrl();
|
||||
int usedPort = url.getPort();
|
||||
|
||||
jetty.stop();
|
||||
jetty.start();
|
||||
|
||||
assertEquals("After restart, jetty port should be the same", usedPort, jetty.getBaseUrl().getPort());
|
||||
|
||||
jetty.stop();
|
||||
jetty.start(false);
|
||||
|
||||
assertThat("After restart, jetty port should be different", jetty.getBaseUrl().getPort(), not(usedPort));
|
||||
}
|
||||
finally {
|
||||
if (jetty.isRunning())
|
||||
jetty.stop();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.solr.client.solrj.embedded.JettyConfig;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
import org.apache.solr.util.RevertDefaultThreadHandlerRule;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.RuleChain;
|
||||
import org.junit.rules.TestRule;
|
||||
|
||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
|
||||
public class MiniSolrCloudClusterTest extends LuceneTestCase {
|
||||
|
||||
@ClassRule
|
||||
public static TestRule solrClassRules = RuleChain.outerRule(
|
||||
new SystemPropertiesRestoreRule()).around(
|
||||
new RevertDefaultThreadHandlerRule());
|
||||
|
||||
@Test
|
||||
public void testErrorsInStartup() throws Exception {
|
||||
|
||||
AtomicInteger jettyIndex = new AtomicInteger();
|
||||
|
||||
MiniSolrCloudCluster cluster = null;
|
||||
try {
|
||||
cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
|
||||
@Override
|
||||
public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
|
||||
if (jettyIndex.incrementAndGet() != 2)
|
||||
return super.startJettySolrRunner(name, context, config);
|
||||
throw new IOException("Fake exception on startup!");
|
||||
}
|
||||
};
|
||||
fail("Expected an exception to be thrown from MiniSolrCloudCluster");
|
||||
}
|
||||
catch (Exception e) {
|
||||
assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
|
||||
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
|
||||
assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
|
||||
}
|
||||
finally {
|
||||
if (cluster != null)
|
||||
cluster.shutdown();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testErrorsInShutdown() throws Exception {
|
||||
|
||||
AtomicInteger jettyIndex = new AtomicInteger();
|
||||
|
||||
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
|
||||
@Override
|
||||
protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
|
||||
JettySolrRunner j = super.stopJettySolrRunner(jetty);
|
||||
if (jettyIndex.incrementAndGet() == 2)
|
||||
throw new IOException("Fake IOException on shutdown!");
|
||||
return j;
|
||||
}
|
||||
};
|
||||
|
||||
try {
|
||||
cluster.shutdown();
|
||||
fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
|
||||
}
|
||||
catch (Exception e) {
|
||||
assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
|
||||
assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
|
||||
assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtraFilters() throws Exception {
|
||||
JettyConfig.Builder jettyConfig = JettyConfig.builder();
|
||||
jettyConfig.waitForLoadingCoresToFinish(null);
|
||||
jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
|
||||
MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(random().nextInt(3) + 1, createTempDir(), jettyConfig.build());
|
||||
cluster.shutdown();
|
||||
}
|
||||
|
||||
}
|
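The two error-handling tests above assert that MiniSolrCloudCluster aggregates individual failures as suppressed exceptions on a single wrapper exception. A generic sketch of that pattern; the class and message names are illustrative:

import java.util.ArrayList;
import java.util.List;

public class SuppressedShutdown {
  public static void shutdownAll(List<AutoCloseable> resources) throws Exception {
    List<Exception> failures = new ArrayList<>();
    for (AutoCloseable resource : resources) {
      try {
        resource.close();
      } catch (Exception e) {
        failures.add(e);
      }
    }
    if (!failures.isEmpty()) {
      // Surface every failure, but throw only one exception to the caller.
      Exception wrapper = new Exception("Error shutting down resources");
      failures.forEach(wrapper::addSuppressed);
      throw wrapper;
    }
  }
}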