Mirror of https://github.com/apache/lucene.git (synced 2025-02-13 13:35:37 +00:00)

Commit 652065e14e: Merge remote-tracking branch 'origin/master'
@@ -7,6 +7,8 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
     </content>
     <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
@@ -61,6 +61,11 @@
         <directory>${module-path}/src/resources</directory>
       </resource>
     </resources>
+    <testResources>
+      <testResource>
+        <directory>${module-path}/src/test-files</directory>
+      </testResource>
+    </testResources>
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
@@ -86,16 +86,24 @@ def check_url_list(lst):
     if mirror_contains_file(url):
       p('.')
     else:
-      p('X')
+      p('\nFAIL: ' + url + '\n' if args.details else 'X')
       ret.append(url)
 
   return ret
 
-parser = argparse.ArgumentParser(description='Checks that all Lucene mirrors contain a copy of a release')
-parser.add_argument('-version', '-v', help='Lucene version to check', required=True)
-parser.add_argument('-interval', '-i', help='seconds to wait to query again pending mirrors', type=int, default=300)
+desc = 'Periodically checks that all Lucene/Solr mirrors contain either a copy of a release or a specified path'
+parser = argparse.ArgumentParser(description=desc)
+parser.add_argument('-version', '-v', help='Lucene/Solr version to check')
+parser.add_argument('-path', '-p', help='instead of a versioned release, check for some/explicit/path')
+parser.add_argument('-interval', '-i', help='seconds to wait before re-querying mirrors', type=int, default=300)
+parser.add_argument('-details', '-d', help='print missing mirror URLs', action='store_true', default=False)
 args = parser.parse_args()
 
+if (args.version is None and args.path is None) \
+   or (args.version is not None and args.path is not None):
+  p('You must specify either -version or -path but not both!\n')
+  sys.exit(1)
+
 try:
   conn = http.HTTPConnection('www.apache.org')
   conn.request('GET', '/mirrors/')

@@ -105,8 +113,8 @@ except Exception as e:
   p('Unable to fetch the Apache mirrors list!\n')
   sys.exit(1)
 
-apache_path = 'lucene/java/{}/changes/Changes.html'.format(args.version);
-maven_url = 'http://repo1.maven.org/maven2/' \
+mirror_path = args.path if args.path is not None else 'lucene/java/{}/changes/Changes.html'.format(args.version)
+maven_url = None if args.version is None else 'http://repo1.maven.org/maven2/' \
             'org/apache/lucene/lucene-core/{0}/lucene-core-{0}.pom.asc'.format(args.version)
 maven_available = False
 

@@ -119,18 +127,19 @@ for match in re.finditer('<TR>(.*?)</TR>', str(html), re.MULTILINE | re.IGNORECA
 
   match = re.search('<A\s+HREF\s*=\s*"([^"]+)"\s*>', row, re.MULTILINE | re.IGNORECASE)
   if match:
-    pending_mirrors.append(match.group(1) + apache_path)
+    pending_mirrors.append(match.group(1) + mirror_path)
 
 total_mirrors = len(pending_mirrors)
 
+label = args.version if args.version is not None else args.path
 while True:
-  p('\n' + str(datetime.datetime.now()))
+  p('\n{:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()))
   p('\nPolling {} Apache Mirrors'.format(len(pending_mirrors)))
-  if not maven_available:
+  if maven_url is not None and not maven_available:
     p(' and Maven Central')
   p('...\n')
 
-  if not maven_available:
+  if maven_url is not None and not maven_available:
     maven_available = mirror_contains_file(maven_url)
 
   start = time.time()

@@ -140,14 +149,14 @@ while True:
 
   available_mirrors = total_mirrors - len(pending_mirrors)
 
-  p('\n\n{} is{}downloadable from Maven Central\n'.format(args.version, maven_available and ' ' or ' not '))
-  p('{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'.format(args.version, available_mirrors,
-                                                                      total_mirrors,
-                                                                      available_mirrors * 100 / total_mirrors))
+  if maven_url is not None:
+    p('\n\n{} is{}downloadable from Maven Central'.format(label, ' ' if maven_available else ' not '))
+  p('\n{} is downloadable from {}/{} Apache Mirrors ({:.2f}%)\n'
+    .format(label, available_mirrors, total_mirrors, available_mirrors * 100 / total_mirrors))
   if len(pending_mirrors) == 0:
     break
 
   if remaining > 0:
-    p('Sleeping for {} seconds...\n'.format(remaining))
+    p('Sleeping for {:d} seconds...\n'.format(int(remaining + 0.5)))
     time.sleep(remaining)
 
@@ -36,7 +36,36 @@ Other
 
 ======================= Lucene 6.3.0 =======================
 
-(No changes)
+API Changes
+
+* LUCENE-7436: MinHashFilter's constructor, and some of its default
+  settings, should be public. (Doug Turnbull via Mike McCandless)
+
+New Features
+
+Bug Fixes
+
+* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
+  trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
+  term. (Thomas Kappler via David Smiley)
+
+* LUCENE-7440: Document id skipping (PostingsEnum.advance) could throw an
+  ArrayIndexOutOfBoundsException exception on large index segments (>1.8B docs)
+  with large skips. (yonik)
+
+* LUCENE-7442: MinHashFilter's ctor should validate its args.
+  (Cao Manh Dat via Steve Rowe)
+
+* LUCENE-7318: Fix backwards compatibility issues around StandardAnalyzer
+  and its components, introduced with Lucene 6.2.0. The moved classes
+  were restored in their original packages: LowercaseFilter and StopFilter,
+  as well as several utility classes. (Uwe Schindler, Mike McCandless)
+
+Improvements
+
+Optimizations
+
+Other
 
 ======================= Lucene 6.2.0 =======================
 

@@ -632,6 +661,9 @@ Other
 * LUCENE-7095: Add point values support to the numeric field query time join.
   (Martijn van Groningen, Mike McCandless)
 
+======================= Lucene 5.5.3 =======================
+(No Changes)
+
 ======================= Lucene 5.5.2 =======================
 
 Bug Fixes
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.core;
+
+
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Normalizes token text to lower case.
+ * <p>
+ * This class moved to Lucene Core, but a reference in the {@code analysis/common} module
+ * is preserved for documentation purposes and consistency with filter factory.
+ * @see org.apache.lucene.analysis.LowerCaseFilter
+ * @see LowerCaseFilterFactory
+ */
+public final class LowerCaseFilter extends org.apache.lucene.analysis.LowerCaseFilter {
+
+  /**
+   * Create a new LowerCaseFilter, that normalizes token text to lower case.
+   *
+   * @param in TokenStream to filter
+   */
+  public LowerCaseFilter(TokenStream in) {
+    super(in);
+  }
+
+}
@@ -20,7 +20,6 @@ package org.apache.lucene.analysis.core;
 import java.util.Map;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.core;
+
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Removes stop words from a token stream.
+ * <p>
+ * This class moved to Lucene Core, but a reference in the {@code analysis/common} module
+ * is preserved for documentation purposes and consistency with filter factory.
+ * @see org.apache.lucene.analysis.StopFilter
+ * @see StopFilterFactory
+ */
+public final class StopFilter extends org.apache.lucene.analysis.StopFilter {
+
+  /**
+   * Constructs a filter which removes words from the input TokenStream that are
+   * named in the Set.
+   *
+   * @param in
+   *          Input stream
+   * @param stopWords
+   *          A {@link CharArraySet} representing the stopwords.
+   * @see #makeStopSet(java.lang.String...)
+   */
+  public StopFilter(TokenStream in, CharArraySet stopWords) {
+    super(in, stopWords);
+  }
+
+}
@@ -21,7 +21,6 @@ import java.io.IOException;
 import java.util.Map;
 
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WordlistLoader; // jdocs
 import org.apache.lucene.analysis.util.ResourceLoader;
@@ -49,11 +49,11 @@ public class MinHashFilter extends TokenFilter {
 
   private static final LongPair[] cachedIntHashes = new LongPair[HASH_CACHE_SIZE];
 
-  static final int DEFAULT_HASH_COUNT = 1;
+  public static final int DEFAULT_HASH_COUNT = 1;
 
-  static final int DEFAULT_HASH_SET_SIZE = 1;
+  public static final int DEFAULT_HASH_SET_SIZE = 1;
 
-  static final int DEFAULT_BUCKET_COUNT = 512;
+  public static final int DEFAULT_BUCKET_COUNT = 512;
 
   static final String MIN_HASH_TYPE = "MIN_HASH";
 

@@ -112,8 +112,17 @@ public class MinHashFilter extends TokenFilter {
    * @param hashSetSize the no. of min hashes to keep
    * @param withRotation whether rotate or not hashes while incrementing tokens
    */
-  MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
+  public MinHashFilter(TokenStream input, int hashCount, int bucketCount, int hashSetSize, boolean withRotation) {
     super(input);
+    if (hashCount <= 0) {
+      throw new IllegalArgumentException("hashCount must be greater than zero");
+    }
+    if (bucketCount <= 0) {
+      throw new IllegalArgumentException("bucketCount must be greater than zero");
+    }
+    if (hashSetSize <= 0) {
+      throw new IllegalArgumentException("hashSetSize must be greater than zero");
+    }
     this.hashCount = hashCount;
     this.bucketCount = bucketCount;
     this.hashSetSize = hashSetSize;
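With the constructor and the DEFAULT_* constants now public, the filter can be built directly instead of only through its factory. A minimal sketch, assuming the filter lives in the org.apache.lucene.analysis.minhash package of analysis/common as in recent 6.x releases; the input text and shingle size are arbitrary examples:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.minhash.MinHashFilter;          // assumed package
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MinHashExample {
  public static void main(String[] args) throws Exception {
    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    tokenizer.setReader(new StringReader("the quick brown fox jumps over the lazy dog"));

    // MinHash is normally applied on top of shingles so word order contributes to the signature.
    TokenStream shingles = new ShingleFilter(tokenizer, 5);

    // Argument order matches the constructor above: hashCount, bucketCount, hashSetSize, withRotation.
    TokenStream minHashed = new MinHashFilter(shingles,
        MinHashFilter.DEFAULT_HASH_COUNT,
        MinHashFilter.DEFAULT_BUCKET_COUNT,
        MinHashFilter.DEFAULT_HASH_SET_SIZE,
        true);

    CharTermAttribute termAtt = minHashed.addAttribute(CharTermAttribute.class);
    minHashed.reset();
    while (minHashed.incrementToken()) {
      System.out.println(termAtt.toString());  // each token is one min-hash bucket value
    }
    minHashed.end();
    minHashed.close();
  }
}

Passing zero or a negative value for any of the three sizes now fails fast with an IllegalArgumentException (LUCENE-7442) instead of producing a silently broken filter.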
@@ -46,5 +46,9 @@
 and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
 </li>
 </ul>
+<p>
+This Java package additionally contains {@code StandardAnalyzer}, {@code StandardTokenizer},
+and {@code StandardFilter}, which are not visible here, because they moved to Lucene Core.
+The factories for those components (e.g., used in Solr) are still part of this module.
 </body>
 </html>
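To illustrate the package note above: after this change the filters themselves are referenced from Lucene Core, while analysis/common keeps the factories and the thin subclasses shown earlier in this diff. A small hypothetical analyzer wiring the core classes together; the stop-word list is chosen arbitrarily for the example:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;   // now lives in Lucene Core
import org.apache.lucene.analysis.StopFilter;        // now lives in Lucene Core
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class CoreFiltersAnalyzer extends Analyzer {
  // arbitrary example stop words
  private static final CharArraySet STOP_WORDS = StopFilter.makeStopSet("a", "an", "the");

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    TokenStream sink = new LowerCaseFilter(source);
    sink = new StopFilter(sink, STOP_WORDS);
    return new TokenStreamComponents(source, sink);
  }
}

Existing code that imported the org.apache.lucene.analysis.core variants keeps compiling, since those classes remain as the subclasses added in this commit.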
@@ -106,7 +106,9 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
       SnowballFilter.class, // this is called SnowballPorterFilterFactory
       PatternKeywordMarkerFilter.class,
       SetKeywordMarkerFilter.class,
-      UnicodeWhitespaceTokenizer.class // a supported option via WhitespaceTokenizerFactory
+      UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
+      org.apache.lucene.analysis.StopFilter.class, // class from core, but StopFilterFactory creates one from this module
+      org.apache.lucene.analysis.LowerCaseFilter.class // class from core, but LowerCaseFilterFactory creates one from this module
     );
   }
 
@@ -166,7 +166,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       // also randomly pick it:
       ValidatingTokenFilter.class,
       // TODO: needs to be a tokenizer, doesnt handle graph inputs properly (a shingle or similar following will then cause pain)
-      WordDelimiterFilter.class)) {
+      WordDelimiterFilter.class,
+      // clones of core's filters:
+      org.apache.lucene.analysis.core.StopFilter.class,
+      org.apache.lucene.analysis.core.LowerCaseFilter.class)) {
       for (Constructor<?> ctor : c.getConstructors()) {
         brokenConstructors.put(ctor, ALWAYS);
       }
@@ -357,7 +357,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     "5.5.1-cfs",
     "5.5.1-nocfs",
     "5.5.2-cfs",
-    "5.5.2-nocfs"
+    "5.5.2-nocfs",
+    "5.5.3-cfs",
+    "5.5.3-nocfs"
   };
 
   // TODO: on 6.0.0 release, gen the single segment indices and add here:

Binary file not shown.
Binary file not shown.
@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.CharacterUtils;
 /**
  * Normalizes token text to lower case.
  */
-public final class LowerCaseFilter extends TokenFilter {
+public class LowerCaseFilter extends TokenFilter {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**

@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.CharArraySet;
 /**
  * Removes stop words from a token stream.
  */
-public final class StopFilter extends FilteringTokenFilter {
+public class StopFilter extends FilteringTokenFilter {
 
   private final CharArraySet stopWords;
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -63,7 +63,9 @@ public abstract class MultiLevelSkipListReader implements Closeable {
   /** skipInterval of each level. */
   private int skipInterval[];
 
-  /** Number of docs skipped per level. */
+  /** Number of docs skipped per level.
+   * It's possible for some values to overflow a signed int, but this has been accounted for.
+   */
   private int[] numSkipped;
 
   /** Doc id of current skip entry per level. */

@@ -151,7 +153,8 @@ public abstract class MultiLevelSkipListReader implements Closeable {
 
       numSkipped[level] += skipInterval[level];
 
-      if (numSkipped[level] > docCount) {
+      // numSkipped may overflow a signed int, so compare as unsigned.
+      if (Integer.compareUnsigned(numSkipped[level], docCount) > 0) {
         // this skip list is exhausted
         skipDoc[level] = Integer.MAX_VALUE;
         if (numberOfSkipLevels > level) numberOfSkipLevels = level;
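The unsigned comparison matters because numSkipped accumulates skipInterval increments and can exceed Integer.MAX_VALUE on segments with more than ~1.8B docs (LUCENE-7440); once it wraps negative, a signed greater-than would never report the skip list as exhausted. A standalone sketch of the difference:

public class UnsignedCompareDemo {
  public static void main(String[] args) {
    int docCount = 2_000_000_000;   // roughly 2B docs in one segment
    int numSkipped = 1_900_000_000;

    numSkipped += 400_000_000;      // 2.3B does not fit in a signed int: wraps to a negative value

    // Signed comparison: the wrapped value looks tiny, so the skip list seems usable.
    System.out.println(numSkipped > docCount);                              // false (wrong)

    // Unsigned comparison treats the same bits as ~2.3B, the intended count.
    System.out.println(Integer.compareUnsigned(numSkipped, docCount) > 0);  // true (correct)
  }
}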
@@ -429,12 +429,10 @@ public abstract class LogMergePolicy extends MergePolicy {
   private static class SegmentInfoAndLevel implements Comparable<SegmentInfoAndLevel> {
     SegmentCommitInfo info;
     float level;
-    int index;
 
-    public SegmentInfoAndLevel(SegmentCommitInfo info, float level, int index) {
+    public SegmentInfoAndLevel(SegmentCommitInfo info, float level) {
       this.info = info;
       this.level = level;
-      this.index = index;
     }
 
     // Sorts largest to smallest

@@ -475,7 +473,7 @@ public abstract class LogMergePolicy extends MergePolicy {
       size = 1;
     }
 
-    final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm, i);
+    final SegmentInfoAndLevel infoLevel = new SegmentInfoAndLevel(info, (float) Math.log(size)/norm);
     levels.add(infoLevel);
 
     if (verbose(writer)) {
@@ -31,7 +31,7 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
  * though you can explicitly choose classic Levenshtein by passing <code>false</code>
  * to the <code>transpositions</code> parameter.
  *
- * <p>This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite}
+ * <p>This query uses {@link MultiTermQuery.TopTermsBlendedFreqScoringRewrite}
  * as default. So terms will be collected and scored according to their
  * edit distance. Only the top terms are used for building the {@link BooleanQuery}.
  * It is not recommended to change the rewrite mode for fuzzy queries.
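For context, a minimal sketch of a caller issuing a fuzzy query with an explicit edit budget and prefix length, relying on the TopTermsBlendedFreqScoringRewrite default described in the javadoc above; the index path and field name are placeholders:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class FuzzySearchExample {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // placeholder path
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);

      // Match terms within 2 edits of "lucene" that share the first character;
      // transpositions count as a single edit by default.
      FuzzyQuery query = new FuzzyQuery(new Term("body", "lucene"), 2, 1);

      TopDocs hits = searcher.search(query, 10);
      System.out.println("matching docs: " + hits.totalHits);
    }
  }
}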
@@ -17,12 +17,7 @@
 package org.apache.lucene.search;
 
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
 import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.FilteredTermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;

@@ -35,10 +30,12 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.ByteRunAutomaton;
 import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
+import java.io.IOException;
+import java.util.Arrays;
+
 /** Subclass of TermsEnum for enumerating all terms that are similar
  * to the specified filter term.
  *
@@ -46,38 +43,46 @@ import org.apache.lucene.util.automaton.LevenshteinAutomata;
 * {@link BytesRef#compareTo}.  Each term in the enumeration is
 * greater than all that precede it.</p>
 */
-public class FuzzyTermsEnum extends TermsEnum {
+public final class FuzzyTermsEnum extends TermsEnum {
 
+  // NOTE: we can't subclass FilteredTermsEnum here because we need to sometimes change actualEnum:
   private TermsEnum actualEnum;
-  private BoostAttribute actualBoostAtt;
 
-  private final BoostAttribute boostAtt =
-    attributes().addAttribute(BoostAttribute.class);
+  // We use this to communicate the score (boost) of the current matched term we are on back to
+  // MultiTermQuery.TopTermsBlendedFreqScoringRewrite that is collecting the best (default 50) matched terms:
+  private final BoostAttribute boostAtt;
+
+  // MultiTermQuery.TopTermsBlendedFreqScoringRewrite tells us the worst boost still in its queue using this att,
+  // which we use to know when we can reduce the automaton from ed=2 to ed=1, or ed=0 if only single top term is collected:
   private final MaxNonCompetitiveBoostAttribute maxBoostAtt;
 
+  // We use this to share the pre-built (once for the query) Levenshtein automata across segments:
   private final LevenshteinAutomataAttribute dfaAtt;
 
   private float bottom;
   private BytesRef bottomTerm;
+  private final CompiledAutomaton automata[];
 
-  protected final float minSimilarity;
-  protected final float scale_factor;
+  private BytesRef queuedBottom;
 
-  protected final int termLength;
+  final int termLength;
 
-  protected int maxEdits;
-  protected final boolean raw;
+  // Maximum number of edits we will accept.  This is either 2 or 1 (or, degenerately, 0) passed by the user originally,
+  // but as we collect terms, we can lower this (e.g. from 2 to 1) if we detect that the term queue is full, and all
+  // collected terms are ed=1:
+  private int maxEdits;
 
-  protected final Terms terms;
-  private final Term term;
-  protected final int termText[];
-  protected final int realPrefixLength;
+  final Terms terms;
+  final Term term;
+  final int termText[];
+  final int realPrefixLength;
 
-  private final boolean transpositions;
+  // True (the default, in FuzzyQuery) if a transposition should count as a single edit:
+  final boolean transpositions;
 
   /**
    * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
-   * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity >
-   * <code>minSimilarity</code>.
+   * length <code>prefixLength</code> with <code>term</code> and which have at most {@code maxEdits} edits.
    * <p>
    * After calling the constructor the enumeration is already pointing to the first
    * valid term if such a term exists.
@@ -87,105 +92,88 @@ public class FuzzyTermsEnum extends TermsEnum {
   * thats contains information about competitive boosts during rewrite. It is also used
   * to cache DFAs between segment transitions.
   * @param term Pattern term.
-  * @param minSimilarity Minimum required similarity for terms from the reader. Pass an integer value
-  *        representing edit distance. Passing a fraction is deprecated.
+  * @param maxEdits Maximum edit distance.
   * @param prefixLength Length of required common prefix. Default value is 0.
   * @throws IOException if there is a low-level IO error
   */
  public FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
-      final float minSimilarity, final int prefixLength, boolean transpositions) throws IOException {
-    if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
-      throw new IllegalArgumentException("fractional edit distances are not allowed");
-    if (minSimilarity < 0.0f)
-      throw new IllegalArgumentException("minimumSimilarity cannot be less than 0");
-    if(prefixLength < 0)
+      final int maxEdits, final int prefixLength, boolean transpositions) throws IOException {
+    if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+      throw new IllegalArgumentException("max edits must be 0.." + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got: " + maxEdits);
+    }
+    if (prefixLength < 0) {
       throw new IllegalArgumentException("prefixLength cannot be less than 0");
+    }
+    this.maxEdits = maxEdits;
     this.terms = terms;
     this.term = term;
 
     // convert the string into a utf32 int[] representation for fast comparisons
     final String utf16 = term.text();
     this.termText = new int[utf16.codePointCount(0, utf16.length())];
-    for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp))
+    for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
       termText[j++] = cp = utf16.codePointAt(i);
+    }
     this.termLength = termText.length;
 
     this.dfaAtt = atts.addAttribute(LevenshteinAutomataAttribute.class);
+    this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
+
+    // NOTE: boostAtt must pulled from attributes() not from atts!  This is because TopTermsRewrite looks for boostAtt from this TermsEnum's
+    // private attributes() and not the global atts passed to us from MultiTermQuery:
+    this.boostAtt = attributes().addAttribute(BoostAttribute.class);
 
     //The prefix could be longer than the word.
     //It's kind of silly though.  It means we must match the entire word.
     this.realPrefixLength = prefixLength > termLength ? termLength : prefixLength;
-    // if minSimilarity >= 1, we treat it as number of edits
-    if (minSimilarity >= 1f) {
-      this.minSimilarity = 0; // just driven by number of edits
-      maxEdits = (int) minSimilarity;
-      raw = true;
-    } else {
-      this.minSimilarity = minSimilarity;
-      // calculate the maximum k edits for this similarity
-      maxEdits = initialMaxDistance(this.minSimilarity, termLength);
-      raw = false;
-    }
-    if (transpositions && maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
-      throw new UnsupportedOperationException("with transpositions enabled, distances > "
-        + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + " are not supported ");
-    }
     this.transpositions = transpositions;
-    this.scale_factor = 1.0f / (1.0f - this.minSimilarity);
 
-    this.maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
+    CompiledAutomaton[] prevAutomata = dfaAtt.automata();
+    if (prevAutomata == null) {
+      prevAutomata = new CompiledAutomaton[maxEdits+1];
+
+      LevenshteinAutomata builder =
+        new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
+
+      String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
+      for (int i = 0; i <= maxEdits; i++) {
+        Automaton a = builder.toAutomaton(i, prefix);
+        prevAutomata[i] = new CompiledAutomaton(a, true, false);
+      }
+
+      // first segment computes the automata, and we share with subsequent segments via this Attribute:
+      dfaAtt.setAutomata(prevAutomata);
+    }
+
+    this.automata = prevAutomata;
     bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
     bottomTerm = maxBoostAtt.getCompetitiveTerm();
-    bottomChanged(null, true);
+    bottomChanged(null);
   }
 
   /**
    * return an automata-based enum for matching up to editDistance from
    * lastTerm, if possible
    */
-  protected TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm)
-      throws IOException {
-    final List<CompiledAutomaton> runAutomata = initAutomata(editDistance);
-    if (editDistance < runAutomata.size()) {
-      //System.out.println("FuzzyTE.getAEnum: ed=" + editDistance + " lastTerm=" + (lastTerm==null ? "null" : lastTerm.utf8ToString()));
-      final CompiledAutomaton compiled = runAutomata.get(editDistance);
-      return new AutomatonFuzzyTermsEnum(terms.intersect(compiled, lastTerm == null ? null : compiled.floor(lastTerm, new BytesRefBuilder())),
-                                         runAutomata.subList(0, editDistance + 1).toArray(new CompiledAutomaton[editDistance + 1]));
+  private TermsEnum getAutomatonEnum(int editDistance, BytesRef lastTerm) throws IOException {
+    assert editDistance < automata.length;
+    final CompiledAutomaton compiled = automata[editDistance];
+    BytesRef initialSeekTerm;
+    if (lastTerm == null) {
+      // This is the first enum we are pulling:
+      initialSeekTerm = null;
     } else {
-      return null;
+      // We are pulling this enum (e.g., ed=1) after iterating for a while already (e.g., ed=2):
+      initialSeekTerm = compiled.floor(lastTerm, new BytesRefBuilder());
     }
+    return terms.intersect(compiled, initialSeekTerm);
   }
 
-  /** initialize levenshtein DFAs up to maxDistance, if possible */
-  private List<CompiledAutomaton> initAutomata(int maxDistance) {
-    final List<CompiledAutomaton> runAutomata = dfaAtt.automata();
-    //System.out.println("cached automata size: " + runAutomata.size());
-    if (runAutomata.size() <= maxDistance &&
-        maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
-      LevenshteinAutomata builder =
-        new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
-
-      String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
-      for (int i = runAutomata.size(); i <= maxDistance; i++) {
-        Automaton a = builder.toAutomaton(i, prefix);
-        //System.out.println("compute automaton n=" + i);
-        runAutomata.add(new CompiledAutomaton(a, true, false));
-      }
-    }
-    return runAutomata;
-  }
-
-  /** swap in a new actual enum to proxy to */
-  protected void setEnum(TermsEnum actualEnum) {
-    this.actualEnum = actualEnum;
-    this.actualBoostAtt = actualEnum.attributes().addAttribute(BoostAttribute.class);
-  }
 
   /**
    * fired when the max non-competitive boost has changed. this is the hook to
-   * swap in a smarter actualEnum
+   * swap in a smarter actualEnum.
    */
-  private void bottomChanged(BytesRef lastTerm, boolean init)
-      throws IOException {
+  private void bottomChanged(BytesRef lastTerm) throws IOException {
     int oldMaxEdits = maxEdits;
 
     // true if the last term encountered is lexicographically equal or after the bottom term in the PQ
@@ -193,48 +181,72 @@ public class FuzzyTermsEnum extends TermsEnum {
 
     // as long as the max non-competitive boost is >= the max boost
     // for some edit distance, keep dropping the max edit distance.
-    while (maxEdits > 0 && (termAfter ? bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits)))
+    while (maxEdits > 0) {
+      float maxBoost = 1.0f - ((float) maxEdits / (float) termLength);
+      if (bottom < maxBoost || (bottom == maxBoost && termAfter == false)) {
+        break;
+      }
       maxEdits--;
-
-    if (oldMaxEdits != maxEdits || init) { // the maximum n has changed
-      maxEditDistanceChanged(lastTerm, maxEdits, init);
     }
-  }
 
-  protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
-      throws IOException {
-    TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
-    // instead of assert, we do a hard check in case someone uses our enum directly
-    // assert newEnum != null;
-    if (newEnum == null) {
-      assert maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
-      throw new IllegalArgumentException("maxEdits cannot be > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE");
-    }
-    setEnum(newEnum);
-  }
+    // TODO: this opto could be improved, e.g. if the worst term in the queue is zzzz with ed=2, then, really, on the next segment, we
+    // should only be looking for ed=1 terms up until zzzz, then ed=2.  Tricky :)
 
-  // for some raw min similarity and input term length, the maximum # of edits
-  private int initialMaxDistance(float minimumSimilarity, int termLen) {
-    return (int) ((1D-minimumSimilarity) * termLen);
+    if (oldMaxEdits != maxEdits || lastTerm == null) {
+      // This is a very powerful optimization: the maximum edit distance has changed.  This happens because we collect only the top scoring
+      // N (= 50, by default) terms, and if e.g. maxEdits=2, and the queue is now full of matching terms, and we notice that the worst entry
+      // in that queue is ed=1, then we can switch the automata here to ed=1 which is a big speedup.
+      actualEnum = getAutomatonEnum(maxEdits, lastTerm);
+    }
   }
 
-  // for some number of edits, the maximum possible scaled boost
-  private float calculateMaxBoost(int nEdits) {
-    final float similarity = 1.0f - ((float) nEdits / (float) (termLength));
-    return (similarity - minSimilarity) * scale_factor;
-  }
-
-  private BytesRef queuedBottom = null;
-
   @Override
   public BytesRef next() throws IOException {
 
     if (queuedBottom != null) {
-      bottomChanged(queuedBottom, false);
+      bottomChanged(queuedBottom);
       queuedBottom = null;
     }
 
-    BytesRef term = actualEnum.next();
-    boostAtt.setBoost(actualBoostAtt.getBoost());
+    BytesRef term;
+
+    // while loop because we skip short terms even if they are within the specified edit distance (see the NOTE in FuzzyQuery class javadocs)
+    while (true) {
+
+      term = actualEnum.next();
+      if (term == null) {
+        // end
+        break;
+      }
+
+      int ed = maxEdits;
+
+      // we know the outer DFA always matches.
+      // now compute exact edit distance
+      while (ed > 0) {
+        if (matches(term, ed - 1)) {
+          ed--;
+        } else {
+          break;
+        }
+      }
+
+      if (ed == 0) { // exact match
+        boostAtt.setBoost(1.0F);
+        break;
+      } else {
+        final int codePointCount = UnicodeUtil.codePointCount(term);
+        int minTermLength = Math.min(codePointCount, termLength);
+
+        // only accept a matching term if it's longer than the edit distance:
+        if (minTermLength > ed) {
+          float similarity = 1.0f - (float) ed / (float) minTermLength;
+          boostAtt.setBoost(similarity);
+          break;
+        }
+      }
+    }
 
     final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
     final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
@@ -243,12 +255,19 @@ public class FuzzyTermsEnum extends TermsEnum {
       this.bottomTerm = bottomTerm;
       // clone the term before potentially doing something with it
       // this is a rare but wonderful occurrence anyway
+
+      // We must delay bottomChanged until the next next() call otherwise we mess up docFreq(), etc., for the current term:
       queuedBottom = BytesRef.deepCopyOf(term);
     }
 
     return term;
   }
 
+  /** returns true if term is within k edits of the query term */
+  private boolean matches(BytesRef termIn, int k) {
+    return k == 0 ? termIn.equals(term.bytes()) : automata[k].runAutomaton.run(termIn.bytes, termIn.offset, termIn.length);
+  }
+
   // proxy all other enum calls to the actual enum
   @Override
   public int docFreq() throws IOException {
@@ -300,110 +319,44 @@ public class FuzzyTermsEnum extends TermsEnum {
     return actualEnum.term();
   }
 
-  /**
-   * Implement fuzzy enumeration with Terms.intersect.
-   * <p>
-   * This is the fastest method as opposed to LinearFuzzyTermsEnum:
-   * as enumeration is logarithmic to the number of terms (instead of linear)
-   * and comparison is linear to length of the term (rather than quadratic)
-   */
-  private class AutomatonFuzzyTermsEnum extends FilteredTermsEnum {
-    private final ByteRunAutomaton matchers[];
-
-    private final BytesRef termRef;
-
-    private final BoostAttribute boostAtt =
-      attributes().addAttribute(BoostAttribute.class);
-
-    public AutomatonFuzzyTermsEnum(TermsEnum tenum, CompiledAutomaton compiled[]) {
-      super(tenum, false);
-      this.matchers = new ByteRunAutomaton[compiled.length];
-      for (int i = 0; i < compiled.length; i++)
-        this.matchers[i] = compiled[i].runAutomaton;
-      termRef = new BytesRef(term.text());
-    }
-
-    /** finds the smallest Lev(n) DFA that accepts the term. */
-    @Override
-    protected AcceptStatus accept(BytesRef term) {
-      //System.out.println("AFTE.accept term=" + term);
-      int ed = matchers.length - 1;
-
-      // we are wrapping either an intersect() TermsEnum or an AutomatonTermsENum,
-      // so we know the outer DFA always matches.
-      // now compute exact edit distance
-      while (ed > 0) {
-        if (matches(term, ed - 1)) {
-          ed--;
-        } else {
-          break;
-        }
-      }
-      //System.out.println("CHECK term=" + term.utf8ToString() + " ed=" + ed);
-
-      // scale to a boost and return (if similarity > minSimilarity)
-      if (ed == 0) { // exact match
-        boostAtt.setBoost(1.0F);
-        //System.out.println("  yes");
-        return AcceptStatus.YES;
-      } else {
-        final int codePointCount = UnicodeUtil.codePointCount(term);
-        final float similarity = 1.0f - ((float) ed / (float)
-            (Math.min(codePointCount, termLength)));
-        if (similarity > minSimilarity) {
-          boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
-          //System.out.println("  yes");
-          return AcceptStatus.YES;
-        } else {
-          return AcceptStatus.NO;
-        }
-      }
-    }
-
-    /** returns true if term is within k edits of the query term */
-    final boolean matches(BytesRef term, int k) {
-      return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
-    }
-  }
-
-  /** @lucene.internal */
-  public float getMinSimilarity() {
-    return minSimilarity;
-  }
-
-  /** @lucene.internal */
-  public float getScaleFactor() {
-    return scale_factor;
-  }
-
   /**
    * reuses compiled automata across different segments,
    * because they are independent of the index
    * @lucene.internal */
   public static interface LevenshteinAutomataAttribute extends Attribute {
-    public List<CompiledAutomaton> automata();
+    public CompiledAutomaton[] automata();
+    public void setAutomata(CompiledAutomaton[] automata);
   }
 
   /**
    * Stores compiled automata as a list (indexed by edit distance)
    * @lucene.internal */
   public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
-    private final List<CompiledAutomaton> automata = new ArrayList<>();
+    private CompiledAutomaton[] automata;
 
     @Override
-    public List<CompiledAutomaton> automata() {
+    public CompiledAutomaton[] automata() {
       return automata;
     }
 
+    @Override
+    public void setAutomata(CompiledAutomaton[] automata) {
+      this.automata = automata;
+    }
+
     @Override
     public void clear() {
-      automata.clear();
+      automata = null;
     }
 
     @Override
     public int hashCode() {
+      if (automata == null) {
+        return 0;
+      } else {
       return automata.hashCode();
     }
+    }
 
     @Override
     public boolean equals(Object other) {
@@ -411,15 +364,17 @@ public class FuzzyTermsEnum extends TermsEnum {
         return true;
       if (!(other instanceof LevenshteinAutomataAttributeImpl))
         return false;
-      return automata.equals(((LevenshteinAutomataAttributeImpl) other).automata);
+      return Arrays.equals(automata, ((LevenshteinAutomataAttributeImpl) other).automata);
     }
 
     @Override
-    public void copyTo(AttributeImpl target) {
-      final List<CompiledAutomaton> targetAutomata =
-        ((LevenshteinAutomataAttribute) target).automata();
-      targetAutomata.clear();
-      targetAutomata.addAll(automata);
+    public void copyTo(AttributeImpl _target) {
+      LevenshteinAutomataAttribute target = (LevenshteinAutomataAttribute) _target;
+      if (automata == null) {
+        target.setAutomata(null);
+      } else {
+        target.setAutomata(automata);
+      }
     }
 
     @Override
135
lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java
Normal file
135
lucene/core/src/test/org/apache/lucene/index/Test2BDocs.java
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
|
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.LuceneTestCase.Monster;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
+import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.TimeUnits;
+
+@SuppressCodecs({"SimpleText", "Memory", "Direct"})
+@TimeoutSuite(millis = 80 * TimeUnits.HOUR) // effectively no limit
+@Monster("Takes ~30min")
+@SuppressSysoutChecks(bugUrl = "Stuff gets printed")
+public class Test2BDocs extends LuceneTestCase {
+
+  // indexes Integer.MAX_VALUE docs with indexed field(s)
+  public void test2BDocs() throws Exception {
+    BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BDocs"));
+    if (dir instanceof MockDirectoryWrapper) {
+      ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
+    }
+
+    IndexWriter w = new IndexWriter(dir,
+        new IndexWriterConfig(new MockAnalyzer(random()))
+        .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
+        .setRAMBufferSizeMB(256.0)
+        .setMergeScheduler(new ConcurrentMergeScheduler())
+        .setMergePolicy(newLogMergePolicy(false, 10))
+        .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
+        .setCodec(TestUtil.getDefaultCodec()));
+
+    Document doc = new Document();
+    Field field = new Field("f1", "a", StringField.TYPE_NOT_STORED);
+    doc.add(field);
+
+    for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
+      w.addDocument(doc);
+      if (i % (10*1000*1000) == 0) {
+        System.out.println("indexed: " + i);
+        System.out.flush();
+      }
+    }
+
+    w.forceMerge(1);
+    w.close();
+
+    System.out.println("verifying...");
+    System.out.flush();
+
+    DirectoryReader r = DirectoryReader.open(dir);
+
+    BytesRef term = new BytesRef(1);
+    term.bytes[0] = (byte)'a';
+    term.length = 1;
+
+    long skips = 0;
+
+    Random rnd = random();
+
+    long start = System.nanoTime();
+
+    for (LeafReaderContext context : r.leaves()) {
+      LeafReader reader = context.reader();
+      int lim = context.reader().maxDoc();
+
+      Terms terms = reader.fields().terms("f1");
+      for (int i=0; i<10000; i++) {
+        TermsEnum te = terms.iterator();
+        assertTrue( te.seekExact(term) );
+        PostingsEnum docs = te.postings(null);
+
+        // skip randomly through the term
+        for (int target = -1;;)
+        {
+          int maxSkipSize = lim - target + 1;
+          // do a smaller skip half of the time
+          if (rnd.nextBoolean()) {
+            maxSkipSize = Math.min(256, maxSkipSize);
+          }
+          int newTarget = target + rnd.nextInt(maxSkipSize) + 1;
+          if (newTarget >= lim) {
+            if (target+1 >= lim) break; // we already skipped to end, so break.
+            newTarget = lim-1; // skip to end
+          }
+          target = newTarget;
+
+          int res = docs.advance(target);
+          if (res == PostingsEnum.NO_MORE_DOCS) break;
+
+          assertTrue( res >= target );
+
+          skips++;
+          target = res;
+        }
+      }
+    }
+
+    r.close();
+    dir.close();
+
+    long end = System.nanoTime();
+
+    System.out.println("Skip count=" + skips + " seconds=" + TimeUnit.NANOSECONDS.toSeconds(end-start));
+    assert skips > 0;
+  }
+
+}
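For illustration only (not part of this change): a minimal, self-contained sketch of the seek-then-advance postings pattern the test above exercises, against a tiny index instead of two billion documents. It assumes Lucene 6.x with lucene-core and lucene-analyzers-common on the classpath; the class name, field name "f1" and document count are placeholders.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;

public class AdvanceSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.add(new StringField("f1", "a", Field.Store.NO));  // every doc carries the same term
        w.addDocument(doc);
      }
    }
    try (DirectoryReader r = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : r.leaves()) {
        Terms terms = ctx.reader().fields().terms("f1");
        TermsEnum te = terms.iterator();
        if (te.seekExact(new BytesRef("a"))) {           // position the enum on the term
          PostingsEnum postings = te.postings(null);
          int doc = postings.advance(500);                // jump forward in the postings list
          while (doc != DocIdSetIterator.NO_MORE_DOCS) {
            doc = postings.advance(doc + 100);            // keep skipping ahead until exhausted
          }
        }
      }
    }
  }
}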
@@ -18,11 +18,13 @@ package org.apache.lucene.search;
 
 
 
+import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Random;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -36,7 +38,6 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.junit.AfterClass;
@@ -66,11 +67,26 @@ public class TestBoolean2 extends LuceneTestCase {
   private static Directory dir2;
   private static int mulFactor;
 
+  private static Directory copyOf(Directory dir) throws IOException {
+    Directory copy = newFSDirectory(createTempDir());
+    for(String name : dir.listAll()) {
+      if (name.startsWith("extra")) {
+        continue;
+      }
+      copy.copyFrom(dir, name, name, IOContext.DEFAULT);
+      copy.sync(Collections.singleton(name));
+    }
+    return copy;
+  }
+
   @BeforeClass
   public static void beforeClass() throws Exception {
     // in some runs, test immediate adjacency of matches - in others, force a full bucket gap between docs
     NUM_FILLER_DOCS = random().nextBoolean() ? 0 : BooleanScorer.SIZE;
     PRE_FILLER_DOCS = TestUtil.nextInt(random(), 0, (NUM_FILLER_DOCS / 2));
+    if (VERBOSE) {
+      System.out.println("TEST: NUM_FILLER_DOCS=" + NUM_FILLER_DOCS + " PRE_FILLER_DOCS=" + PRE_FILLER_DOCS);
+    }
 
     if (NUM_FILLER_DOCS * PRE_FILLER_DOCS > 100000) {
       directory = newFSDirectory(createTempDir());
@@ -78,7 +94,11 @@ public class TestBoolean2 extends LuceneTestCase {
       directory = newDirectory();
     }
 
-    RandomIndexWriter writer= new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
+    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+    // randomized codecs are sometimes too costly for this test:
+    iwc.setCodec(Codec.forName("Lucene62"));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer= new RandomIndexWriter(random(), directory, iwc);
     // we'll make a ton of docs, disable store/norms/vectors
     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setOmitNorms(true);
@@ -118,8 +138,10 @@ public class TestBoolean2 extends LuceneTestCase {
         singleSegmentDirectory.sync(Collections.singleton(fileName));
       }
 
-      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+      iwc = newIndexWriterConfig(new MockAnalyzer(random()));
       // we need docID order to be preserved:
+      // randomized codecs are sometimes too costly for this test:
+      iwc.setCodec(Codec.forName("Lucene62"));
       iwc.setMergePolicy(newLogMergePolicy());
       try (IndexWriter w = new IndexWriter(singleSegmentDirectory, iwc)) {
         w.forceMerge(1, true);
@@ -129,7 +151,7 @@ public class TestBoolean2 extends LuceneTestCase {
     singleSegmentSearcher.setSimilarity(searcher.getSimilarity(true));
 
     // Make big index
-    dir2 = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(directory));
+    dir2 = copyOf(directory);
 
     // First multiply small test index:
     mulFactor = 1;
@@ -141,9 +163,14 @@ public class TestBoolean2 extends LuceneTestCase {
       if (VERBOSE) {
        System.out.println("\nTEST: cycle...");
      }
-      final Directory copy = new MockDirectoryWrapper(random(), TestUtil.ramCopyOf(dir2));
-      RandomIndexWriter w = new RandomIndexWriter(random(), dir2);
+      final Directory copy = copyOf(dir2);
+
+      iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+      // randomized codecs are sometimes too costly for this test:
+      iwc.setCodec(Codec.forName("Lucene62"));
+      RandomIndexWriter w = new RandomIndexWriter(random(), dir2, iwc);
       w.addIndexes(copy);
+      copy.close();
       docCount = w.maxDoc();
       w.close();
       mulFactor *= 2;
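For illustration only (not part of this change): a minimal sketch of pinning a concrete codec on an IndexWriterConfig, the pattern the hunks above apply so the test does not pick up a randomized, potentially very slow codec. It assumes Lucene 6.2.x (where the "Lucene62" codec name is registered) plus lucene-analyzers-common; the class name and the use of StandardAnalyzer/RAMDirectory are placeholders.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;

public class PinnedCodecSketch {
  public static void main(String[] args) throws Exception {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setCodec(Codec.forName("Lucene62"));  // fixed codec instead of a randomized one
    try (IndexWriter w = new IndexWriter(new RAMDirectory(), iwc)) {
      w.commit();  // segments written from here on use the pinned codec
    }
  }
}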
@@ -18,13 +18,19 @@ package org.apache.lucene.search;
 
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.RandomIndexWriter;
@@ -32,7 +38,10 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
 /**
@@ -489,4 +498,210 @@ public class TestFuzzyQuery extends LuceneTestCase {
     doc.add(newTextField("field", text, Field.Store.YES));
     writer.addDocument(doc);
   }
+
+  private String randomSimpleString(int digits) {
+    int termLength = TestUtil.nextInt(random(), 1, 8);
+    char[] chars = new char[termLength];
+    for(int i=0;i<termLength;i++) {
+      chars[i] = (char) ('a' + random().nextInt(digits));
+    }
+    return new String(chars);
+  }
+
+  @SuppressWarnings({"unchecked","rawtypes"})
+  public void testRandom() throws Exception {
+    int numTerms = atLeast(100);
+    int digits = TestUtil.nextInt(random(), 2, 3);
+    Set<String> terms = new HashSet<>();
+    while (terms.size() < numTerms) {
+      terms.add(randomSimpleString(digits));
+    }
+
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    for(String term : terms) {
+      Document doc = new Document();
+      doc.add(new StringField("field", term, Field.Store.YES));
+      w.addDocument(doc);
+    }
+    DirectoryReader r = w.getReader();
+    //System.out.println("TEST: reader=" + r);
+    IndexSearcher s = newSearcher(r);
+    int iters = atLeast(1000);
+    for(int iter=0;iter<iters;iter++) {
+      String queryTerm = randomSimpleString(digits);
+      int prefixLength = random().nextInt(queryTerm.length());
+      String queryPrefix = queryTerm.substring(0, prefixLength);
+
+      // we don't look at scores here:
+      List<TermAndScore>[] expected = new List[3];
+      for(int ed=0;ed<3;ed++) {
+        expected[ed] = new ArrayList<TermAndScore>();
+      }
+      for(String term : terms) {
+        if (term.startsWith(queryPrefix) == false) {
+          continue;
+        }
+        int ed = getDistance(term, queryTerm);
+        if (Math.min(queryTerm.length(), term.length()) > ed) {
+          float score = 1f - (float) ed / (float) Math.min(queryTerm.length(), term.length());
+          while (ed < 3) {
+            expected[ed].add(new TermAndScore(term, score));
+            ed++;
+          }
+        }
+      }
+
+      for(int ed=0;ed<3;ed++) {
+        Collections.sort(expected[ed]);
+        int queueSize = TestUtil.nextInt(random(), 1, terms.size());
+        /*
+        System.out.println("\nTEST: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength);
+        for(TermAndScore ent : expected[ed]) {
+          System.out.println(" " + ent);
+        }
+        */
+        FuzzyQuery query = new FuzzyQuery(new Term("field", queryTerm), ed, prefixLength, queueSize, true);
+        TopDocs hits = s.search(query, terms.size());
+        Set<String> actual = new HashSet<>();
+        for(ScoreDoc hit : hits.scoreDocs) {
+          Document doc = s.doc(hit.doc);
+          actual.add(doc.get("field"));
+          //System.out.println(" actual: " + doc.get("field") + " score=" + hit.score);
+        }
+        Set<String> expectedTop = new HashSet<>();
+        int limit = Math.min(queueSize, expected[ed].size());
+        for(int i=0;i<limit;i++) {
+          expectedTop.add(expected[ed].get(i).term);
+        }
+
+        if (actual.equals(expectedTop) == false) {
+          StringBuilder sb = new StringBuilder();
+          sb.append("FAILED: query=" + queryTerm + " ed=" + ed + " queueSize=" + queueSize + " vs expected match size=" + expected[ed].size() + " prefixLength=" + prefixLength + "\n");
+
+          boolean first = true;
+          for(String term : actual) {
+            if (expectedTop.contains(term) == false) {
+              if (first) {
+                sb.append(" these matched but shouldn't:\n");
+                first = false;
+              }
+              sb.append(" " + term + "\n");
+            }
+          }
+          first = true;
+          for(String term : expectedTop) {
+            if (actual.contains(term) == false) {
+              if (first) {
+                sb.append(" these did not match but should:\n");
+                first = false;
+              }
+              sb.append(" " + term + "\n");
+            }
+          }
+          throw new AssertionError(sb.toString());
+        }
+      }
+    }
+
+    IOUtils.close(r, w, dir);
+  }
+
+  private static class TermAndScore implements Comparable<TermAndScore> {
+    final String term;
+    final float score;
+
+    public TermAndScore(String term, float score) {
+      this.term = term;
+      this.score = score;
+    }
+
+    @Override
+    public int compareTo(TermAndScore other) {
+      // higher score sorts first, and if scores are tied, lower term sorts first
+      if (score > other.score) {
+        return -1;
+      } else if (score < other.score) {
+        return 1;
+      } else {
+        return term.compareTo(other.term);
+      }
+    }
+
+    @Override
+    public String toString() {
+      return term + " score=" + score;
+    }
+  }
+
+  // Poached from LuceneLevenshteinDistance.java (from suggest module): it supports transpositions (treats them as ed=1, not ed=2)
+  private static int getDistance(String target, String other) {
+    IntsRef targetPoints;
+    IntsRef otherPoints;
+    int n;
+    int d[][]; // cost array
+
+    // NOTE: if we cared, we could 3*m space instead of m*n space, similar to
+    // what LevenshteinDistance does, except cycling thru a ring of three
+    // horizontal cost arrays... but this comparator is never actually used by
+    // DirectSpellChecker, it's only used for merging results from multiple shards
+    // in "distributed spellcheck", and it's inefficient in other ways too...
+
+    // cheaper to do this up front once
+    targetPoints = toIntsRef(target);
+    otherPoints = toIntsRef(other);
+    n = targetPoints.length;
+    final int m = otherPoints.length;
+    d = new int[n+1][m+1];
+
+    if (n == 0 || m == 0) {
+      if (n == m) {
+        return 0;
+      }
+      else {
+        return Math.max(n, m);
+      }
+    }
+
+    // indexes into strings s and t
+    int i; // iterates through s
+    int j; // iterates through t
+
+    int t_j; // jth character of t
+
+    int cost; // cost
+
+    for (i = 0; i<=n; i++) {
+      d[i][0] = i;
+    }
+
+    for (j = 0; j<=m; j++) {
+      d[0][j] = j;
+    }
+
+    for (j = 1; j<=m; j++) {
+      t_j = otherPoints.ints[j-1];
+
+      for (i=1; i<=n; i++) {
+        cost = targetPoints.ints[i-1]==t_j ? 0 : 1;
+        // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
+        d[i][j] = Math.min(Math.min(d[i-1][j]+1, d[i][j-1]+1), d[i-1][j-1]+cost);
+        // transposition
+        if (i > 1 && j > 1 && targetPoints.ints[i-1] == otherPoints.ints[j-2] && targetPoints.ints[i-2] == otherPoints.ints[j-1]) {
+          d[i][j] = Math.min(d[i][j], d[i-2][j-2] + cost);
+        }
+      }
+    }
+
+    return d[n][m];
+  }
+
+  private static IntsRef toIntsRef(String s) {
+    IntsRef ref = new IntsRef(s.length()); // worst case
+    int utf16Len = s.length();
+    for (int i = 0, cp = 0; i < utf16Len; i += Character.charCount(cp)) {
+      cp = ref.ints[ref.length++] = Character.codePointAt(s, i);
+    }
+    return ref;
+  }
 }
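For illustration only (not part of this change): the randomized test above drives the five-argument FuzzyQuery constructor (term, maxEdits, prefixLength, maxExpansions, transpositions). A minimal, self-contained sketch of that constructor outside the test framework follows; it assumes Lucene 6.x with lucene-core and lucene-analyzers-common, and the class name, field name and sample terms are placeholders.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.RAMDirectory;

public class FuzzyQuerySketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (String s : new String[] {"abc", "abd", "xyz"}) {
        Document doc = new Document();
        doc.add(new StringField("field", s, Field.Store.YES));
        w.addDocument(doc);
      }
    }
    try (DirectoryReader r = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(r);
      // maxEdits=1, prefixLength=0, maxExpansions=50, transpositions=true
      FuzzyQuery q = new FuzzyQuery(new Term("field", "abe"), 1, 0, 50, true);
      for (ScoreDoc hit : searcher.search(q, 10).scoreDocs) {
        // matches "abc" and "abd" (edit distance 1), but not "xyz"
        System.out.println(searcher.doc(hit.doc).get("field") + " score=" + hit.score);
      }
    }
  }
}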
@@ -539,7 +539,9 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
   public void testConcurrentIndexCloseSearchAndRefresh() throws Exception {
     final Directory dir = newFSDirectory(createTempDir());
     AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
-    writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
+    final MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(IndexWriter.MAX_TERM_LENGTH);
+    writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
 
     AtomicReference<SearcherManager> mgrRef = new AtomicReference<>();
     mgrRef.set(new SearcherManager(writerRef.get(), null));
@@ -561,7 +563,7 @@ public class TestSearcherManager extends ThreadedIndexingAndSearchingTestCase {
             } else {
               w.rollback();
             }
-            writerRef.set(new IndexWriter(dir, newIndexWriterConfig()));
+            writerRef.set(new IndexWriter(dir, newIndexWriterConfig(analyzer)));
           }
         }
         docs.close();
@@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
       Term[] phraseQueryTerms = phraseQuery.getTerms();
       if (phraseQueryTerms.length == 1) {
         extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
-      }
-      else {
+      } else {
         SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
         for (int i = 0; i < phraseQueryTerms.length; i++) {
           clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@@ -153,8 +152,8 @@ public class WeightedSpanTermExtractor {
       // this query is TermContext sensitive.
       extractWeightedTerms(terms, query, boost);
     } else if (query instanceof DisjunctionMaxQuery) {
-      for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
-        extract(iterator.next(), boost, terms);
+      for (Query clause : ((DisjunctionMaxQuery) query)) {
+        extract(clause, boost, terms);
       }
     } else if (query instanceof ToParentBlockJoinQuery) {
       extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@@ -184,16 +183,15 @@ public class WeightedSpanTermExtractor {
           disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
           ++distinctPositions;
         }
-        for (int j = 0; j < termArray.length; ++j) {
-          disjuncts.add(new SpanTermQuery(termArray[j]));
+        for (Term aTermArray : termArray) {
+          disjuncts.add(new SpanTermQuery(aTermArray));
         }
       }
 
       int positionGaps = 0;
       int position = 0;
       final SpanQuery[] clauses = new SpanQuery[distinctPositions];
-      for (int i = 0; i < disjunctLists.length; ++i) {
-        List<SpanQuery> disjuncts = disjunctLists[i];
+      for (List<SpanQuery> disjuncts : disjunctLists) {
         if (disjuncts != null) {
           clauses[position++] = new SpanOrQuery(disjuncts
               .toArray(new SpanQuery[disjuncts.size()]));
@@ -202,12 +200,16 @@ public class WeightedSpanTermExtractor {
         }
       }
 
+      if (clauses.length == 1) {
+        extractWeightedSpanTerms(terms, clauses[0], boost);
+      } else {
       final int slop = mpq.getSlop();
       final boolean inorder = (slop == 0);
 
       SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
       extractWeightedSpanTerms(terms, sp, boost);
+      }
     } else if (query instanceof MatchAllDocsQuery) {
       //nothing
     } else if (query instanceof CustomScoreQuery){
@@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
-import org.junit.Test;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
@@ -1580,30 +1579,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     helper.start();
   }
 
-  @Test
   public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";
+
+    final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
+
+    assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";
+
+    final MultiPhraseQuery mpq = new MultiPhraseQuery.Builder()
+        .add(new Term(fieldName, "uchu")).build();
+
+    assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
+      throws IOException, InvalidTokenOffsetsException {
     final Analyzer analyzer = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         return new TokenStreamComponents(new NGramTokenizer(4, 4));
       }
     };
-    final String fieldName = "substring";
-
-    final List<BytesRef> list = new ArrayList<>();
-    list.add(new BytesRef("uchu"));
-    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
 
     final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
-    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
 
     final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
     highlighter.setTextFragmenter(new SimpleFragmenter(100));
-    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
 
-    assertEquals("B<b>uchu</b>ng",fragment);
+    assertEquals(expected, fragment);
   }
 
   public void testUnRewrittenQuery() throws Exception {
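For illustration only (not part of this change): a minimal, self-contained sketch of highlighting a PhraseQuery match through QueryScorer and Highlighter, mirroring the refactored test above. It assumes Lucene 6.x with lucene-highlighter and lucene-analyzers-common on the classpath; the class name is a placeholder, while the field name, n-gram analyzer and sample text are taken from the test.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.BytesRef;

public class PhraseHighlightSketch {
  public static void main(String[] args) throws Exception {
    // 4-gram analyzer, as in the test
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new NGramTokenizer(4, 4));
      }
    };
    PhraseQuery query = new PhraseQuery("substring", new BytesRef("uchu"));
    Highlighter highlighter =
        new Highlighter(new SimpleHTMLFormatter("<b>", "</b>"), new QueryScorer(query, "substring"));
    highlighter.setTextFragmenter(new SimpleFragmenter(100));
    // prints: B<b>uchu</b>ng
    System.out.println(highlighter.getBestFragment(analyzer, "substring", "Buchung"));
  }
}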
@@ -21,8 +21,8 @@ import org.apache.lucene.queryparser.xml.DOMUtils;
 import org.apache.lucene.queryparser.xml.ParserException;
 import org.apache.lucene.queryparser.xml.QueryBuilder;
 import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;
-import org.apache.lucene.sandbox.queries.SlowFuzzyQuery;
 import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
@@ -33,7 +33,7 @@ import org.w3c.dom.NodeList;
 public class FuzzyLikeThisQueryBuilder implements QueryBuilder {
 
   private static final int DEFAULT_MAX_NUM_TERMS = 50;
-  private static final float DEFAULT_MIN_SIMILARITY = SlowFuzzyQuery.defaultMinSimilarity;
+  private static final float DEFAULT_MIN_SIMILARITY = FuzzyQuery.defaultMinSimilarity;
   private static final int DEFAULT_PREFIX_LENGTH = 1;
   private static final boolean DEFAULT_IGNORE_TF = false;
 
@@ -38,6 +38,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostAttribute;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.FuzzyTermsEnum;
 import org.apache.lucene.search.MaxNonCompetitiveBoostAttribute;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
@@ -46,6 +47,7 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.automaton.LevenshteinAutomata;
 
 /**
  * Fuzzifies ALL terms provided as strings and then picks the best n differentiating terms.
@@ -114,12 +116,12 @@ public class FuzzyLikeThisQuery extends Query
     {
         String queryString;
         String fieldName;
-        float minSimilarity;
+        int maxEdits;
         int prefixLength;
-        public FieldVals(String name, float similarity, int length, String queryString)
+        public FieldVals(String name, int maxEdits, int length, String queryString)
         {
             fieldName = name;
-            minSimilarity = similarity;
+            this.maxEdits = maxEdits;
             prefixLength = length;
             this.queryString = queryString;
         }
@@ -130,7 +132,7 @@ public class FuzzyLikeThisQuery extends Query
             int result = 1;
             result = prime * result
                 + ((fieldName == null) ? 0 : fieldName.hashCode());
-            result = prime * result + Float.floatToIntBits(minSimilarity);
+            result = prime * result + maxEdits;
             result = prime * result + prefixLength;
             result = prime * result
                 + ((queryString == null) ? 0 : queryString.hashCode());
@@ -151,9 +153,9 @@ public class FuzzyLikeThisQuery extends Query
                 return false;
             } else if (!fieldName.equals(other.fieldName))
                 return false;
-            if (Float.floatToIntBits(minSimilarity) != Float
-                .floatToIntBits(other.minSimilarity))
+            if (maxEdits != other.maxEdits) {
                 return false;
+            }
             if (prefixLength != other.prefixLength)
                 return false;
             if (queryString == null) {
@@ -171,12 +173,16 @@ public class FuzzyLikeThisQuery extends Query
     /**
      * Adds user input for "fuzzification"
      * @param queryString The string which will be parsed by the analyzer and for which fuzzy variants will be parsed
-     * @param minSimilarity The minimum similarity of the term variants (see FuzzyTermsEnum)
+     * @param minSimilarity The minimum similarity of the term variants; must be 0, 1 or 2 (see FuzzyTermsEnum)
      * @param prefixLength Length of required common prefix on variant terms (see FuzzyTermsEnum)
      */
     public void addTerms(String queryString, String fieldName,float minSimilarity, int prefixLength)
     {
-        fieldVals.add(new FieldVals(fieldName,minSimilarity,prefixLength,queryString));
+        int maxEdits = (int) minSimilarity;
+        if (maxEdits != minSimilarity || maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
+            throw new IllegalArgumentException("minSimilarity must integer value between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + ", inclusive; got " + minSimilarity);
+        }
+        fieldVals.add(new FieldVals(fieldName,maxEdits,prefixLength,queryString));
     }
 
 
@@ -202,7 +208,7 @@ public class FuzzyLikeThisQuery extends Query
             AttributeSource atts = new AttributeSource();
             MaxNonCompetitiveBoostAttribute maxBoostAtt =
                 atts.addAttribute(MaxNonCompetitiveBoostAttribute.class);
-            SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength);
+            FuzzyTermsEnum fe = new FuzzyTermsEnum(terms, atts, startTerm, f.maxEdits, f.prefixLength, true);
             //store the df so all variants use same idf
             int df = reader.docFreq(startTerm);
             int numVariants = 0;
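For illustration only (not part of this change): after this change the third argument of FuzzyLikeThisQuery.addTerms is interpreted as a whole edit distance between 0 and 2, so callers that used to pass fractional similarities such as 0.3f must pass 1 or 2 instead, exactly as the updated tests further below do. A minimal sketch of the new call shape, assuming Lucene 6.x with the lucene-sandbox and lucene-analyzers-common modules; the class name and the "smith"/"name" values mirror the test changes.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery;

public class FuzzyLikeThisMigrationSketch {
  public static void main(String[] args) {
    FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, new StandardAnalyzer());
    // before this change: flt.addTerms("smith", "name", 0.3f, 1);
    // after this change a non-integral or out-of-range similarity throws IllegalArgumentException,
    // so pass a whole edit distance (0, 1 or 2):
    flt.addTerms("smith", "name", 2, 1);
  }
}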
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.sandbox.queries;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.SingleTermsEnum;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.BooleanQuery; // javadocs
-import org.apache.lucene.search.FuzzyQuery; // javadocs
-import org.apache.lucene.search.MultiTermQuery;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.automaton.LevenshteinAutomata;
-
-/** Implements the classic fuzzy search query. The similarity measurement
- * is based on the Levenshtein (edit distance) algorithm.
- * <p>
- * Note that, unlike {@link FuzzyQuery}, this query will silently allow
- * for a (possibly huge) number of edit distances in comparisons, and may
- * be extremely slow (comparing every term in the index).
- *
- * @deprecated Use {@link FuzzyQuery} instead.
- */
-@Deprecated
-public class SlowFuzzyQuery extends MultiTermQuery {
-
-  public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;
-  public final static int defaultPrefixLength = 0;
-  public final static int defaultMaxExpansions = 50;
-
-  private float minimumSimilarity;
-  private int prefixLength;
-  private boolean termLongEnough = false;
-
-  protected Term term;
-
-  /**
-   * Create a new SlowFuzzyQuery that will match terms with a similarity
-   * of at least <code>minimumSimilarity</code> to <code>term</code>.
-   * If a <code>prefixLength</code> > 0 is specified, a common prefix
-   * of that length is also required.
-   *
-   * @param term the term to search for
-   * @param minimumSimilarity a value between 0 and 1 to set the required similarity
-   *  between the query term and the matching terms. For example, for a
-   *  <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length
-   *  as the query term is considered similar to the query term if the edit distance
-   *  between both terms is less than <code>length(term)*0.5</code>
-   *  <p>
-   *  Alternatively, if <code>minimumSimilarity</code> is >= 1f, it is interpreted
-   *  as a pure Levenshtein edit distance. For example, a value of <code>2f</code>
-   *  will match all terms within an edit distance of <code>2</code> from the
-   *  query term. Edit distances specified in this way may not be fractional.
-   *
-   * @param prefixLength length of common (non-fuzzy) prefix
-   * @param maxExpansions the maximum number of terms to match. If this number is
-   *  greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten,
-   *  then the maxClauseCount will be used instead.
-   * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0
-   *  or if prefixLength < 0
-   */
-  public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength,
-      int maxExpansions) {
-    super(term.field());
-    this.term = term;
-
-    if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity)
-      throw new IllegalArgumentException("fractional edit distances are not allowed");
-    if (minimumSimilarity < 0.0f)
-      throw new IllegalArgumentException("minimumSimilarity < 0");
-    if (prefixLength < 0)
-      throw new IllegalArgumentException("prefixLength < 0");
-    if (maxExpansions < 0)
-      throw new IllegalArgumentException("maxExpansions < 0");
-
-    setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions));
-
-    String text = term.text();
-    int len = text.codePointCount(0, text.length());
-    if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) {
-      this.termLongEnough = true;
-    }
-
-    this.minimumSimilarity = minimumSimilarity;
-    this.prefixLength = prefixLength;
-  }
-
-  /**
-   * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)}.
-   */
-  public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
-    this(term, minimumSimilarity, prefixLength, defaultMaxExpansions);
-  }
-
-  /**
-   * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)}.
-   */
-  public SlowFuzzyQuery(Term term, float minimumSimilarity) {
-    this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions);
-  }
-
-  /**
-   * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)}.
-   */
-  public SlowFuzzyQuery(Term term) {
-    this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions);
-  }
-
-  /**
-   * Returns the minimum similarity that is required for this query to match.
-   * @return float value between 0.0 and 1.0
-   */
-  public float getMinSimilarity() {
-    return minimumSimilarity;
-  }
-
-  /**
-   * Returns the non-fuzzy prefix length. This is the number of characters at the start
-   * of a term that must be identical (not fuzzy) to the query term if the query
-   * is to match that term.
-   */
-  public int getPrefixLength() {
-    return prefixLength;
-  }
-
-  @Override
-  protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
-    if (!termLongEnough) { // can only match if it's exact
-      return new SingleTermsEnum(terms.iterator(), term.bytes());
-    }
-    return new SlowFuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength);
-  }
-
-  /**
-   * Returns the pattern term.
-   */
-  public Term getTerm() {
-    return term;
-  }
-
-  @Override
-  public String toString(String field) {
-    final StringBuilder buffer = new StringBuilder();
-    if (!term.field().equals(field)) {
-      buffer.append(term.field());
-      buffer.append(":");
-    }
-    buffer.append(term.text());
-    buffer.append('~');
-    buffer.append(Float.toString(minimumSimilarity));
-    return buffer.toString();
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 31;
-    int result = super.hashCode();
-    result = prime * result + Float.floatToIntBits(minimumSimilarity);
-    result = prime * result + prefixLength;
-    result = prime * result + ((term == null) ? 0 : term.hashCode());
-    return result;
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (!super.equals(obj))
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-    SlowFuzzyQuery other = (SlowFuzzyQuery) obj;
-    if (Float.floatToIntBits(minimumSimilarity) != Float
-        .floatToIntBits(other.minimumSimilarity))
-      return false;
-    if (prefixLength != other.prefixLength)
-      return false;
-    if (term == null) {
-      if (other.term != null)
-        return false;
-    } else if (!term.equals(other.term))
-      return false;
-    return true;
-  }
-}
@@ -1,263 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.sandbox.queries;
-
-import java.io.IOException;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.FilteredTermsEnum;
-import org.apache.lucene.search.BoostAttribute;
-import org.apache.lucene.search.FuzzyTermsEnum;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.UnicodeUtil;
-
-/** Potentially slow fuzzy TermsEnum for enumerating all terms that are similar
- * to the specified filter term.
- * <p> If the minSimilarity or maxEdits is greater than the Automaton's
- * allowable range, this backs off to the classic (brute force)
- * fuzzy terms enum method by calling FuzzyTermsEnum's getAutomatonEnum.
- * </p>
- * <p>Term enumerations are always ordered by
- * {@link BytesRef#compareTo}. Each term in the enumeration is
- * greater than all that precede it.</p>
- *
- * @deprecated Use {@link FuzzyTermsEnum} instead.
- */
-@Deprecated
-public final class SlowFuzzyTermsEnum extends FuzzyTermsEnum {
-
-  public SlowFuzzyTermsEnum(Terms terms, AttributeSource atts, Term term,
-      float minSimilarity, int prefixLength) throws IOException {
-    super(terms, atts, term, minSimilarity, prefixLength, false);
-  }
-
-  @Override
-  protected void maxEditDistanceChanged(BytesRef lastTerm, int maxEdits, boolean init)
-      throws IOException {
-    TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
-    if (newEnum != null) {
-      setEnum(newEnum);
-    } else if (init) {
-      setEnum(new LinearFuzzyTermsEnum());
-    }
-  }
-
-  /**
-   * Implement fuzzy enumeration with linear brute force.
-   */
-  private class LinearFuzzyTermsEnum extends FilteredTermsEnum {
-    /* Allows us save time required to create a new array
-     * every time similarity is called.
-     */
-    private int[] d;
-    private int[] p;
-
-    // this is the text, minus the prefix
-    private final int[] text;
-
-    private final BoostAttribute boostAtt =
-      attributes().addAttribute(BoostAttribute.class);
-
-    /**
-     * Constructor for enumeration of all terms from specified <code>reader</code> which share a prefix of
-     * length <code>prefixLength</code> with <code>term</code> and which have a fuzzy similarity >
-     * <code>minSimilarity</code>.
-     * <p>
-     * After calling the constructor the enumeration is already pointing to the first
-     * valid term if such a term exists.
-     *
-     * @throws IOException If there is a low-level I/O error.
-     */
-    public LinearFuzzyTermsEnum() throws IOException {
-      super(terms.iterator());
-
-      this.text = new int[termLength - realPrefixLength];
-      System.arraycopy(termText, realPrefixLength, text, 0, text.length);
-      final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);
-      prefixBytesRef = new BytesRef(prefix);
-      this.d = new int[this.text.length + 1];
-      this.p = new int[this.text.length + 1];
-
-      setInitialSeekTerm(prefixBytesRef);
-    }
-
-    private final BytesRef prefixBytesRef;
-    // used for unicode conversion from BytesRef byte[] to int[]
-    private final IntsRefBuilder utf32 = new IntsRefBuilder();
-
-    /**
-     * <p>The termCompare method in FuzzyTermEnum uses Levenshtein distance to
-     * calculate the distance between the given term and the comparing term.
-     * </p>
-     * <p>If the minSimilarity is >= 1.0, this uses the maxEdits as the comparison.
-     * Otherwise, this method uses the following logic to calculate similarity.
-     * <pre>
-     *   similarity = 1 - ((float)distance / (float) (prefixLength + Math.min(textlen, targetlen)));
-     * </pre>
-     * where distance is the Levenshtein distance for the two words.
-     * </p>
-     *
-     */
-    @Override
-    protected final AcceptStatus accept(BytesRef term) {
-      if (StringHelper.startsWith(term, prefixBytesRef)) {
-        utf32.copyUTF8Bytes(term);
-        final int distance = calcDistance(utf32.ints(), realPrefixLength, utf32.length() - realPrefixLength);
-
-        //Integer.MIN_VALUE is the sentinel that Levenshtein stopped early
-        if (distance == Integer.MIN_VALUE){
-          return AcceptStatus.NO;
-        }
-        //no need to calc similarity, if raw is true and distance > maxEdits
-        if (raw == true && distance > maxEdits){
-          return AcceptStatus.NO;
-        }
-        final float similarity = calcSimilarity(distance, (utf32.length() - realPrefixLength), text.length);
-
-        //if raw is true, then distance must also be <= maxEdits by now
-        //given the previous if statement
-        if (raw == true ||
-            (raw == false && similarity > minSimilarity)) {
-          boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
-          return AcceptStatus.YES;
-        } else {
-          return AcceptStatus.NO;
-        }
-      } else {
-        return AcceptStatus.END;
-      }
-    }
-
-    /******************************
-     * Compute Levenshtein distance
-     ******************************/
-
-    /**
-     * <p>calcDistance returns the Levenshtein distance between the query term
-     * and the target term.</p>
-     *
-     * <p>Embedded within this algorithm is a fail-fast Levenshtein distance
-     * algorithm. The fail-fast algorithm differs from the standard Levenshtein
-     * distance algorithm in that it is aborted if it is discovered that the
-     * minimum distance between the words is greater than some threshold.
-
-     * <p>Levenshtein distance (also known as edit distance) is a measure of similarity
-     * between two strings where the distance is measured as the number of character
-     * deletions, insertions or substitutions required to transform one string to
-     * the other string.
-     * @param target the target word or phrase
-     * @param offset the offset at which to start the comparison
-     * @param length the length of what's left of the string to compare
-     * @return the number of edits or Integer.MIN_VALUE if the edit distance is
-     * greater than maxDistance.
-     */
-    private final int calcDistance(final int[] target, int offset, int length) {
-      final int m = length;
-      final int n = text.length;
-      if (n == 0) {
-        //we don't have anything to compare. That means if we just add
-        //the letters for m we get the new word
-        return m;
-      }
-      if (m == 0) {
-        return n;
-      }
-
-      final int maxDistance = calculateMaxDistance(m);
-
-      if (maxDistance < Math.abs(m-n)) {
-        //just adding the characters of m to n or vice-versa results in
-        //too many edits
-        //for example "pre" length is 3 and "prefixes" length is 8. We can see that
-        //given this optimal circumstance, the edit distance cannot be less than 5.
-        //which is 8-3 or more precisely Math.abs(3-8).
-        //if our maximum edit distance is 4, then we can discard this word
-        //without looking at it.
-        return Integer.MIN_VALUE;
-      }
-
-      // init matrix d
-      for (int i = 0; i <=n; ++i) {
-        p[i] = i;
-      }
-
-      // start computing edit distance
-      for (int j = 1; j<=m; ++j) { // iterates through target
-        int bestPossibleEditDistance = m;
-        final int t_j = target[offset+j-1]; // jth character of t
-        d[0] = j;
-
-        for (int i=1; i<=n; ++i) { // iterates through text
-          // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1)
-          if (t_j != text[i-1]) {
-            d[i] = Math.min(Math.min(d[i-1], p[i]), p[i-1]) + 1;
-          } else {
-            d[i] = Math.min(Math.min(d[i-1]+1, p[i]+1), p[i-1]);
-          }
-          bestPossibleEditDistance = Math.min(bestPossibleEditDistance, d[i]);
-        }
-
-        //After calculating row i, the best possible edit distance
-        //can be found by found by finding the smallest value in a given column.
-        //If the bestPossibleEditDistance is greater than the max distance, abort.
-
-        if (j > maxDistance && bestPossibleEditDistance > maxDistance) { //equal is okay, but not greater
-          //the closest the target can be to the text is just too far away.
-          //this target is leaving the party early.
-          return Integer.MIN_VALUE;
-        }
-
-        // copy current distance counts to 'previous row' distance counts: swap p and d
-        int _d[] = p;
-        p = d;
-        d = _d;
-      }
-
-      // our last action in the above loop was to switch d and p, so p now
-      // actually has the most recent cost counts
-
-      return p[n];
-    }
-
-    private float calcSimilarity(int edits, int m, int n){
-      // this will return less than 0.0 when the edit distance is
-      // greater than the number of characters in the shorter word.
-      // but this was the formula that was previously used in FuzzyTermEnum,
-      // so it has not been changed (even though minimumSimilarity must be
-      // greater than 0.0)
-
-      return 1.0f - ((float)edits / (float) (realPrefixLength + Math.min(n, m)));
-    }
-
-    /**
-     * The max Distance is the maximum Levenshtein distance for the text
-     * compared to some other value that results in score that is
-     * better than the minimum similarity.
-     * @param m the length of the "other value"
-     * @return the maximum levenshtein distance that we care about
-     */
-    private int calculateMaxDistance(int m) {
-      return raw ? maxEdits : Math.min(maxEdits,
-          (int)((1-minSimilarity) * (Math.min(text.length, m) + realPrefixLength)));
-    }
-  }
-}
@@ -77,7 +77,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   //Tests that idf ranking is not favouring rare mis-spellings over a strong edit-distance match
   public void testClosestEditDistanceMatchComesFirst() throws Throwable {
     FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
-    flt.addTerms("smith", "name", 0.3f, 1);
+    flt.addTerms("smith", "name", 2, 1);
     Query q = flt.rewrite(searcher.getIndexReader());
     HashSet<Term> queryTerms = new HashSet<>();
     searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -94,7 +94,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   //Test multiple input words are having variants produced
   public void testMultiWord() throws Throwable {
     FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
-    flt.addTerms("jonathin smoth", "name", 0.3f, 1);
+    flt.addTerms("jonathin smoth", "name", 2, 1);
     Query q = flt.rewrite(searcher.getIndexReader());
     HashSet<Term> queryTerms = new HashSet<>();
     searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -110,8 +110,8 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   // LUCENE-4809
   public void testNonExistingField() throws Throwable {
     FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
-    flt.addTerms("jonathin smoth", "name", 0.3f, 1);
-    flt.addTerms("jonathin smoth", "this field does not exist", 0.3f, 1);
+    flt.addTerms("jonathin smoth", "name", 2, 1);
+    flt.addTerms("jonathin smoth", "this field does not exist", 2, 1);
     // don't fail here just because the field doesn't exits
     Query q = flt.rewrite(searcher.getIndexReader());
     HashSet<Term> queryTerms = new HashSet<>();
@@ -129,7 +129,7 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   //Test bug found when first query word does not match anything
   public void testNoMatchFirstWordBug() throws Throwable {
     FuzzyLikeThisQuery flt = new FuzzyLikeThisQuery(10, analyzer);
-    flt.addTerms("fernando smith", "name", 0.3f, 1);
+    flt.addTerms("fernando smith", "name", 2, 1);
     Query q = flt.rewrite(searcher.getIndexReader());
     HashSet<Term> queryTerms = new HashSet<>();
     searcher.createWeight(q, true, 1f).extractTerms(queryTerms);
@@ -144,9 +144,9 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
   public void testFuzzyLikeThisQueryEquals() {
     Analyzer analyzer = new MockAnalyzer(random());
     FuzzyLikeThisQuery fltq1 = new FuzzyLikeThisQuery(10, analyzer);
-    fltq1.addTerms("javi", "subject", 0.5f, 2);
+    fltq1.addTerms("javi", "subject", 2, 2);
     FuzzyLikeThisQuery fltq2 = new FuzzyLikeThisQuery(10, analyzer);
-    fltq2.addTerms("javi", "subject", 0.5f, 2);
+    fltq2.addTerms("javi", "subject", 2, 2);
     assertEquals("FuzzyLikeThisQuery with same attributes is not equal", fltq1,
         fltq2);
   }
@ -1,487 +0,0 @@
|
|||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.sandbox.queries;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.MultiReader;
|
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
|
||||||
import org.apache.lucene.search.MultiTermQuery;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.ScoreDoc;
|
|
||||||
import org.apache.lucene.search.TopDocs;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Tests {@link SlowFuzzyQuery}.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
public class TestSlowFuzzyQuery extends LuceneTestCase {
|
|
||||||
|
|
||||||
public void testFuzziness() throws Exception {
|
|
||||||
//every test with SlowFuzzyQuery.defaultMinSimilarity
|
|
||||||
//is exercising the Automaton, not the brute force linear method
|
|
||||||
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("aaaaa", writer);
|
|
||||||
addDoc("aaaab", writer);
|
|
||||||
addDoc("aaabb", writer);
|
|
||||||
addDoc("aabbb", writer);
|
|
||||||
addDoc("abbbb", writer);
|
|
||||||
addDoc("bbbbb", writer);
|
|
||||||
addDoc("ddddd", writer);
|
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
|
|
||||||
// same with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(2, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 6);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
|
|
||||||
// test scoring
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals("3 documents should match", 3, hits.length);
|
|
||||||
List<String> order = Arrays.asList("bbbbb","abbbb","aabbb");
|
|
||||||
for (int i = 0; i < hits.length; i++) {
|
|
||||||
final String term = searcher.doc(hits[i].doc).get("field");
|
|
||||||
//System.out.println(hits[i].score);
|
|
||||||
assertEquals(order.get(i), term);
|
|
||||||
}
|
|
||||||
|
|
||||||
// test pq size by supplying maxExpansions=2
|
|
||||||
// This query would normally return 3 documents, because 3 terms match (see above):
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "bbbbb"), SlowFuzzyQuery.defaultMinSimilarity, 0, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals("only 2 documents should match", 2, hits.length);
|
|
||||||
order = Arrays.asList("bbbbb","abbbb");
|
|
||||||
for (int i = 0; i < hits.length; i++) {
|
|
||||||
final String term = searcher.doc(hits[i].doc).get("field");
|
|
||||||
//System.out.println(hits[i].score);
|
|
||||||
assertEquals(order.get(i), term);
|
|
||||||
}
|
|
||||||
|
|
||||||
// not similar enough:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaccc"), SlowFuzzyQuery.defaultMinSimilarity, 0); // edit distance to "aaaaa" = 3
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// query identical to a word in the index:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaa"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
// default allows for up to two edits:
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
|
||||||
|
|
||||||
// query similar to a word in the index:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
|
||||||
|
|
||||||
// now with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
assertEquals(searcher.doc(hits[2].doc).get("field"), ("aaabb"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(2, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaa"));
|
|
||||||
assertEquals(searcher.doc(hits[1].doc).get("field"), ("aaaab"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaac"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
|
||||||
|
|
||||||
// now with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 3);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 4);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("ddddd"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 5);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
|
|
||||||
// different field = no match:
|
|
||||||
query = new SlowFuzzyQuery(new Term("anotherfield", "ddddX"), SlowFuzzyQuery.defaultMinSimilarity, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testFuzzinessLong2() throws Exception {
|
|
||||||
//Lucene-5033
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("abcdef", writer);
|
|
||||||
addDoc("segment", writer);
|
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
SlowFuzzyQuery query;
|
|
||||||
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 3f, 0);
|
|
||||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "abcxxxx"), 4f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
reader.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testFuzzinessLong() throws Exception {
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("aaaaaaa", writer);
|
|
||||||
addDoc("segment", writer);
|
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
SlowFuzzyQuery query;
|
|
||||||
// not similar enough:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "xxxxx"), 0.5f, 0);
|
|
||||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
// edit distance to "aaaaaaa" = 3, this matches because the string is longer than
|
|
||||||
// in testDefaultFuzziness so a bigger difference is allowed:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
|
||||||
|
|
||||||
// now with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 4);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals(searcher.doc(hits[0].doc).get("field"), ("aaaaaaa"));
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaaaccc"), 0.5f, 5);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// no match, more than half of the characters is wrong:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// now with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "aaacccc"), 0.5f, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// "student" and "stellent" are indeed similar to "segment" by default:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
|
|
||||||
// now with prefix
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 1);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.5f, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "stellent"), 0.5f, 2);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// "student" doesn't match anymore thanks to increased minimum similarity:
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "student"), 0.6f, 0);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
expectThrows(IllegalArgumentException.class, () -> {
|
|
||||||
new SlowFuzzyQuery(new Term("field", "student"), 1.1f);
|
|
||||||
});
|
|
||||||
|
|
||||||
expectThrows(IllegalArgumentException.class, () -> {
|
|
||||||
new SlowFuzzyQuery(new Term("field", "student"), -0.1f);
|
|
||||||
});
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* MultiTermQuery provides (via attribute) information about which values
|
|
||||||
* must be competitive to enter the priority queue.
|
|
||||||
*
|
|
||||||
* SlowFuzzyQuery optimizes itself around this information, if the attribute
|
|
||||||
* is not implemented correctly, there will be problems!
|
|
||||||
*/
|
|
||||||
public void testTieBreaker() throws Exception {
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("a123456", writer);
|
|
||||||
addDoc("c123456", writer);
|
|
||||||
addDoc("d123456", writer);
|
|
||||||
addDoc("e123456", writer);
|
|
||||||
|
|
||||||
Directory directory2 = newDirectory();
|
|
||||||
RandomIndexWriter writer2 = new RandomIndexWriter(random(), directory2);
|
|
||||||
addDoc("a123456", writer2);
|
|
||||||
addDoc("b123456", writer2);
|
|
||||||
addDoc("b123456", writer2);
|
|
||||||
addDoc("b123456", writer2);
|
|
||||||
addDoc("c123456", writer2);
|
|
||||||
addDoc("f123456", writer2);
|
|
||||||
|
|
||||||
IndexReader ir1 = writer.getReader();
|
|
||||||
IndexReader ir2 = writer2.getReader();
|
|
||||||
|
|
||||||
MultiReader mr = new MultiReader(ir1, ir2);
|
|
||||||
IndexSearcher searcher = newSearcher(mr);
|
|
||||||
SlowFuzzyQuery fq = new SlowFuzzyQuery(new Term("field", "z123456"), 1f, 0, 2);
|
|
||||||
TopDocs docs = searcher.search(fq, 2);
|
|
||||||
assertEquals(5, docs.totalHits); // 5 docs, from the a and b's
|
|
||||||
mr.close();
|
|
||||||
ir1.close();
|
|
||||||
ir2.close();
|
|
||||||
writer.close();
|
|
||||||
writer2.close();
|
|
||||||
directory.close();
|
|
||||||
directory2.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testTokenLengthOpt() throws IOException {
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("12345678911", writer);
|
|
||||||
addDoc("segment", writer);
|
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
Query query;
|
|
||||||
// term not over 10 chars, so optimization shortcuts
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "1234569"), 0.9f);
|
|
||||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// 10 chars, so no optimization
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "1234567891"), 0.9f);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
// over 10 chars, so no optimization
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "12345678911"), 0.9f);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
|
|
||||||
// over 10 chars, no match
|
|
||||||
query = new SlowFuzzyQuery(new Term("field", "sdfsdfsdfsdf"), 0.9f);
|
|
||||||
hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(0, hits.length);
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. */
|
|
||||||
public void testBoostOnlyRewrite() throws Exception {
|
|
||||||
Directory directory = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
|
|
||||||
addDoc("Lucene", writer);
|
|
||||||
addDoc("Lucene", writer);
|
|
||||||
addDoc("Lucenne", writer);
|
|
||||||
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
SlowFuzzyQuery query = new SlowFuzzyQuery(new Term("field", "lucene"));
|
|
||||||
query.setRewriteMethod(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50));
|
|
||||||
ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
|
|
||||||
assertEquals(3, hits.length);
|
|
||||||
// normally, 'Lucenne' would be the first result as IDF will skew the score.
|
|
||||||
assertEquals("Lucene", reader.document(hits[0].doc).get("field"));
|
|
||||||
assertEquals("Lucene", reader.document(hits[1].doc).get("field"));
|
|
||||||
assertEquals("Lucenne", reader.document(hits[2].doc).get("field"));
|
|
||||||
reader.close();
|
|
||||||
directory.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testGiga() throws Exception {
|
|
||||||
|
|
||||||
Directory index = newDirectory();
|
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), index);
|
|
||||||
|
|
||||||
addDoc("Lucene in Action", w);
|
|
||||||
addDoc("Lucene for Dummies", w);
|
|
||||||
|
|
||||||
//addDoc("Giga", w);
|
|
||||||
addDoc("Giga byte", w);
|
|
||||||
|
|
||||||
addDoc("ManagingGigabytesManagingGigabyte", w);
|
|
||||||
addDoc("ManagingGigabytesManagingGigabytes", w);
|
|
||||||
|
|
||||||
addDoc("The Art of Computer Science", w);
|
|
||||||
addDoc("J. K. Rowling", w);
|
|
||||||
addDoc("JK Rowling", w);
|
|
||||||
addDoc("Joanne K Roling", w);
|
|
||||||
addDoc("Bruce Willis", w);
|
|
||||||
addDoc("Willis bruce", w);
|
|
||||||
addDoc("Brute willis", w);
|
|
||||||
addDoc("B. willis", w);
|
|
||||||
IndexReader r = w.getReader();
|
|
||||||
w.close();
|
|
||||||
|
|
||||||
Query q = new SlowFuzzyQuery(new Term("field", "giga"), 0.9f);
|
|
||||||
|
|
||||||
// 3. search
|
|
||||||
IndexSearcher searcher = newSearcher(r);
|
|
||||||
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals("Giga byte", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
r.close();
|
|
||||||
index.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testDistanceAsEditsSearching() throws Exception {
|
|
||||||
Directory index = newDirectory();
|
|
||||||
RandomIndexWriter w = new RandomIndexWriter(random(), index);
|
|
||||||
addDoc("foobar", w);
|
|
||||||
addDoc("test", w);
|
|
||||||
addDoc("working", w);
|
|
||||||
IndexReader reader = w.getReader();
|
|
||||||
IndexSearcher searcher = newSearcher(reader);
|
|
||||||
w.close();
|
|
||||||
|
|
||||||
SlowFuzzyQuery q = new SlowFuzzyQuery(new Term("field", "fouba"), 2);
|
|
||||||
ScoreDoc[] hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
|
|
||||||
q = new SlowFuzzyQuery(new Term("field", "foubara"), 2);
|
|
||||||
hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals("foobar", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
|
|
||||||
q = new SlowFuzzyQuery(new Term("field", "t"), 3);
|
|
||||||
hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
|
|
||||||
q = new SlowFuzzyQuery(new Term("field", "a"), 4f, 0, 50);
|
|
||||||
hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(1, hits.length);
|
|
||||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
|
|
||||||
q = new SlowFuzzyQuery(new Term("field", "a"), 6f, 0, 50);
|
|
||||||
hits = searcher.search(q, 10).scoreDocs;
|
|
||||||
assertEquals(2, hits.length);
|
|
||||||
assertEquals("test", searcher.doc(hits[0].doc).get("field"));
|
|
||||||
assertEquals("foobar", searcher.doc(hits[1].doc).get("field"));
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
index.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void addDoc(String text, RandomIndexWriter writer) throws IOException {
|
|
||||||
Document doc = new Document();
|
|
||||||
doc.add(newTextField("field", text, Field.Store.YES));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
}
|
|
||||||
}
|
|
@ -415,40 +415,42 @@ public class DirectSpellChecker {
     BoostAttribute boostAtt =
       e.attributes().addAttribute(BoostAttribute.class);
     while ((candidateTerm = e.next()) != null) {
-      final float boost = boostAtt.getBoost();
+      // For FuzzyQuery, boost is the score:
+      float score = boostAtt.getBoost();
       // ignore uncompetitive hits
-      if (stQueue.size() >= numSug && boost <= stQueue.peek().boost)
+      if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
         continue;
+      }
 
       // ignore exact match of the same term
-      if (queryTerm.bytesEquals(candidateTerm))
+      if (queryTerm.bytesEquals(candidateTerm)) {
         continue;
+      }
 
       int df = e.docFreq();
 
       // check docFreq if required
-      if (df <= docfreq)
+      if (df <= docfreq) {
         continue;
+      }
 
-      final float score;
       final String termAsString;
       if (distance == INTERNAL_LEVENSHTEIN) {
         // delay creating strings until the end
         termAsString = null;
-        // undo FuzzyTermsEnum's scale factor for a real scaled lev score
-        score = boost / e.getScaleFactor() + e.getMinSimilarity();
       } else {
         spare.copyUTF8Bytes(candidateTerm);
         termAsString = spare.toString();
         score = distance.getDistance(term.text(), termAsString);
       }
 
-      if (score < accuracy)
+      if (score < accuracy) {
         continue;
+      }
 
       // add new entry in PQ
       st.term = BytesRef.deepCopyOf(candidateTerm);
-      st.boost = boost;
+      st.boost = score;
       st.docfreq = df;
       st.termAsString = termAsString;
       st.score = score;
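As a side note, the "ignore uncompetitive hits" check above follows the usual bounded priority-queue pattern. A minimal standalone sketch with simplified names (not the actual DirectSpellChecker code):

import java.util.PriorityQueue;

class TopNSketch {
  public static void main(String[] args) {
    int numSug = 3;
    PriorityQueue<Float> stQueue = new PriorityQueue<>(); // head = worst score currently kept
    for (float score : new float[] {0.9f, 0.4f, 0.7f, 0.95f, 0.2f, 0.8f}) {
      if (stQueue.size() >= numSug && score <= stQueue.peek()) {
        continue; // uncompetitive: cannot displace anything already in the queue
      }
      stQueue.offer(score);
      if (stQueue.size() > numSug) {
        stQueue.poll(); // evict the current worst so only numSug suggestions remain
      }
    }
    System.out.println(stQueue); // keeps the three best scores: 0.8, 0.9, 0.95
  }
}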
@ -77,6 +77,15 @@ prefix, then you will now get an error as these options are incompatible with nu
 
 New Features
 ----------------------
+* SOLR-5725: facet.method=enum can bypass exact counts calculation with facet.exists=true, it just returns 1 for
+  terms which exists in result docset. (Alexey Kozhemiakin, Sebastian Koziel, Radoslaw Zielinski via Mikhail Khludnev)
+
+* SOLR-9127: Excel workbook (.xlsx) response writer. use 'wt=xlsx' (Tony Moriarty, noble)
+
+* SOLR-9469: JettySolrRunner now has the option of restarting using a different
+  port (Alan Woodward)
+
+* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble)
 
 Bug Fixes
 ----------------------
@ -103,12 +112,19 @@ Bug Fixes
 
 * SOLR-9461: DELETENODE, REPLACENODE should pass down the 'async' param to subcommands (shalin, noble)
 
-* SOLR-9319: DELETEREPLICA can accept a 'count' and remove appropriate replicas (Nitin Sharma, noble )
-
 * SOLR-9444: Fix path usage for cloud backup/restore. (Hrishikesh Gadre, Uwe Schindler, Varun Thacker)
 
 * SOLR-9381: Snitch for freedisk uses '/' instead of 'coreRootDirectory' (Tim Owen, noble)
 
+* SOLR-9488: Shard split can fail to write commit data on shutdown/restart causing replicas to recover
+  without replicating the index. This can cause data loss. (shalin)
+
+* SOLR-9490: Fixed bugs in BoolField that caused it to erroneously return "false" for all docs depending
+  on usage (Colvin Cowie, Dan Fox, hossman)
+
+* SOLR-9438: Shard split can be marked successful and sub-shard states switched to 'active' even when
+  one or more sub-shards replicas do not recover due to the leader crashing or restarting between the time
+  the replicas are created and before they can recover. This can cause data loss. (shalin)
+
 Optimizations
 ----------------------
@ -135,6 +151,27 @@ Other Changes
 
 * SOLR-9406: SolrSuggester should selectively register close hook (Gethin James, Joel Bernstein)
+
+* SOLR-8961: Add a test module for solr-test-framework (Alan Woodward)
+
+* SOLR-9474: MiniSolrCloudCluster will not reuse ports by default when
+  restarting its JettySolrRunners (Alan Woodward)
+
+* SOLR-9498: Remove HDFS properties from DIH solrconfig.xml, as started in SOLR-6943 (Alexandre Rafalovitch)
+
+* SOLR-9365: Reduce noise in solr logs during graceful shutdown. (Cao Manh Dat via shalin)
+
+================== 6.2.1 ==================
+
+Bug Fixes
+----------------------
+
+* SOLR-9494: Use of {!collapse} sometimes doesn't correctly return true for Collector.needsScores(), especially when the
+  query was cached. This can cause an exception when 'q' is a SpanQuery or potentially others. (David Smiley)
+
+* SOLR-9408: Fix TreeMergeOutputFormat to add timestamp metadata to a commit. SolrCloud replication relies on this.
+  (Jessica Cheng Mallet via Varun Thacker)
+
 ================== 6.2.0 ==================
 
 Versions of Major Components
@ -1168,6 +1205,23 @@ Other Changes
 * SOLR-8904: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil. Obsolete methods were removed.
   (David Smiley)
 
+======================= 5.5.3 =======================
+
+Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
+
+Versions of Major Components
+---------------------
+Apache Tika 1.13
+Carrot2 3.12.0
+Velocity 1.7 and Velocity Tools 2.0
+Apache UIMA 2.3.1
+Apache ZooKeeper 3.4.6
+Jetty 9.3.8.v20160314
+
+
+(No Changes)
+
 ======================= 5.5.2 =======================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||||
|
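A hedged illustration of the SOLR-5725 entry above: only the facet parameter names come from the entry, the query and field name are invented for the example.

import org.apache.solr.common.params.ModifiableSolrParams;

class FacetExistsSketch {
  public static void main(String[] args) {
    // Request sketch: enum faceting that only reports term existence (counts capped at 1).
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q", "*:*");
    params.set("facet", true);
    params.set("facet.field", "cat");   // hypothetical field name
    params.set("facet.method", "enum");
    params.set("facet.exists", true);   // the SOLR-5725 switch described above
    System.out.println(params);
  }
}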
@ -0,0 +1,414 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.handler.extraction;
|
||||||
|
|
||||||
|
import java.io.CharArrayWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.io.Writer;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
import org.apache.lucene.index.IndexableField;
|
||||||
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
|
import org.apache.poi.ss.usermodel.Font;
|
||||||
|
import org.apache.poi.ss.usermodel.IndexedColors;
|
||||||
|
import org.apache.poi.ss.usermodel.Row;
|
||||||
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
|
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||||
|
import org.apache.solr.common.SolrDocument;
|
||||||
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
|
import org.apache.solr.common.params.SolrParams;
|
||||||
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
import org.apache.solr.request.SolrQueryRequest;
|
||||||
|
import org.apache.solr.response.BasicResultContext;
|
||||||
|
import org.apache.solr.response.RawResponseWriter;
|
||||||
|
import org.apache.solr.response.ResultContext;
|
||||||
|
import org.apache.solr.response.SolrQueryResponse;
|
||||||
|
import org.apache.solr.response.TextResponseWriter;
|
||||||
|
import org.apache.solr.schema.FieldType;
|
||||||
|
import org.apache.solr.schema.SchemaField;
|
||||||
|
import org.apache.solr.schema.StrField;
|
||||||
|
import org.apache.solr.search.DocList;
|
||||||
|
import org.apache.solr.search.ReturnFields;
|
||||||
|
|
||||||
|
public class XLSXResponseWriter extends RawResponseWriter {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void write(OutputStream out, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
|
||||||
|
// throw away arraywriter just to satisfy super requirements; we're grabbing
|
||||||
|
// all writes before they go to it anyway
|
||||||
|
XLSXWriter w = new XLSXWriter(new CharArrayWriter(), req, rsp);
|
||||||
|
|
||||||
|
LinkedHashMap<String,String> reqNamesMap = new LinkedHashMap<>();
|
||||||
|
LinkedHashMap<String,Integer> reqWidthsMap = new LinkedHashMap<>();
|
||||||
|
|
||||||
|
Iterator<String> paramNamesIter = req.getParams().getParameterNamesIterator();
|
||||||
|
while (paramNamesIter.hasNext()) {
|
||||||
|
String nextParam = paramNamesIter.next();
|
||||||
|
if (nextParam.startsWith("colname.")) {
|
||||||
|
String field = nextParam.substring("colname.".length());
|
||||||
|
reqNamesMap.put(field, req.getParams().get(nextParam));
|
||||||
|
} else if (nextParam.startsWith("colwidth.")) {
|
||||||
|
String field = nextParam.substring("colwidth.".length());
|
||||||
|
reqWidthsMap.put(field, req.getParams().getInt(nextParam));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
w.writeResponse(out, reqNamesMap, reqWidthsMap);
|
||||||
|
} finally {
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getContentType(SolrQueryRequest request, SolrQueryResponse response) {
|
||||||
|
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class XLSXWriter extends TextResponseWriter {
|
||||||
|
|
||||||
|
SolrQueryRequest req;
|
||||||
|
SolrQueryResponse rsp;
|
||||||
|
|
||||||
|
class SerialWriteWorkbook {
|
||||||
|
SXSSFWorkbook swb;
|
||||||
|
Sheet sh;
|
||||||
|
|
||||||
|
XSSFCellStyle headerStyle;
|
||||||
|
int rowIndex;
|
||||||
|
Row curRow;
|
||||||
|
int cellIndex;
|
||||||
|
|
||||||
|
SerialWriteWorkbook() {
|
||||||
|
this.swb = new SXSSFWorkbook(100);
|
||||||
|
this.sh = this.swb.createSheet();
|
||||||
|
|
||||||
|
this.rowIndex = 0;
|
||||||
|
|
||||||
|
this.headerStyle = (XSSFCellStyle)swb.createCellStyle();
|
||||||
|
this.headerStyle.setFillBackgroundColor(IndexedColors.BLACK.getIndex());
|
||||||
|
//solid fill
|
||||||
|
this.headerStyle.setFillPattern((short)1);
|
||||||
|
Font headerFont = swb.createFont();
|
||||||
|
headerFont.setFontHeightInPoints((short)14);
|
||||||
|
headerFont.setBoldweight(Font.BOLDWEIGHT_BOLD);
|
||||||
|
headerFont.setColor(IndexedColors.WHITE.getIndex());
|
||||||
|
this.headerStyle.setFont(headerFont);
|
||||||
|
}
|
||||||
|
|
||||||
|
void addRow() {
|
||||||
|
curRow = sh.createRow(rowIndex++);
|
||||||
|
cellIndex = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHeaderRow() {
|
||||||
|
curRow.setHeightInPoints((short)21);
|
||||||
|
}
|
||||||
|
|
||||||
|
//sets last created cell to have header style
|
||||||
|
void setHeaderCell() {
|
||||||
|
curRow.getCell(cellIndex - 1).setCellStyle(this.headerStyle);
|
||||||
|
}
|
||||||
|
|
||||||
|
//set the width of the most recently created column
|
||||||
|
void setColWidth(int charWidth) {
|
||||||
|
//width in poi is units of 1/256th of a character width for some reason
|
||||||
|
this.sh.setColumnWidth(cellIndex - 1, 256*charWidth);
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeCell(String value) {
|
||||||
|
Cell cell = curRow.createCell(cellIndex++);
|
||||||
|
cell.setCellValue(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void flush(OutputStream out) {
|
||||||
|
try {
|
||||||
|
swb.write(out);
|
||||||
|
} catch (IOException e) {
|
||||||
|
StringWriter sw = new StringWriter();
|
||||||
|
e.printStackTrace(new PrintWriter(sw));
|
||||||
|
String stacktrace = sw.toString();
|
||||||
|
}finally {
|
||||||
|
swb.dispose();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SerialWriteWorkbook wb = new SerialWriteWorkbook();
|
||||||
|
|
||||||
|
static class XLField {
|
||||||
|
String name;
|
||||||
|
SchemaField sf;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String,XLField> xlFields = new LinkedHashMap<String,XLField>();
|
||||||
|
|
||||||
|
public XLSXWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp){
|
||||||
|
super(writer, req, rsp);
|
||||||
|
this.req = req;
|
||||||
|
this.rsp = rsp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void writeResponse(OutputStream out, LinkedHashMap<String, String> colNamesMap,
|
||||||
|
LinkedHashMap<String, Integer> colWidthsMap) throws IOException {
|
||||||
|
SolrParams params = req.getParams();
|
||||||
|
|
||||||
|
Collection<String> fields = returnFields.getRequestedFieldNames();
|
||||||
|
Object responseObj = rsp.getValues().get("response");
|
||||||
|
boolean returnOnlyStored = false;
|
||||||
|
if (fields==null||returnFields.hasPatternMatching()) {
|
||||||
|
if (responseObj instanceof SolrDocumentList) {
|
||||||
|
// get the list of fields from the SolrDocumentList
|
||||||
|
if(fields==null) {
|
||||||
|
fields = new LinkedHashSet<String>();
|
||||||
|
}
|
||||||
|
for (SolrDocument sdoc: (SolrDocumentList)responseObj) {
|
||||||
|
fields.addAll(sdoc.getFieldNames());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// get the list of fields from the index
|
||||||
|
Iterable<String> all = req.getSearcher().getFieldNames();
|
||||||
|
if (fields == null) {
|
||||||
|
fields = Sets.newHashSet(all);
|
||||||
|
} else {
|
||||||
|
Iterables.addAll(fields, all);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (returnFields.wantsScore()) {
|
||||||
|
fields.add("score");
|
||||||
|
} else {
|
||||||
|
fields.remove("score");
|
||||||
|
}
|
||||||
|
returnOnlyStored = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String field : fields) {
|
||||||
|
if (!returnFields.wantsField(field)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (field.equals("score")) {
|
||||||
|
XLField xlField = new XLField();
|
||||||
|
xlField.name = "score";
|
||||||
|
xlFields.put("score", xlField);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
SchemaField sf = schema.getFieldOrNull(field);
|
||||||
|
if (sf == null) {
|
||||||
|
FieldType ft = new StrField();
|
||||||
|
sf = new SchemaField(field, ft);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return only stored fields, unless an explicit field list is specified
|
||||||
|
if (returnOnlyStored && sf != null && !sf.stored()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
XLField xlField = new XLField();
|
||||||
|
xlField.name = field;
|
||||||
|
xlField.sf = sf;
|
||||||
|
xlFields.put(field, xlField);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
wb.addRow();
|
||||||
|
//write header
|
||||||
|
for (XLField xlField : xlFields.values()) {
|
||||||
|
String printName = xlField.name;
|
||||||
|
int colWidth = 14;
|
||||||
|
|
||||||
|
String niceName = colNamesMap.get(xlField.name);
|
||||||
|
if (niceName != null) {
|
||||||
|
printName = niceName;
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer niceWidth = colWidthsMap.get(xlField.name);
|
||||||
|
if (niceWidth != null) {
|
||||||
|
colWidth = niceWidth.intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
writeStr(xlField.name, printName, false);
|
||||||
|
wb.setColWidth(colWidth);
|
||||||
|
wb.setHeaderCell();
|
||||||
|
}
|
||||||
|
wb.setHeaderRow();
|
||||||
|
wb.addRow();
|
||||||
|
|
||||||
|
if (responseObj instanceof ResultContext) {
|
||||||
|
writeDocuments(null, (ResultContext)responseObj );
|
||||||
|
}
|
||||||
|
else if (responseObj instanceof DocList) {
|
||||||
|
ResultContext ctx = new BasicResultContext((DocList)responseObj, returnFields, null, null, req);
|
||||||
|
writeDocuments(null, ctx );
|
||||||
|
} else if (responseObj instanceof SolrDocumentList) {
|
||||||
|
writeSolrDocumentList(null, (SolrDocumentList)responseObj, returnFields );
|
||||||
|
}
|
||||||
|
|
||||||
|
wb.flush(out);
|
||||||
|
wb = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeNamedList(String name, NamedList val) throws IOException {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeStartDocumentList(String name,
|
||||||
|
long start, int size, long numFound, Float maxScore) throws IOException
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeEndDocumentList() throws IOException
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
//NOTE: a document cannot currently contain another document
|
||||||
|
List tmpList;
|
||||||
|
@Override
|
||||||
|
public void writeSolrDocument(String name, SolrDocument doc, ReturnFields returnFields, int idx ) throws IOException {
|
||||||
|
if (tmpList == null) {
|
||||||
|
tmpList = new ArrayList(1);
|
||||||
|
tmpList.add(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (XLField xlField : xlFields.values()) {
|
||||||
|
Object val = doc.getFieldValue(xlField.name);
|
||||||
|
int nVals = val instanceof Collection ? ((Collection)val).size() : (val==null ? 0 : 1);
|
||||||
|
if (nVals == 0) {
|
||||||
|
writeNull(xlField.name);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((xlField.sf != null && xlField.sf.multiValued()) || nVals > 1) {
|
||||||
|
Collection values;
|
||||||
|
// normalize to a collection
|
||||||
|
if (val instanceof Collection) {
|
||||||
|
values = (Collection)val;
|
||||||
|
} else {
|
||||||
|
tmpList.set(0, val);
|
||||||
|
values = tmpList;
|
||||||
|
}
|
||||||
|
|
||||||
|
writeArray(xlField.name, values.iterator());
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// normalize to first value
|
||||||
|
if (val instanceof Collection) {
|
||||||
|
Collection values = (Collection)val;
|
||||||
|
val = values.iterator().next();
|
||||||
|
}
|
||||||
|
writeVal(xlField.name, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wb.addRow();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeMap(String name, Map val, boolean excludeOuter, boolean isFirstVal) throws IOException {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeArray(String name, Iterator val) throws IOException {
|
||||||
|
StringBuffer output = new StringBuffer();
|
||||||
|
while (val.hasNext()) {
|
||||||
|
Object v = val.next();
|
||||||
|
if (v instanceof IndexableField) {
|
||||||
|
IndexableField f = (IndexableField)v;
|
||||||
|
if (v instanceof Date) {
|
||||||
|
output.append(((Date) val).toInstant().toString() + "; ");
|
||||||
|
} else {
|
||||||
|
output.append(f.stringValue() + "; ");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
output.append(v.toString() + "; ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (output.length() > 0) {
|
||||||
|
output.deleteCharAt(output.length()-1);
|
||||||
|
output.deleteCharAt(output.length()-1);
|
||||||
|
}
|
||||||
|
writeStr(name, output.toString(), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeNull(String name) throws IOException {
|
||||||
|
wb.writeCell("");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeInt(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeLong(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeBool(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeFloat(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeDouble(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeDate(String name, Date val) throws IOException {
|
||||||
|
writeDate(name, val.toInstant().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeDate(String name, String val) throws IOException {
|
||||||
|
wb.writeCell(val);
|
||||||
|
}
|
||||||
|
}
|
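For reference, the colname.<field> and colwidth.<field> request parameters handled in XLSXResponseWriter.write() above can be supplied like this; a minimal sketch whose values mirror TestXLSXResponseWriter further below, not code from this commit.

import org.apache.solr.common.params.ModifiableSolrParams;

class XlsxParamsSketch {
  public static void main(String[] args) {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("q", "id:1");
    params.set("wt", "xlsx");
    params.set("fl", "id,score,foo_s");
    params.set("colname.id", "I.D.");  // header text for the "id" column
    params.set("colwidth.id", 10);     // width in characters (POI stores it in 256ths)
    System.out.println(params);
  }
}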
@ -415,6 +415,7 @@
   -->
   <dynamicField name="*_i"  type="int"    indexed="true" stored="true"/>
   <dynamicField name="*_s"  type="string" indexed="true" stored="true"/>
+  <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
   <dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
   <dynamicField name="*_l"  type="long"   indexed="true" stored="true"/>
   <dynamicField name="*_t"  type="text"   indexed="true" stored="true"/>
@ -422,6 +423,7 @@
   <dynamicField name="*_f"  type="float"  indexed="true" stored="true"/>
   <dynamicField name="*_d"  type="double" indexed="true" stored="true"/>
   <dynamicField name="*_dt" type="date"   indexed="true" stored="true"/>
+  <dynamicField name="*_dt1" type="date"  indexed="true" stored="true" multiValued="false"/>
 
   <dynamicField name="*_sI" type="string" indexed="true"  stored="false"/>
   <dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||||
|
@ -0,0 +1,257 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.solr.handler.extraction;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.time.Instant;
import java.util.Date;

import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;

import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.QueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.response.RawResponseWriter;
import org.apache.solr.search.SolrReturnFields;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestXLSXResponseWriter extends SolrTestCaseJ4 {

  private static XLSXResponseWriter writerXlsx;

  @BeforeClass
  public static void beforeClass() throws Exception {
    System.setProperty("enable.update.log", "false");
    initCore("solrconfig.xml","schema.xml",getFile("extraction/solr").getAbsolutePath());
    createIndex();
    //find a reference to the default response writer so we can redirect its output later
    SolrCore testCore = h.getCore();
    QueryResponseWriter writer = testCore.getQueryResponseWriter("xlsx");
    if (writer instanceof XLSXResponseWriter) {
      writerXlsx = (XLSXResponseWriter) testCore.getQueryResponseWriter("xlsx");
    } else {
      throw new Exception("XLSXResponseWriter not registered with solr core");
    }
  }

  public static void createIndex() {
    assertU(adoc("id","1", "foo_i","-1", "foo_s","hi", "foo_l","12345678987654321", "foo_b","false", "foo_f","1.414","foo_d","-1.0E300","foo_dt1","2000-01-02T03:04:05Z"));
    assertU(adoc("id","2", "v_ss","hi", "v_ss","there", "v2_ss","nice", "v2_ss","output", "shouldbeunstored","foo"));
    assertU(adoc("id","3", "shouldbeunstored","foo"));
    assertU(adoc("id","4", "foo_s1","foo"));
    assertU(commit());
  }

  @AfterClass
  public static void cleanupWriter() throws Exception {
    writerXlsx = null;
  }

  @Test
  public void testStructuredDataViaBaseWriters() throws IOException, Exception {
    SolrQueryResponse rsp = new SolrQueryResponse();
    // Don't send a ContentStream back, this will fall back to the configured base writer.
    // But abuse the CONTENT key to ensure writer is also checking type
    rsp.add(RawResponseWriter.CONTENT, "test");
    rsp.add("foo", "bar");

    SolrQueryRequest r = req();

    // check Content-Type
    assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", writerXlsx.getContentType(r, rsp));

    // test our basic types,and that fields come back in the requested order
    XSSFSheet resultSheet = getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1"));

    assertEquals("id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1\n1,hi,-1,12345678987654321,F,1.414,-1.0E300,2000-01-02T03:04:05Z\n"
        , getStringFromSheet(resultSheet));

    resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "fl","id,score,foo_s"));
    // test retrieving score
    assertEquals("id,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));

    resultSheet = getWSResultForQuery(req("q","id:1^0", "wt","xlsx", "colname.id", "I.D.", "colwidth.id", "10",
        "fl","id,score,foo_s"));
    // test override colname/width
    assertEquals("I.D.,score,foo_s\n1,0.0,hi\n", getStringFromSheet(resultSheet));
    // test colwidth (value returned is in 256ths of a character as per excel standard)
    assertEquals(10*256, resultSheet.getColumnWidth(0));

    resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,v_ss"));
    // test multivalued
    assertEquals("id,v_ss\n2,hi; there\n", getStringFromSheet(resultSheet));

    // test retrieving fields from index
    resultSheet = getWSResultForQuery(req("q","*:*", "wt","xslx", "fl","*,score"));
    String result = getStringFromSheet(resultSheet);
    for (String field : "id,foo_s,foo_i,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss,score".split(",")) {
      assertTrue(result.indexOf(field) >= 0);
    }

    // test null values
    resultSheet = getWSResultForQuery(req("q","id:2", "wt","xlsx", "fl","id,foo_s,v_ss"));
    assertEquals("id,foo_s,v_ss\n2,,hi; there\n", getStringFromSheet(resultSheet));

    // now test SolrDocumentList
    SolrDocument d = new SolrDocument();
    SolrDocument d1 = d;
    d.addField("id","1");
    d.addField("foo_i",-1);
    d.addField("foo_s","hi");
    d.addField("foo_l","12345678987654321L");
    d.addField("foo_b",false);
    d.addField("foo_f",1.414f);
    d.addField("foo_d",-1.0E300);
    d.addField("foo_dt1", new Date(Instant.parse("2000-01-02T03:04:05Z").toEpochMilli()));
    d.addField("score", "2.718");

    d = new SolrDocument();
    SolrDocument d2 = d;
    d.addField("id","2");
    d.addField("v_ss","hi");
    d.addField("v_ss","there");
    d.addField("v2_ss","nice");
    d.addField("v2_ss","output");
    d.addField("score", "89.83");
    d.addField("shouldbeunstored","foo");

    SolrDocumentList sdl = new SolrDocumentList();
    sdl.add(d1);
    sdl.add(d2);

    SolrQueryRequest req = req("q","*:*");
    rsp = new SolrQueryResponse();
    rsp.addResponse(sdl);

    rsp.setReturnFields( new SolrReturnFields("id,foo_s", req) );

    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("id,foo_s\n1,hi\n2,\n", getStringFromSheet(resultSheet));

    // try scores
    rsp.setReturnFields( new SolrReturnFields("id,score,foo_s", req) );

    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("id,score,foo_s\n1,2.718,hi\n2,89.83,\n", getStringFromSheet(resultSheet));

    // get field values from docs... should be ordered and not include score unless requested
    rsp.setReturnFields( new SolrReturnFields("*", req) );

    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1,v_ss,v2_ss\n" +
        "1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z,,\n" +
        "2,,,,,,,,hi; there,nice; output\n", getStringFromSheet(resultSheet));

    // get field values and scores - just check that the scores are there... we don't guarantee where
    rsp.setReturnFields( new SolrReturnFields("*,score", req) );
    resultSheet = getWSResultForQuery(req, rsp);
    String s = getStringFromSheet(resultSheet);
    assertTrue(s.indexOf("score") >=0 && s.indexOf("2.718") > 0 && s.indexOf("89.83") > 0 );

    // Test field globs
    rsp.setReturnFields( new SolrReturnFields("id,foo*", req) );
    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("id,foo_i,foo_s,foo_l,foo_b,foo_f,foo_d,foo_dt1\n" +
        "1,-1,hi,12345678987654321L,false,1.414,-1.0E300,2000-01-02T03:04:05Z\n" +
        "2,,,,,,,\n", getStringFromSheet(resultSheet));

    rsp.setReturnFields( new SolrReturnFields("id,*_d*", req) );
    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("id,foo_d,foo_dt1\n" +
        "1,-1.0E300,2000-01-02T03:04:05Z\n" +
        "2,,\n", getStringFromSheet(resultSheet));

    // Test function queries
    rsp.setReturnFields( new SolrReturnFields("sum(1,1),id,exists(foo_s1),div(9,1),foo_f", req) );
    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("sum(1,1),id,exists(foo_s1),div(9,1),foo_f\n" +
        ",1,,,1.414\n" +
        ",2,,,\n", getStringFromSheet(resultSheet));

    // Test transformers
    rsp.setReturnFields( new SolrReturnFields("mydocid:[docid],[explain]", req) );
    resultSheet = getWSResultForQuery(req, rsp);
    assertEquals("mydocid,[explain]\n" +
        ",\n" +
        ",\n", getStringFromSheet(resultSheet));

    req.close();
  }


  @Test
  public void testPseudoFields() throws Exception {
    // Use Pseudo Field
    SolrQueryRequest req = req("q","id:1", "wt","xlsx", "fl","XXX:id,foo_s");
    XSSFSheet resultSheet = getWSResultForQuery(req);
    assertEquals("XXX,foo_s\n1,hi\n", getStringFromSheet(resultSheet));

    String txt = getStringFromSheet(getWSResultForQuery(req("q","id:1", "wt","xlsx", "fl","XXX:id,YYY:[docid],FOO:foo_s")));
    String[] lines = txt.split("\n");
    assertEquals(2, lines.length);
    assertEquals("XXX,YYY,FOO", lines[0] );
    assertEquals("1,0,hi", lines[1] );

    //assertions specific to multiple pseudofields functions like abs, div, exists, etc.. (SOLR-5423)
    String funcText = getStringFromSheet(getWSResultForQuery(req("q","*", "wt","xlsx", "fl","XXX:id,YYY:exists(foo_s1)")));
    String[] funcLines = funcText.split("\n");
    assertEquals(5, funcLines.length);
    assertEquals("XXX,YYY", funcLines[0] );
    assertEquals("1,false", funcLines[1] );
    assertEquals("3,false", funcLines[3] );
  }

  // returns first worksheet as XLSXResponseWriter only returns one sheet
  private XSSFSheet getWSResultForQuery(SolrQueryRequest req) throws IOException, Exception {
    SolrQueryResponse rsp = h.queryAndResponse("standard", req);
    return getWSResultForQuery(req, rsp);
  }

  private XSSFSheet getWSResultForQuery(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, Exception {
    ByteArrayOutputStream xmlBout = new ByteArrayOutputStream();
    writerXlsx.write(xmlBout, req, rsp);
    XSSFWorkbook output = new XSSFWorkbook(new ByteArrayInputStream(xmlBout.toByteArray()));
    XSSFSheet sheet = output.getSheetAt(0);
    req.close();
    output.close();
    return sheet;
  }

  private String getStringFromSheet(XSSFSheet sheet) {
    StringBuilder output = new StringBuilder();
    for (Row row: sheet) {
      for (Cell cell: row) {
        output.append(cell.getStringCellValue());
        output.append(",");
      }
      output.setLength(output.length() - 1);
      output.append("\n");
    }
    return output.toString();
  }
}
@@ -25,6 +25,7 @@ import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 
+import com.google.common.base.Preconditions;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
@@ -40,12 +41,11 @@ import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.misc.IndexMergeTool;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.store.hdfs.HdfsDirectory;
+import org.apache.solr.update.SolrIndexWriter;
 import org.apache.solr.util.RTimer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.base.Preconditions;
-
 /**
  * See {@link IndexMergeTool}.
  */
@@ -161,6 +161,10 @@ public class TreeMergeOutputFormat extends FileOutputFormat<Text, NullWritable>
       }
       LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {}ms", maxSegments, timer.getTime());
+
+      // Set Solr's commit data so the created index is usable by SolrCloud. E.g. Currently SolrCloud relies on
+      // commitTimeMSec in the commit data to do replication.
+      SolrIndexWriter.setCommitData(writer);
 
       timer = new RTimer();
       LOG.info("Optimizing Solr: Closing index writer");
       writer.close();
@@ -307,9 +307,24 @@ public class JettySolrRunner {
   /**
    * Start the Jetty server
    *
+   * If the server has been started before, it will restart using the same port
+   *
    * @throws Exception if an error occurs on startup
    */
   public void start() throws Exception {
+    start(true);
+  }
+
+  /**
+   * Start the Jetty server
+   *
+   * @param reusePort when true, will start up on the same port as used by any
+   *                  previous runs of this JettySolrRunner. If false, will use
+   *                  the port specified by the server's JettyConfig.
+   *
+   * @throws Exception if an error occurs on startup
+   */
+  public void start(boolean reusePort) throws Exception {
     // Do not let Jetty/Solr pollute the MDC for this thread
     Map<String, String> prevContext = MDC.getCopyOfContextMap();
     MDC.clear();
@@ -317,7 +332,8 @@ public class JettySolrRunner {
     // if started before, make a new server
     if (startedBefore) {
       waitOnSolr = false;
-      init(lastPort);
+      int port = reusePort ? lastPort : this.config.port;
+      init(port);
     } else {
       startedBefore = true;
     }
@@ -79,7 +79,7 @@ public class DeleteShardCmd implements Cmd {
     // TODO: Add check for range gaps on Slice deletion
     final Slice.State state = slice.getState();
     if (!(slice.getRange() == null || state == Slice.State.INACTIVE || state == Slice.State.RECOVERY
-        || state == Slice.State.CONSTRUCTION)) {
+        || state == Slice.State.CONSTRUCTION) || state == Slice.State.RECOVERY_FAILED) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "The slice: " + slice.getName() + " is currently " + state
           + ". Only non-active (or custom-hashed) slices can be deleted.");
     }
@@ -288,9 +288,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
     String coreName = leaderProps.getStr(ZkStateReader.CORE_NAME_PROP);
     ActionThrottle lt;
     try (SolrCore core = cc.getCore(coreName)) {
-      if (core == null) {
+      if (core == null ) {
+        if (cc.isShutDown()) {
+          return;
+        } else {
         throw new SolrException(ErrorCode.SERVER_ERROR, "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
       }
+      }
       MDCLoggingContext.setCore(core);
       lt = core.getUpdateHandler().getSolrCoreState().getLeaderThrottle();
     }
@@ -325,9 +329,13 @@ final class ShardLeaderElectionContext extends ShardLeaderElectionContextBase {
     try (SolrCore core = cc.getCore(coreName)) {
 
       if (core == null) {
+        if (!zkController.getCoreContainer().isShutDown()) {
         cancelElection();
         throw new SolrException(ErrorCode.SERVER_ERROR,
             "SolrCore not found:" + coreName + " in " + cc.getCoreNames());
+        } else {
+          return;
+        }
       }
 
       // should I be leader?
@@ -347,10 +347,12 @@ public class LeaderElector {
         // am I the next leader?
         checkIfIamLeader(context, true);
       } catch (Exception e) {
+        if (!zkClient.isClosed()) {
         log.warn("", e);
         }
       }
     }
+  }
 
   /**
    * Set up any ZooKeeper nodes needed for leader election.
@@ -176,8 +176,10 @@ public class OverseerTaskProcessor implements Runnable, Closeable {
     try {
       prioritizer.prioritizeOverseerNodes(myId);
     } catch (Exception e) {
+      if (!zkStateReader.getZkClient().isClosed()) {
       log.error("Unable to prioritize overseer ", e);
       }
+    }
 
     // TODO: Make maxThreads configurable.
 
@@ -582,7 +582,7 @@ public class RecoveryStrategy extends Thread implements Closeable {
     prepCmd.setCheckLive(true);
     prepCmd.setOnlyIfLeader(true);
     final Slice.State state = slice.getState();
-    if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY) {
+    if (state != Slice.State.CONSTRUCTION && state != Slice.State.RECOVERY && state != Slice.State.RECOVERY_FAILED) {
      prepCmd.setOnlyIfLeaderActive(true);
     }
     HttpUriRequestResponse mrr = client.httpUriRequest(prepCmd);
@@ -47,6 +47,7 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.component.ShardHandler;
 import org.apache.solr.util.TestInjection;
+import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -126,6 +127,13 @@ public class SplitShardCmd implements Cmd {
         Thread.currentThread().interrupt();
       }
 
+      // let's record the ephemeralOwner of the parent leader node
+      Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
+      if (leaderZnodeStat == null) {
+        // we just got to know the leader but its live node is gone already!
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
+      }
+
       DocRouter.Range range = parentSlice.getRange();
       if (range == null) {
         range = new PlainIdRouter().fullRange();
@@ -253,6 +261,8 @@
       propMap.put(ZkStateReader.SHARD_RANGE_PROP, subRange.toString());
       propMap.put(ZkStateReader.SHARD_STATE_PROP, Slice.State.CONSTRUCTION.toString());
       propMap.put(ZkStateReader.SHARD_PARENT_PROP, parentSlice.getName());
+      propMap.put("shard_parent_node", parentShardLeader.getNodeName());
+      propMap.put("shard_parent_zk_session", leaderZnodeStat.getEphemeralOwner());
       DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
       inQueue.offer(Utils.toJSON(new ZkNodeProps(propMap)));
 
@@ -420,6 +430,32 @@
 
     assert TestInjection.injectSplitFailureBeforeReplicaCreation();
+
+    long ephemeralOwner = leaderZnodeStat.getEphemeralOwner();
+    // compare against the ephemeralOwner of the parent leader node
+    leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE + "/" + parentShardLeader.getNodeName(), null, true);
+    if (leaderZnodeStat == null || ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
+      // put sub-shards in recovery_failed state
+      DistributedQueue inQueue = Overseer.getStateUpdateQueue(zkStateReader.getZkClient());
+      Map<String, Object> propMap = new HashMap<>();
+      propMap.put(Overseer.QUEUE_OPERATION, OverseerAction.UPDATESHARDSTATE.toLower());
+      for (String subSlice : subSlices) {
+        propMap.put(subSlice, Slice.State.RECOVERY_FAILED.toString());
+      }
+      propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
+      ZkNodeProps m = new ZkNodeProps(propMap);
+      inQueue.offer(Utils.toJSON(m));
+
+      if (leaderZnodeStat == null) {
+        // the leader is not live anymore, fail the split!
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "The shard leader node: " + parentShardLeader.getNodeName() + " is not live anymore!");
+      } else if (ephemeralOwner != leaderZnodeStat.getEphemeralOwner()) {
+        // there's a new leader, fail the split!
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
+            "The zk session id for the shard leader node: " + parentShardLeader.getNodeName() + " has changed from "
+                + ephemeralOwner + " to " + leaderZnodeStat.getEphemeralOwner() + ". This can cause data loss so we must abort the split");
+      }
+    }
+
     // we must set the slice state into recovery before actually creating the replica cores
     // this ensures that the logic inside Overseer to update sub-shard state to 'active'
     // always gets a chance to execute. See SOLR-7673
@@ -59,11 +59,19 @@ public class CollectionMutator {
       String shardRange = message.getStr(ZkStateReader.SHARD_RANGE_PROP);
       String shardState = message.getStr(ZkStateReader.SHARD_STATE_PROP);
       String shardParent = message.getStr(ZkStateReader.SHARD_PARENT_PROP);
+      String shardParentZkSession = message.getStr("shard_parent_zk_session");
+      String shardParentNode = message.getStr("shard_parent_node");
       sliceProps.put(Slice.RANGE, shardRange);
       sliceProps.put(ZkStateReader.STATE_PROP, shardState);
       if (shardParent != null) {
         sliceProps.put(Slice.PARENT, shardParent);
       }
+      if (shardParentZkSession != null) {
+        sliceProps.put("shard_parent_zk_session", shardParentZkSession);
+      }
+      if (shardParentNode != null) {
+        sliceProps.put("shard_parent_node", shardParentNode);
+      }
       collection = updateSlice(collectionName, collection, new Slice(shardId, replicas, sliceProps));
       return new ZkWriteCommand(collectionName, collection);
     } else {
@@ -38,6 +38,7 @@ import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.util.Utils;
+import org.apache.zookeeper.data.Stat;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -403,9 +404,35 @@ public class ReplicaMutator {
         }
         if (allActive) {
           // hurray, all sub shard replicas are active
-          log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE. Preparing to switch shard states.", sliceName);
+          log.info("Shard: {} - All replicas across all fellow sub-shards are now ACTIVE.", sliceName);
           String parentSliceName = (String) sliceProps.remove(Slice.PARENT);
+          // now lets see if the parent leader is still the same or else there's a chance of data loss
+          // see SOLR-9438 for details
+          String shardParentZkSession = (String) sliceProps.remove("shard_parent_zk_session");
+          String shardParentNode = (String) sliceProps.remove("shard_parent_node");
+          boolean isLeaderSame = true;
+          if (shardParentNode != null && shardParentZkSession != null) {
+            log.info("Checking whether sub-shard leader node is still the same one at {} with ZK session id {}", shardParentNode, shardParentZkSession);
+            try {
+              Stat leaderZnodeStat = zkStateReader.getZkClient().exists(ZkStateReader.LIVE_NODES_ZKNODE
+                  + "/" + shardParentNode, null, true);
+              if (leaderZnodeStat == null) {
+                log.error("The shard leader node: {} is not live anymore!", shardParentNode);
+                isLeaderSame = false;
+              } else if (leaderZnodeStat.getEphemeralOwner() != Long.parseLong(shardParentZkSession)) {
+                log.error("The zk session id for shard leader node: {} has changed from {} to {}",
+                    shardParentNode, shardParentZkSession, leaderZnodeStat.getEphemeralOwner());
+                isLeaderSame = false;
+              }
+            } catch (Exception e) {
+              log.warn("Error occurred while checking if parent shard node is still live with the same zk session id. " +
+                  "We cannot switch shard states at this time.", e);
+              return collection; // we aren't going to make any changes right now
+            }
+          }
+
+          if (isLeaderSame) {
+            log.info("Sub-shard leader node is still the same one at {} with ZK session id {}. Preparing to switch shard states.", shardParentNode, shardParentZkSession);
           Map<String, Object> propMap = new HashMap<>();
           propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
           propMap.put(parentSliceName, Slice.State.INACTIVE.toString());
@@ -416,6 +443,18 @@ public class ReplicaMutator {
           propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
           ZkNodeProps m = new ZkNodeProps(propMap);
           return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
+          } else {
+            // we must mark the shard split as failed by switching sub-shards to recovery_failed state
+            Map<String, Object> propMap = new HashMap<>();
+            propMap.put(Overseer.QUEUE_OPERATION, "updateshardstate");
+            propMap.put(sliceName, Slice.State.RECOVERY_FAILED.toString());
+            for (Slice subShardSlice : subShardSlices) {
+              propMap.put(subShardSlice.getName(), Slice.State.RECOVERY_FAILED.toString());
+            }
+            propMap.put(ZkStateReader.COLLECTION_PROP, collection.getName());
+            ZkNodeProps m = new ZkNodeProps(propMap);
+            return new SliceMutator(zkStateReader).updateShardState(prevState, m).collection;
+          }
         }
       }
     }
@@ -164,9 +164,10 @@ public class SliceMutator {
       log.info("Update shard state " + key + " to " + message.getStr(key));
       Map<String, Object> props = slice.shallowCopy();
 
-      if (Slice.State.getState((String) props.get(ZkStateReader.STATE_PROP)) == Slice.State.RECOVERY
-          && Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
+      if (Slice.State.getState(message.getStr(key)) == Slice.State.ACTIVE) {
         props.remove(Slice.PARENT);
+        props.remove("shard_parent_node");
+        props.remove("shard_parent_zk_session");
       }
       props.put(ZkStateReader.STATE_PROP, message.getStr(key));
       Slice newSlice = new Slice(slice.getName(), slice.getReplicasCopy(), props);
@@ -2215,6 +2215,12 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
     m.put("smile", new SmileResponseWriter());
     m.put(ReplicationHandler.FILE_STREAM, getFileStreamWriter());
     DEFAULT_RESPONSE_WRITERS = Collections.unmodifiableMap(m);
+    try {
+      m.put("xlsx",
+          (QueryResponseWriter) Class.forName("org.apache.solr.handler.extraction.XLSXResponseWriter").newInstance());
+    } catch (Exception e) {
+      //don't worry; solrcell contrib not in class path
+    }
   }
 
   private static BinaryResponseWriter getFileStreamWriter() {
@@ -2237,7 +2243,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
   }
 
   public interface RawWriter {
-    public void write(OutputStream os) throws IOException ;
+    void write(OutputStream os) throws IOException ;
   }
 
   /** Configure the query response writers. There will always be a default writer; additional
@@ -308,6 +308,9 @@ public class IndexFetcher {
     long latestVersion = (Long) response.get(CMD_INDEX_VERSION);
     long latestGeneration = (Long) response.get(GENERATION);
 
+    LOG.info("Master's generation: " + latestGeneration);
+    LOG.info("Master's version: " + latestVersion);
+
     // TODO: make sure that getLatestCommit only returns commit points for the main index (i.e. no side-car indexes)
     IndexCommit commit = solrCore.getDeletionPolicy().getLatestCommit();
     if (commit == null) {
@@ -326,6 +329,7 @@ public class IndexFetcher {
       }
     }
 
+    LOG.info("Slave's generation: " + commit.getGeneration());
 
     if (latestVersion == 0L) {
       if (forceReplication && commit.getGeneration() != 0) {
@@ -353,8 +357,6 @@ public class IndexFetcher {
         successfulInstall = true;
         return true;
       }
-      LOG.info("Master's generation: " + latestGeneration);
-      LOG.info("Slave's generation: " + commit.getGeneration());
       LOG.info("Starting replication process");
       // get the list of files first
       fetchFileList(latestGeneration);
@@ -1265,7 +1265,14 @@ public class FacetComponent extends SearchComponent {
     if (facetFs != null) {
 
       for (String field : facetFs) {
-        DistribFieldFacet ff = new DistribFieldFacet(rb, field);
+        final DistribFieldFacet ff;
+
+        if (params.getFieldBool(field, FacetParams.FACET_EXISTS, false)) {
+          // cap facet count by 1 with this method
+          ff = new DistribFacetExistsField(rb, field);
+        } else {
+          ff = new DistribFieldFacet(rb, field);
+        }
         facets.put(ff.getKey(), ff);
       }
     }
@@ -1469,7 +1476,7 @@ public class FacetComponent extends SearchComponent {
         sfc.termNum = termNum++;
         counts.put(name, sfc);
       }
-      sfc.count += count;
+      incCount(sfc, count);
       terms.set(sfc.termNum);
       last = count;
     }
@@ -1486,6 +1493,10 @@ public class FacetComponent extends SearchComponent {
       counted[shardNum] = terms;
     }
 
+    protected void incCount(ShardFacetCount sfc, long count) {
+      sfc.count += count;
+    }
+
     public ShardFacetCount[] getLexSorted() {
       ShardFacetCount[] arr
         = counts.values().toArray(new ShardFacetCount[counts.size()]);
@@ -1547,4 +1558,18 @@ public class FacetComponent extends SearchComponent {
     }
   }
 
+
+  private static final class DistribFacetExistsField extends DistribFieldFacet {
+    private DistribFacetExistsField(ResponseBuilder rb, String facetStr) {
+      super(rb, facetStr);
+      SimpleFacets.checkMincountOnExists(field, minCount);
+    }
+
+    @Override
+    protected void incCount(ShardFacetCount sfc, long count) {
+      if (count>0) {
+        sfc.count = 1;
+      }
+    }
+  }
 }
@@ -406,6 +406,7 @@ public class SimpleFacets {
     String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
     String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
     boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
+    boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
 
     NamedList<Integer> counts;
     SchemaField sf = searcher.getSchema().getField(field);
@@ -422,13 +423,15 @@ public class SimpleFacets {
       requestedMethod = FacetMethod.FC;
     } else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
       requestedMethod = FacetMethod.UIF;
-    }else{
+    } else {
       requestedMethod=null;
     }
 
     final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
 
-    FacetMethod appliedFacetMethod = selectFacetMethod(sf, requestedMethod, mincount);
+    FacetMethod appliedFacetMethod = selectFacetMethod(field,
+        sf, requestedMethod, mincount,
+        exists);
 
     RTimer timer = null;
     if (fdebug != null) {
@@ -446,7 +449,8 @@ public class SimpleFacets {
       switch (appliedFacetMethod) {
         case ENUM:
           assert TrieField.getMainValuePrefix(ft) == null;
-          counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase, params);
+          counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase,
+              exists);
           break;
         case FCS:
           assert !multiToken;
@@ -538,6 +542,29 @@ public class SimpleFacets {
     return counts;
   }
 
+  /**
+   * @param existsRequested facet.exists=true is passed for the given field
+   * */
+  static FacetMethod selectFacetMethod(String fieldName,
+      SchemaField field, FacetMethod method, Integer mincount,
+      boolean existsRequested) {
+    if (existsRequested) {
+      checkMincountOnExists(fieldName, mincount);
+      if (method == null) {
+        method = FacetMethod.ENUM;
+      }
+    }
+    final FacetMethod facetMethod = selectFacetMethod(field, method, mincount);
+
+    if (existsRequested && facetMethod!=FacetMethod.ENUM) {
+      throw new SolrException (ErrorCode.BAD_REQUEST,
+          FacetParams.FACET_EXISTS + "=true is requested, but "+
+          FacetParams.FACET_METHOD+"="+FacetParams.FACET_METHOD_enum+ " can't be used with "+fieldName
+      );
+    }
+    return facetMethod;
+  }
+
   /**
    * This method will force the appropriate facet method even if the user provided a different one as a request parameter
    *
@@ -811,7 +838,8 @@ public class SimpleFacets {
    * @see FacetParams#FACET_ZEROS
    * @see FacetParams#FACET_MISSING
    */
-  public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase, SolrParams params)
+  public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing,
+                      String sort, String prefix, String contains, boolean ignoreCase, boolean intersectsCheck)
     throws IOException {
 
     /* :TODO: potential optimization...
@@ -901,7 +929,11 @@ public class SimpleFacets {
             deState.postingsEnum = postingsEnum;
           }
 
+          if (intersectsCheck) {
+            c = searcher.intersects(docs, deState) ? 1 : 0;
+          } else {
            c = searcher.numDocs(docs, deState);
+          }
 
           postingsEnum = deState.postingsEnum;
         } else {
@@ -916,19 +948,33 @@ public class SimpleFacets {
           if (postingsEnum instanceof MultiPostingsEnum) {
             MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
             int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
+
+            SEGMENTS_LOOP:
             for (int subindex = 0; subindex < numSubs; subindex++) {
               MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
               if (sub.postingsEnum == null) continue;
               int base = sub.slice.start;
               int docid;
               while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-                if (fastForRandomSet.exists(docid + base)) c++;
+                if (fastForRandomSet.exists(docid + base)) {
+                  c++;
+                  if (intersectsCheck) {
+                    assert c==1;
+                    break SEGMENTS_LOOP;
+                  }
+                }
               }
             }
           } else {
             int docid;
             while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-              if (fastForRandomSet.exists(docid)) c++;
+              if (fastForRandomSet.exists(docid)) {
+                c++;
+                if (intersectsCheck) {
+                  assert c==1;
+                  break;
+                }
+              }
             }
           }
 
@@ -969,6 +1015,15 @@ public class SimpleFacets {
     return res;
   }
 
+  public static void checkMincountOnExists(String fieldName, int mincount) {
+    if (mincount > 1) {
+      throw new SolrException (ErrorCode.BAD_REQUEST,
+          FacetParams.FACET_MINCOUNT + "="+mincount+" exceed 1 that's not supported with " +
+          FacetParams.FACET_EXISTS + "=true for " + fieldName
+      );
+    }
+  }
+
   /**
    * A simple key=>val pair whose natural order is such that
   * <b>higher</b> vals come before lower vals.
@@ -19,7 +19,7 @@ import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
@@ -128,12 +128,14 @@ public class BoolField extends PrimitiveFieldType {
 
   @Override
   public String toExternal(IndexableField f) {
-    if (f.binaryValue() == null) {
-      return null;
-    }
-
+    if (null != f.binaryValue()) {
       return indexedToReadable(f.binaryValue().utf8ToString());
     }
+    if (null != f.stringValue()) {
+      return indexedToReadable(f.stringValue());
+    }
+    return null;
+  }
 
   @Override
   public Boolean toObject(IndexableField f) {
@@ -517,6 +517,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
       }
     }
 
+    @Override public boolean needsScores() { return true; }
+
     @Override
     protected void doSetNextReader(LeafReaderContext context) throws IOException {
       this.contexts[context.ord] = context;
@@ -726,6 +728,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
 
     }
 
+    @Override public boolean needsScores() { return true; }
+
     @Override
     protected void doSetNextReader(LeafReaderContext context) throws IOException {
       this.contexts[context.ord] = context;
@@ -909,6 +913,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
       }
     }
 
+    @Override public boolean needsScores() { return needsScores || super.needsScores(); }
+
     public void setScorer(Scorer scorer) {
       this.collapseStrategy.setScorer(scorer);
     }
@@ -1069,6 +1075,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
       }
     }
 
+    @Override public boolean needsScores() { return needsScores || super.needsScores(); }
+
     public void setScorer(Scorer scorer) {
       this.collapseStrategy.setScorer(scorer);
     }
@@ -1686,7 +1694,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
     private float[] ordVals;
     private Map rcontext;
     private final CollapseScore collapseScore = new CollapseScore();
-    private final boolean cscore;
     private float score;
 
     public OrdValueSourceStrategy(int maxDoc,
@@ -1714,7 +1721,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
       Arrays.fill(ordVals, Float.MAX_VALUE);
     }
 
-      this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
+      collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
 
     if(this.needsScores) {
       this.scores = new float[ords.length];
@@ -1735,7 +1742,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
         this.boostDocs.add(globalDoc);
       }
 
-      if(needsScores || cscore) {
+      if (needsScores) {
         this.score = scorer.score();
         this.collapseScore.score = score;
       }
@@ -2208,7 +2215,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
     private FunctionValues functionValues;
     private Map rcontext;
     private final CollapseScore collapseScore = new CollapseScore();
-    private final boolean cscore;
     private float score;
     private int index=-1;
 
@@ -2240,7 +2246,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
        comp = new MinFloatComp();
      }
 
-      this.cscore = collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
+      collapseScore.setupIfNeeded(groupHeadSelector, rcontext);
 
      if(needsScores) {
        this.scores = new float[size];
@@ -2263,7 +2269,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
        return;
      }
 
-      if(needsScores || cscore) {
+      if (needsScores) {
        this.score = scorer.score();
        this.collapseScore.score = score;
      }
@@ -2285,6 +2285,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     return all.andNotSize(positiveA.union(positiveB));
   }
 
+  /** @lucene.internal */
+  public boolean intersects(DocSet a, DocsEnumState deState) throws IOException {
+    return a.intersects(getDocSet(deState));
+  }
+
   /**
    * Takes a list of document IDs, and returns an array of Documents containing all of the stored fields.
    */
@@ -20,10 +20,8 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
-import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
 import java.util.concurrent.atomic.LongAdder;
@@ -47,7 +45,6 @@ import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.util.SuppressForbidden;
 import org.apache.solr.core.SolrConfig.UpdateHandlerInfo;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.LocalSolrQueryRequest;
@@ -517,15 +514,6 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     return rc;
   }
 
-  @SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
-      " but currently suspiciously used for replication as well")
-  private void setCommitData(IndexWriter iw) {
-    final Map<String,String> commitData = new HashMap<>();
-    commitData.put(SolrIndexWriter.COMMIT_TIME_MSEC_KEY,
-        String.valueOf(System.currentTimeMillis()));
-    iw.setLiveCommitData(commitData.entrySet());
-  }
-
   public void prepareCommit(CommitUpdateCommand cmd) throws IOException {
 
     boolean error=true;
@@ -534,7 +522,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     log.info("start "+cmd);
     RefCounted<IndexWriter> iw = solrCoreState.getIndexWriter(core);
     try {
-      setCommitData(iw.get());
+      SolrIndexWriter.setCommitData(iw.get());
       iw.get().prepareCommit();
     } finally {
       iw.decref();
@@ -615,7 +603,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     // SolrCore.verbose("writer.commit() start writer=",writer);
 
     if (writer.hasUncommittedChanges()) {
-      setCommitData(writer);
+      SolrIndexWriter.setCommitData(writer);
       writer.commit();
     } else {
       log.info("No uncommitted changes. Skipping IW.commit.");
@@ -800,7 +788,7 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
     }
 
     // todo: refactor this shared code (or figure out why a real CommitUpdateCommand can't be used)
-    setCommitData(writer);
+    SolrIndexWriter.setCommitData(writer);
     writer.commit();
 
     synchronized (solrCoreState.getUpdateLock()) {
@@ -22,12 +22,12 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterCodecReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.SlowCodecReaderWrapper;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -134,6 +134,11 @@ public class SolrIndexSplitter {
         CodecReader subReader = SlowCodecReaderWrapper.wrap(leaves.get(segmentNumber).reader());
         iw.addIndexes(new LiveDocsReader(subReader, segmentDocSets.get(segmentNumber)[partitionNumber]));
       }
+      // we commit explicitly instead of sending a CommitUpdateCommand through the processor chain
+      // because the sub-shard cores will just ignore such a commit because the update log is not
+      // in active state at this time.
+      SolrIndexWriter.setCommitData(iw);
+      iw.commit();
       success = true;
     } finally {
       if (iwRef != null) {
@@ -151,8 +156,6 @@ public class SolrIndexSplitter {
 
     }
 
-
-
     FixedBitSet[] split(LeafReaderContext readerContext) throws IOException {
       LeafReader reader = readerContext.reader();
       FixedBitSet[] docSets = new FixedBitSet[numPieces];
@@ -18,6 +18,8 @@ package org.apache.solr.update;
 
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicLong;
 
 import org.apache.lucene.codecs.Codec;
@@ -27,8 +29,9 @@ import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.InfoStream;
 import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.core.DirectoryFactory.DirContext;
+import org.apache.solr.common.util.SuppressForbidden;
 import org.apache.solr.core.DirectoryFactory;
+import org.apache.solr.core.DirectoryFactory.DirContext;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
 import org.slf4j.Logger;
@@ -87,6 +90,15 @@ public class SolrIndexWriter extends IndexWriter {
     numOpens.incrementAndGet();
   }
 
+  @SuppressForbidden(reason = "Need currentTimeMillis, commit time should be used only for debugging purposes, " +
+      " but currently suspiciously used for replication as well")
+  public static void setCommitData(IndexWriter iw) {
+    log.info("Calling setCommitData with IW:" + iw.toString());
+    final Map<String,String> commitData = new HashMap<>();
+    commitData.put(COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
+    iw.setLiveCommitData(commitData.entrySet());
+  }
+
   private void setDirectoryFactory(DirectoryFactory factory) {
     this.directoryFactory = factory;
   }
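As a rough illustration of what the new setCommitData helper records, the sketch below stamps a timestamp into the live commit data and reads it back from the resulting commit point. The key name, RAMDirectory, and analyzer are assumptions made for the example, not part of the change above:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class CommitDataSketch {
  // assumed key name; the helper above uses its own COMMIT_TIME_MSEC_KEY constant
  static final String COMMIT_TIME_MSEC_KEY = "commitTimeMSec";

  public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory();
         IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Map<String, String> commitData = new HashMap<>();
      commitData.put(COMMIT_TIME_MSEC_KEY, String.valueOf(System.currentTimeMillis()));
      iw.setLiveCommitData(commitData.entrySet()); // stamped into the next commit
      iw.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        // the user data travels with the commit point and can be read back later
        String stamp = reader.getIndexCommit().getUserData().get(COMMIT_TIME_MSEC_KEY);
        System.out.println("commit time msec = " + stamp);
      }
    }
  }
}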
@@ -1169,12 +1169,12 @@ public class TestDistributedSearch extends BaseDistributedSearchTestCase {
           if (upShards.contains(s)) {
             // this is no longer true if there was a query timeout on an up shard
             // assertTrue("Expected to find numFound in the up shard info",info.get("numFound") != null);
-            assertTrue("Expected to find shardAddress in the up shard info",info.get("shardAddress") != null);
+            assertTrue("Expected to find shardAddress in the up shard info: " + info.toString(), info.get("shardAddress") != null);
           }
           else {
             assertEquals("Expected to find the "+SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY+" header set if a shard is down",
                 Boolean.TRUE, rsp.getHeader().get(SolrQueryResponse.RESPONSE_HEADER_PARTIAL_RESULTS_KEY));
-            assertTrue("Expected to find error in the down shard info",info.get("error") != null);
+            assertTrue("Expected to find error in the down shard info: " + info.toString(), info.get("error") != null);
           }
         }
       }
@@ -16,22 +16,39 @@
  */
 package org.apache.solr;
 
-import org.apache.lucene.util.TestUtil;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+
 import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.SchemaField;
 import org.junit.BeforeClass;
 import org.junit.Test;
+import org.noggit.JSONUtil;
+import org.noggit.ObjectBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.lang.invoke.MethodHandles;
-import java.util.*;
-
 
 @Slow
 public class TestRandomFaceting extends SolrTestCaseJ4 {
 
+  private static final Pattern trieFields = Pattern.compile(".*_t.");
+
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   public static final String FOO_STRING_FIELD = "foo_s1";
@@ -80,6 +97,21 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
     types.add(new FldType("missing_ss",new IRange(0,0), new SVal('a','b',1,1)));
 
     // TODO: doubles, multi-floats, ints with precisionStep>0, booleans
+    types.add(new FldType("small_tf",ZERO_ONE, new FVal(-4,5)));
+    assert trieFields.matcher("small_tf").matches();
+    assert !trieFields.matcher("small_f").matches();
+
+    types.add(new FldType("foo_ti",ZERO_ONE, new IRange(-2,indexSize)));
+    assert trieFields.matcher("foo_ti").matches();
+    assert !trieFields.matcher("foo_i").matches();
+
+    types.add(new FldType("bool_b",ZERO_ONE, new Vals(){
+      @Override
+      public Comparable get() {
+        return random().nextBoolean();
+      }
+
+    }));
   }
 
   void addMoreDocs(int ndocs) throws Exception {
@@ -144,8 +176,8 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
   }
 
 
-  List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc"});
-  List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs"});
+  List<String> multiValuedMethods = Arrays.asList(new String[]{"enum","fc", null});
+  List<String> singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs", null});
 
 
   void doFacetTests(FldType ftype) throws Exception {
@@ -154,10 +186,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
     Random rand = random();
     boolean validate = validateResponses;
     ModifiableSolrParams params = params("facet","true", "wt","json", "indent","true", "omitHeader","true");
-    params.add("q","*:*", "rows","0");  // TODO: select subsets
+    params.add("q","*:*");  // TODO: select subsets
     params.add("rows","0");
-
 
     SchemaField sf = req.getSchema().getField(ftype.fname);
     boolean multiValued = sf.getType().multiValuedFieldCache();
 
@@ -198,6 +229,10 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
       params.add("facet.missing", "true");
     }
 
+    if (rand.nextBoolean()) {
+      params.add("facet.enum.cache.minDf",""+ rand.nextInt(indexSize));
+    }
+
     // TODO: randomly add other facet params
     String key = ftype.fname;
     String facet_field = ftype.fname;
@@ -210,45 +245,207 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
     List<String> methods = multiValued ? multiValuedMethods : singleValuedMethods;
     List<String> responses = new ArrayList<>(methods.size());
     for (String method : methods) {
+      for (boolean exists : new boolean [] {false, true}) {
         // params.add("facet.field", "{!key="+method+"}" + ftype.fname);
         // TODO: allow method to be passed on local params?
+        if (method!=null) {
           params.set("facet.method", method);
+        } else {
+          params.remove("facet.method");
+        }
+
+        params.set("facet.exists", ""+exists);
+        if (!exists && rand.nextBoolean()) {
+          params.remove("facet.exists");
+        }
+
         // if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
+        if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation
+          if (exists && params.getInt("facet.mincount", 0)>1) {
+            assertQEx("no mincount on facet.exists",
+                rand.nextBoolean() ? "facet.exists":"facet.mincount",
+                req(params), ErrorCode.BAD_REQUEST);
+            continue;
+          }
+          // facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS
+          final boolean notEnum = method != null && !method.equals("enum");
+          final boolean trieField = trieFields.matcher(ftype.fname).matches();
+          if ((notEnum || trieField) && exists) {
+            assertQEx("facet.exists only when enum or ommitted",
+                "facet.exists", req(params), ErrorCode.BAD_REQUEST);
+            continue;
+          }
+        }
         String strResponse = h.query(req(params));
-        // Object realResponse = ObjectBuilder.fromJSON(strResponse);
-        // System.out.println(strResponse);
 
         responses.add(strResponse);
+
+        if (responses.size()>1) {
+          validateResponse(responses.get(0), strResponse, params, method, methods);
+        }
+      }
 
     }
 
     /**
     String strResponse = h.query(req(params));
     Object realResponse = ObjectBuilder.fromJSON(strResponse);
     **/
+    } finally {
+      req.close();
+    }
+  }
+
+  private void validateResponse(String expected, String actual, ModifiableSolrParams params, String method,
+      List<String> methods) throws Exception {
+    if (params.getBool("facet.exists", false)) {
+      if (isSortByCount(params)) { // it's challenged with facet.sort=count
+        expected = getExpectationForSortByCount(params, methods);// that requires to recalculate expactation
+      } else { // facet.sort=index
+        expected = capFacetCountsTo1(expected);
+      }
+    }
+
-    if (validate) {
-      for (int i=1; i<methods.size(); i++) {
-        String err = JSONTestUtil.match("/", responses.get(i), responses.get(0), 0.0);
+    String err = JSONTestUtil.match("/", actual, expected, 0.0);
     if (err != null) {
       log.error("ERROR: mismatch facet response: " + err +
-          "\n expected =" + responses.get(0) +
-          "\n response = " + responses.get(i) +
+          "\n expected =" + expected +
+          "\n response = " + actual +
           "\n request = " + params
       );
       fail(err);
     }
-      }
-    }
   }
+
+  /** if facet.exists=true with facet.sort=counts,
+   * it should return all values with 1 hits ordered by label index
+   * then all vals with 0 , and then missing count with null label,
+   * in the implementation below they are called three stratas
+   * */
+  private String getExpectationForSortByCount( ModifiableSolrParams params, List<String> methods) throws Exception {
+    String indexSortedResponse = getIndexSortedAllFacetValues(params, methods);
+
+    return transformFacetFields(indexSortedResponse, e -> {
+      List<Object> facetSortedByIndex = (List<Object>) e.getValue();
+      Map<Integer,List<Object>> stratas = new HashMap<Integer,List<Object>>(){
+        @Override // poor man multimap, I won't do that anymore, I swear.
+        public List<Object> get(Object key) {
+          if (!containsKey(key)) {
+            put((Integer) key, new ArrayList<>());
+          }
+          return super.get(key);
+        }
+      };
+
+      for (Iterator iterator = facetSortedByIndex.iterator(); iterator.hasNext();) {
+        Object label = (Object) iterator.next();
+        Long count = (Long) iterator.next();
+        final Integer strata;
+        if (label==null) { // missing (here "stratas" seems like overengineering )
+          strata = null;
+        }else {
+          if (count>0) {
+            count = 1L; // capping here
+            strata = 1; // non-zero count become zero
+          } else {
+            strata = 0; // zero-count
+          }
+        }
+        final List<Object> facet = stratas.get(strata);
+        facet.add(label);
+        facet.add(count);
+      }
+      List stratified =new ArrayList<>();
+      for(Integer s : new Integer[]{1, 0}) { // non-zero capped to one goes first, zeroes go then
+        stratified.addAll(stratas.get(s));
+      }// cropping them now
+      int offset=params.getInt("facet.offset", 0) * 2;
+      int end = offset + params.getInt("facet.limit", 100) * 2 ;
+      int fromIndex = offset > stratified.size() ? stratified.size() : offset;
+      stratified = stratified.subList(fromIndex,
+          end > stratified.size() ? stratified.size() : end);
+
+      if (params.getInt("facet.limit", 100)>0) { /// limit=0 omits even miss count
+        stratified.addAll(stratas.get(null));
+      }
+      facetSortedByIndex.clear();
+      facetSortedByIndex.addAll(stratified);
+    });
   }
+
+  private String getIndexSortedAllFacetValues(ModifiableSolrParams in, List<String> methods) throws Exception {
+    ModifiableSolrParams params = new ModifiableSolrParams(in);
+    params.set("facet.sort", "index");
+    String goodOldMethod = methods.get(random().nextInt( methods.size()));
+    params.set("facet.method", goodOldMethod);
+    params.set("facet.exists", "false");
+    if (random().nextBoolean()) {
+      params.remove("facet.exists");
+    }
+    params.set("facet.limit",-1);
+    params.set("facet.offset",0);
+    final String query;
+    SolrQueryRequest req = null;
+    try {
+      req = req(params);
+      query = h.query(req);
     } finally {
       req.close();
     }
+    return query;
   }
+
+  private boolean isSortByCount(ModifiableSolrParams in) {
+    boolean sortIsCount;
+    String sortParam = in.get("facet.sort");
+    sortIsCount = "count".equals(sortParam) || (sortParam==null && in.getInt("facet.limit",100)>0);
+    return sortIsCount;
+  }
+
+  /*
+   * {
+  "response":{"numFound":6,"start":0,"docs":[]
+  },
+  "facet_counts":{
+    "facet_queries":{},
+    "facet_fields":{
+      "foo_i":[
+        "6",2,
+        "2",1,
+        "3",1]},
+    "facet_ranges":{},
+    "facet_intervals":{},
+    "facet_heatmaps":{}}}
+   * */
+  @SuppressWarnings({"rawtypes", "unchecked"})
+  private String capFacetCountsTo1(String expected) throws IOException {
+    return transformFacetFields(expected, e -> {
+      List<Object> facetValues = (List<Object>) e.getValue();
+      for (ListIterator iterator = facetValues.listIterator(); iterator.hasNext();) {
+        Object value = iterator.next();
+        Long count = (Long) iterator.next();
+        if (value!=null && count > 1) {
+          iterator.set(1);
+        }
+
+      }
+    });
+  }
+
+  private String transformFacetFields(String expected, Consumer<Map.Entry<Object,Object>> consumer) throws IOException {
+    Object json = ObjectBuilder.fromJSON(expected);
+    Map facet_fields = getFacetFieldMap(json);
+    Set entries = facet_fields.entrySet();
+    for (Object facetTuples : entries) { //despite there should be only one field
+      Entry entry = (Entry)facetTuples;
+      consumer.accept(entry);
+    }
+    return JSONUtil.toJSON(json);
+  }
+
+  private Map getFacetFieldMap(Object json) {
+    Object facet_counts = ((Map)json).get("facet_counts");
+    Map facet_fields = (Map) ((Map)facet_counts).get("facet_fields");
+    return facet_fields;
+  }
 }
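The facet.exists behaviour exercised by these test changes can also be requested from a SolrJ client. A hedged sketch follows; the base URL, collection, and field name are made up, and it assumes facet.method=enum so that facet.exists is accepted and counts come back capped at 1:

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;

public class FacetExistsSketch {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(0);
      q.setFacet(true);
      q.addFacetField("foo_s1");         // hypothetical string field
      q.set("facet.method", "enum");     // facet.exists is only legal with enum (or no method at all)
      q.set("facet.exists", "true");     // counts are capped at 1, as validateResponse above expects
      QueryResponse rsp = client.query(q);
      FacetField ff = rsp.getFacetField("foo_s1");
      ff.getValues().forEach(c -> System.out.println(c.getName() + " -> " + c.getCount()));
    }
  }
}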
@@ -25,20 +25,28 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.lucene.util.LuceneTestCase.Slow;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.CoreAdminRequest;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.client.solrj.response.CollectionAdminResponse;
+import org.apache.solr.client.solrj.response.CoreAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.client.solrj.response.RequestStatusState;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.CollectionStateWatcher;
 import org.apache.solr.common.cloud.CompositeIdRouter;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.DocRouter;
@@ -56,6 +64,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static org.apache.solr.cloud.OverseerCollectionMessageHandler.NUM_SLICES;
+import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
 import static org.apache.solr.common.cloud.ZkStateReader.MAX_SHARDS_PER_NODE;
 import static org.apache.solr.common.cloud.ZkStateReader.REPLICATION_FACTOR;
 
@@ -71,6 +80,12 @@ public class ShardSplitTest extends BasicDistributedZkTest {
     schemaString = "schema15.xml";      // we need a string id
   }
 
+  @Override
+  public void distribSetUp() throws Exception {
+    super.distribSetUp();
+    useFactory(null);
+  }
+
   @Test
   public void test() throws Exception {
 
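The split-and-monitor pattern that the ShardSplitTest additions below rely on reduces to an asynchronous SPLITSHARD call followed by polling its request status. A minimal SolrJ sketch, with the ZooKeeper address, collection, and shard name assumed for illustration:

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.response.RequestStatusState;

public class SplitShardSketch {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client = new CloudSolrClient.Builder()
        .withZkHost("localhost:9983")                         // assumed ZooKeeper address
        .build()) {
      CollectionAdminRequest.SplitShard splitShard =
          CollectionAdminRequest.splitShard("collection1");   // assumed collection name
      splitShard.setShardName("shard1");
      String asyncId = splitShard.processAsync(client);       // returns immediately with a request id

      // poll the async request status for up to 120 seconds, as the tests above do
      RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
      if (state == RequestStatusState.COMPLETED) {
        System.out.println("sub-shards created; parent shard goes INACTIVE once they are ACTIVE");
      } else {
        System.out.println("split did not complete: " + state.getKey());
      }
    }
  }
}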
@ -91,6 +106,146 @@ public class ShardSplitTest extends BasicDistributedZkTest {
|
|||||||
//waitForThingsToLevelOut(15);
|
//waitForThingsToLevelOut(15);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Creates a collection with replicationFactor=1, splits a shard. Restarts the sub-shard leader node.
|
||||||
|
Add a replica. Ensure count matches in leader and replica.
|
||||||
|
*/
|
||||||
|
public void testSplitStaticIndexReplication() throws Exception {
|
||||||
|
waitForThingsToLevelOut(15);
|
||||||
|
|
||||||
|
DocCollection defCol = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
Replica replica = defCol.getReplicas().get(0);
|
||||||
|
String nodeName = replica.getNodeName();
|
||||||
|
|
||||||
|
String collectionName = "testSplitStaticIndexReplication";
|
||||||
|
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, "conf1", 1, 1);
|
||||||
|
create.setMaxShardsPerNode(5); // some high number so we can create replicas without hindrance
|
||||||
|
create.setCreateNodeSet(nodeName); // we want to create the leader on a fixed node so that we know which one to restart later
|
||||||
|
create.process(cloudClient);
|
||||||
|
try (CloudSolrClient client = getCloudSolrClient(zkServer.getZkAddress(), true, cloudClient.getLbClient().getHttpClient())) {
|
||||||
|
client.setDefaultCollection(collectionName);
|
||||||
|
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, client, "i1", true);
|
||||||
|
try {
|
||||||
|
thread.start();
|
||||||
|
Thread.sleep(1000); // give the indexer sometime to do its work
|
||||||
|
thread.safeStop();
|
||||||
|
thread.join();
|
||||||
|
client.commit();
|
||||||
|
controlClient.commit();
|
||||||
|
|
||||||
|
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName);
|
||||||
|
splitShard.setShardName(SHARD1);
|
||||||
|
String asyncId = splitShard.processAsync(client);
|
||||||
|
RequestStatusState state = CollectionAdminRequest.requestStatus(asyncId).waitFor(client, 120);
|
||||||
|
if (state == RequestStatusState.COMPLETED) {
|
||||||
|
waitForRecoveriesToFinish(collectionName, true);
|
||||||
|
// let's wait to see parent shard become inactive
|
||||||
|
CountDownLatch latch = new CountDownLatch(1);
|
||||||
|
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
|
||||||
|
@Override
|
||||||
|
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||||
|
Slice parent = collectionState.getSlice(SHARD1);
|
||||||
|
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||||
|
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||||
|
if (slice10 != null && slice11 != null &&
|
||||||
|
parent.getState() == Slice.State.INACTIVE &&
|
||||||
|
slice10.getState() == Slice.State.ACTIVE &&
|
||||||
|
slice11.getState() == Slice.State.ACTIVE) {
|
||||||
|
latch.countDown();
|
||||||
|
return true; // removes the watch
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
latch.await(1, TimeUnit.MINUTES);
|
||||||
|
if (latch.getCount() != 0) {
|
||||||
|
// sanity check
|
||||||
|
fail("Sub-shards did not become active even after waiting for 1 minute");
|
||||||
|
}
|
||||||
|
|
||||||
|
int liveNodeCount = client.getZkStateReader().getClusterState().getLiveNodes().size();
|
||||||
|
|
||||||
|
// restart the sub-shard leader node
|
||||||
|
boolean restarted = false;
|
||||||
|
for (JettySolrRunner jetty : jettys) {
|
||||||
|
int port = jetty.getBaseUrl().getPort();
|
||||||
|
if (replica.getStr(BASE_URL_PROP).contains(":" + port)) {
|
||||||
|
ChaosMonkey.kill(jetty);
|
||||||
|
ChaosMonkey.start(jetty);
|
||||||
|
restarted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!restarted) {
|
||||||
|
// sanity check
|
||||||
|
fail("We could not find a jetty to kill for replica: " + replica.getCoreUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
// add a new replica for the sub-shard
|
||||||
|
CollectionAdminRequest.AddReplica addReplica = CollectionAdminRequest.addReplicaToShard(collectionName, SHARD1_0);
|
||||||
|
// use control client because less chances of it being the node being restarted
|
||||||
|
// this is to avoid flakiness of test because of NoHttpResponseExceptions
|
||||||
|
String control_collection = client.getZkStateReader().getClusterState().getCollection("control_collection").getReplicas().get(0).getStr(BASE_URL_PROP);
|
||||||
|
try (HttpSolrClient control = new HttpSolrClient.Builder(control_collection).withHttpClient(client.getLbClient().getHttpClient()).build()) {
|
||||||
|
state = addReplica.processAndWait(control, 30);
|
||||||
|
}
|
||||||
|
if (state == RequestStatusState.COMPLETED) {
|
||||||
|
CountDownLatch newReplicaLatch = new CountDownLatch(1);
|
||||||
|
client.getZkStateReader().registerCollectionStateWatcher(collectionName, new CollectionStateWatcher() {
|
||||||
|
@Override
|
||||||
|
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||||
|
if (liveNodes.size() != liveNodeCount) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Slice slice = collectionState.getSlice(SHARD1_0);
|
||||||
|
if (slice.getReplicas().size() == 2) {
|
||||||
|
if (!slice.getReplicas().stream().anyMatch(r -> r.getState() == Replica.State.RECOVERING)) {
|
||||||
|
// we see replicas and none of them are recovering
|
||||||
|
newReplicaLatch.countDown();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
newReplicaLatch.await(30, TimeUnit.SECONDS);
|
||||||
|
// check consistency of sub-shard replica explicitly because checkShardConsistency methods doesn't
|
||||||
|
// handle new shards/replica so well.
|
||||||
|
ClusterState clusterState = client.getZkStateReader().getClusterState();
|
||||||
|
DocCollection collection = clusterState.getCollection(collectionName);
|
||||||
|
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
|
||||||
|
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
|
||||||
|
} else {
|
||||||
|
fail("Adding a replica to sub-shard did not complete even after waiting for 30 seconds!. Saw state = " + state.getKey());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fail("We expected shard split to succeed on a static index but it didn't. Found state = " + state.getKey());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
thread.safeStop();
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private int assertConsistentReplicas(Slice shard) throws SolrServerException, IOException {
|
||||||
|
long numFound = Long.MIN_VALUE;
|
||||||
|
int count = 0;
|
||||||
|
for (Replica replica : shard.getReplicas()) {
|
||||||
|
HttpSolrClient client = new HttpSolrClient.Builder(replica.getCoreUrl())
|
||||||
|
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build();
|
||||||
|
QueryResponse response = client.query(new SolrQuery("q", "*:*", "distrib", "false"));
|
||||||
|
log.info("Found numFound={} on replica: {}", response.getResults().getNumFound(), replica.getCoreUrl());
|
||||||
|
if (numFound == Long.MIN_VALUE) {
|
||||||
|
numFound = response.getResults().getNumFound();
|
||||||
|
} else {
|
||||||
|
assertEquals("Shard " + shard.getName() + " replicas do not have same number of documents", numFound, response.getResults().getNumFound());
|
||||||
|
}
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used to test that we can split a shard when a previous split event
|
* Used to test that we can split a shard when a previous split event
|
||||||
* left sub-shards in construction or recovery state.
|
* left sub-shards in construction or recovery state.
|
||||||
@ -143,6 +298,218 @@ public class ShardSplitTest extends BasicDistributedZkTest {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSplitWithChaosMonkey() throws Exception {
|
||||||
|
waitForThingsToLevelOut(15);
|
||||||
|
|
||||||
|
List<StoppableIndexingThread> indexers = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < 1; i++) {
|
||||||
|
StoppableIndexingThread thread = new StoppableIndexingThread(controlClient, cloudClient, String.valueOf(i), true);
|
||||||
|
indexers.add(thread);
|
||||||
|
thread.start();
|
||||||
|
}
|
||||||
|
Thread.sleep(1000); // give the indexers some time to do their work
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Error in test", e);
|
||||||
|
} finally {
|
||||||
|
for (StoppableIndexingThread indexer : indexers) {
|
||||||
|
indexer.safeStop();
|
||||||
|
indexer.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cloudClient.commit();
|
||||||
|
controlClient.commit();
|
||||||
|
|
||||||
|
AtomicBoolean stop = new AtomicBoolean();
|
||||||
|
AtomicBoolean killed = new AtomicBoolean(false);
|
||||||
|
Runnable monkey = new Runnable() {
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
ZkStateReader zkStateReader = cloudClient.getZkStateReader();
|
||||||
|
zkStateReader.registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
|
||||||
|
@Override
|
||||||
|
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||||
|
if (stop.get()) {
|
||||||
|
return true; // abort and remove the watch
|
||||||
|
}
|
||||||
|
Slice slice = collectionState.getSlice(SHARD1_0);
|
||||||
|
if (slice != null && slice.getReplicas().size() > 1) {
|
||||||
|
// ensure that only one watcher invocation thread can kill!
|
||||||
|
if (killed.compareAndSet(false, true)) {
|
||||||
|
log.info("Monkey thread found 2 replicas for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
|
||||||
|
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000 + random().nextInt(500));
|
||||||
|
ChaosMonkey.kill(cjetty);
|
||||||
|
stop.set(true);
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Monkey unable to kill jetty at port " + cjetty.jetty.getLocalPort(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log.info("Monkey thread found only one replica for {} {}", AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Thread monkeyThread = null;
|
||||||
|
/*
|
||||||
|
somehow the cluster state object inside this zk state reader has static copy of the collection which is never updated
|
||||||
|
so any call to waitForRecoveriesToFinish just keeps looping until timeout.
|
||||||
|
We workaround by explicitly registering the collection as an interesting one so that it is watched by ZkStateReader
|
||||||
|
see SOLR-9440. Todo remove this hack after SOLR-9440 is fixed.
|
||||||
|
*/
|
||||||
|
cloudClient.getZkStateReader().registerCore(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
|
||||||
|
monkeyThread = new Thread(monkey);
|
||||||
|
monkeyThread.start();
|
||||||
|
try {
|
||||||
|
CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
splitShard.setShardName(SHARD1);
|
||||||
|
String asyncId = splitShard.processAsync(cloudClient);
|
||||||
|
RequestStatusState splitStatus = null;
|
||||||
|
try {
|
||||||
|
splitStatus = CollectionAdminRequest.requestStatus(asyncId).waitFor(cloudClient, 120);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Failed to get request status, maybe because the overseer node was shutdown by monkey", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// we don't care if the split failed because we are injecting faults and it is likely
|
||||||
|
// that the split has failed but in any case we want to assert that all docs that got
|
||||||
|
// indexed are available in SolrCloud and if the split succeeded then all replicas of the sub-shard
|
||||||
|
// must be consistent (i.e. have same numdocs)
|
||||||
|
|
||||||
|
log.info("Shard split request state is COMPLETED");
|
||||||
|
stop.set(true);
|
||||||
|
monkeyThread.join();
|
||||||
|
Set<String> addFails = new HashSet<>();
|
||||||
|
Set<String> deleteFails = new HashSet<>();
|
||||||
|
for (StoppableIndexingThread indexer : indexers) {
|
||||||
|
addFails.addAll(indexer.getAddFails());
|
||||||
|
deleteFails.addAll(indexer.getDeleteFails());
|
||||||
|
}
|
||||||
|
|
||||||
|
CloudJettyRunner cjetty = shardToLeaderJetty.get(SHARD1);
|
||||||
|
log.info("Starting shard1 leader jetty at port {}", cjetty.jetty.getLocalPort());
|
||||||
|
ChaosMonkey.start(cjetty.jetty);
|
||||||
|
cloudClient.getZkStateReader().forceUpdateCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
log.info("Current collection state: {}", printClusterStateInfo(AbstractDistribZkTestBase.DEFAULT_COLLECTION));
|
||||||
|
|
||||||
|
boolean replicaCreationsFailed = false;
|
||||||
|
if (splitStatus == RequestStatusState.FAILED) {
|
||||||
|
// either one or more replica creation failed (because it may have been created on the same parent shard leader node)
|
||||||
|
// or the split may have failed while trying to soft-commit *after* all replicas have been created
|
||||||
|
// the latter counts as a successful switch even if the API doesn't say so
|
||||||
|
// so we must find a way to distinguish between the two
|
||||||
|
// an easy way to do that is to look at the sub-shard replicas and check if the replica core actually exists
|
||||||
|
// instead of existing solely inside the cluster state
|
||||||
|
DocCollection collectionState = cloudClient.getZkStateReader().getClusterState().getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||||
|
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||||
|
if (slice10 != null && slice11 != null) {
|
||||||
|
for (Replica replica : slice10) {
|
||||||
|
if (!doesReplicaCoreExist(replica)) {
|
||||||
|
replicaCreationsFailed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (Replica replica : slice11) {
|
||||||
|
if (!doesReplicaCoreExist(replica)) {
|
||||||
|
replicaCreationsFailed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// true if sub-shard states switch to 'active' eventually
|
||||||
|
AtomicBoolean areSubShardsActive = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
if (!replicaCreationsFailed) {
|
||||||
|
// all sub-shard replicas were created successfully so all cores must recover eventually
|
||||||
|
waitForRecoveriesToFinish(AbstractDistribZkTestBase.DEFAULT_COLLECTION, true);
|
||||||
|
// let's wait for the overseer to switch shard states
|
||||||
|
CountDownLatch latch = new CountDownLatch(1);
|
||||||
|
cloudClient.getZkStateReader().registerCollectionStateWatcher(AbstractDistribZkTestBase.DEFAULT_COLLECTION, new CollectionStateWatcher() {
|
||||||
|
@Override
|
||||||
|
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
|
||||||
|
Slice parent = collectionState.getSlice(SHARD1);
|
||||||
|
Slice slice10 = collectionState.getSlice(SHARD1_0);
|
||||||
|
Slice slice11 = collectionState.getSlice(SHARD1_1);
|
||||||
|
if (slice10 != null && slice11 != null &&
|
||||||
|
parent.getState() == Slice.State.INACTIVE &&
|
||||||
|
slice10.getState() == Slice.State.ACTIVE &&
|
||||||
|
slice11.getState() == Slice.State.ACTIVE) {
|
||||||
|
areSubShardsActive.set(true);
|
||||||
|
latch.countDown();
|
||||||
|
return true; // removes the watch
|
||||||
|
} else if (slice10 != null && slice11 != null &&
|
||||||
|
parent.getState() == Slice.State.ACTIVE &&
|
||||||
|
slice10.getState() == Slice.State.RECOVERY_FAILED &&
|
||||||
|
slice11.getState() == Slice.State.RECOVERY_FAILED) {
|
||||||
|
areSubShardsActive.set(false);
|
||||||
|
latch.countDown();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
latch.await(2, TimeUnit.MINUTES);
|
||||||
|
|
||||||
|
if (latch.getCount() != 0) {
|
||||||
|
// sanity check
|
||||||
|
fail("We think that split was successful but sub-shard states were not updated even after 2 minutes.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cloudClient.commit(); // for visibility of results on sub-shards
|
||||||
|
|
||||||
|
checkShardConsistency(true, true, addFails, deleteFails);
|
||||||
|
long ctrlDocs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
|
||||||
|
// ensure we have added more than 0 docs
|
||||||
|
long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
|
||||||
|
assertTrue("Found " + ctrlDocs + " control docs", cloudClientDocs > 0);
|
||||||
|
assertEquals("Found " + ctrlDocs + " control docs and " + cloudClientDocs + " cloud docs", ctrlDocs, cloudClientDocs);
|
||||||
|
|
||||||
|
// check consistency of sub-shard replica explicitly because checkShardConsistency methods doesn't
|
||||||
|
// handle new shards/replica so well.
|
||||||
|
if (areSubShardsActive.get()) {
|
||||||
|
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
|
||||||
|
DocCollection collection = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION);
|
||||||
|
int numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_0));
|
||||||
|
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_0", 2, numReplicasChecked);
|
||||||
|
numReplicasChecked = assertConsistentReplicas(collection.getSlice(SHARD1_1));
|
||||||
|
assertEquals("We should have checked consistency for exactly 2 replicas of shard1_1", 2, numReplicasChecked);
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
stop.set(true);
|
||||||
|
monkeyThread.join();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean doesReplicaCoreExist(Replica replica) throws IOException {
|
||||||
|
try (HttpSolrClient client = new HttpSolrClient.Builder(replica.getStr(BASE_URL_PROP))
|
||||||
|
.withHttpClient(cloudClient.getLbClient().getHttpClient()).build()) {
|
||||||
|
String coreName = replica.getCoreName();
|
||||||
|
try {
|
||||||
|
CoreAdminResponse status = CoreAdminRequest.getStatus(coreName, client);
|
||||||
|
if (status.getCoreStatus(coreName) == null || status.getCoreStatus(coreName).size() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Error gettting core status of replica " + replica + ". Perhaps it does not exist!", e);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSplitShardWithRule() throws Exception {
|
public void testSplitShardWithRule() throws Exception {
|
||||||
waitForThingsToLevelOut(15);
|
waitForThingsToLevelOut(15);
|
||||||
|
@@ -17,7 +17,6 @@
 package org.apache.solr.cloud;
 
 import java.io.File;
-import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.net.URL;
 import java.util.ArrayList;
@@ -27,10 +26,8 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.atomic.AtomicInteger;
 
 import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
-
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressSysoutChecks;
@@ -235,70 +232,6 @@ public class TestMiniSolrCloudCluster extends LuceneTestCase {
     }
   }
 
-  @Test
-  public void testErrorsInStartup() throws Exception {
-
-    AtomicInteger jettyIndex = new AtomicInteger();
-
-    MiniSolrCloudCluster cluster = null;
-    try {
-      cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
-        @Override
-        public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
-          if (jettyIndex.incrementAndGet() != 2)
-            return super.startJettySolrRunner(name, context, config);
-          throw new IOException("Fake exception on startup!");
-        }
-      };
-      fail("Expected an exception to be thrown from MiniSolrCloudCluster");
-    }
-    catch (Exception e) {
-      assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
-      assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
-      assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
-    }
-    finally {
-      if (cluster != null)
-        cluster.shutdown();
-    }
-  }
-
-  @Test
-  public void testErrorsInShutdown() throws Exception {
-
-    AtomicInteger jettyIndex = new AtomicInteger();
-
-    MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
-      @Override
-      protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
-        JettySolrRunner j = super.stopJettySolrRunner(jetty);
-        if (jettyIndex.incrementAndGet() == 2)
-          throw new IOException("Fake IOException on shutdown!");
-        return j;
-      }
-    };
-
-    try {
-      cluster.shutdown();
-      fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
-    }
-    catch (Exception e) {
-      assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
-      assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
-      assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
-    }
-
-  }
-
-  @Test
-  public void testExtraFilters() throws Exception {
-    Builder jettyConfig = JettyConfig.builder();
-    jettyConfig.waitForLoadingCoresToFinish(null);
-    jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
-    MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
-    cluster.shutdown();
-  }
-
   @Test
   public void testCollectionCreateWithoutCoresThenDelete() throws Exception {
 
@ -1,207 +0,0 @@
|
|||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.solr.cloud;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.lang.invoke.MethodHandles;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
|
||||||
import org.apache.lucene.index.TieredMergePolicy;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.solr.SolrTestCaseJ4;
|
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
|
||||||
import org.apache.solr.client.solrj.embedded.JettyConfig;
|
|
||||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
|
||||||
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
|
||||||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
|
||||||
import org.apache.solr.common.cloud.ClusterState;
|
|
||||||
import org.apache.solr.common.cloud.Replica;
|
|
||||||
import org.apache.solr.common.cloud.Slice;
|
|
||||||
import org.apache.solr.common.cloud.SolrZkClient;
|
|
||||||
import org.apache.solr.common.cloud.ZkStateReader;
|
|
||||||
import org.apache.solr.core.CoreDescriptor;
|
|
||||||
import org.apache.solr.index.TieredMergePolicyFactory;
|
|
||||||
import org.apache.solr.util.RevertDefaultThreadHandlerRule;
|
|
||||||
import org.junit.ClassRule;
|
|
||||||
import org.junit.Rule;
|
|
||||||
import org.junit.Test;
|
|
||||||
import org.junit.rules.RuleChain;
|
|
||||||
import org.junit.rules.TestRule;
|
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
|
|
||||||
public class TestMiniSolrCloudClusterBase extends LuceneTestCase {
|
|
||||||
|
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
|
||||||
protected int NUM_SERVERS = 5;
|
|
||||||
protected int NUM_SHARDS = 2;
|
|
||||||
protected int REPLICATION_FACTOR = 2;
|
|
||||||
|
|
||||||
public TestMiniSolrCloudClusterBase () {
|
|
||||||
NUM_SERVERS = 5;
|
|
||||||
NUM_SHARDS = 2;
|
|
||||||
REPLICATION_FACTOR = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Rule
|
|
||||||
public TestRule solrTestRules = RuleChain
|
|
||||||
.outerRule(new SystemPropertiesRestoreRule());
|
|
||||||
|
|
||||||
@ClassRule
|
|
||||||
public static TestRule solrClassRules = RuleChain.outerRule(
|
|
||||||
new SystemPropertiesRestoreRule()).around(
|
|
||||||
new RevertDefaultThreadHandlerRule());
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testBasics() throws Exception {
|
|
||||||
final String collectionName = "testSolrCloudCollection";
|
|
||||||
testCollectionCreateSearchDelete(collectionName);
|
|
||||||
}
|
|
||||||
|
|
||||||
private MiniSolrCloudCluster createMiniSolrCloudCluster() throws Exception {
|
|
||||||
JettyConfig.Builder jettyConfig = JettyConfig.builder();
|
|
||||||
jettyConfig.waitForLoadingCoresToFinish(null);
|
|
||||||
return new MiniSolrCloudCluster(NUM_SERVERS, createTempDir(), jettyConfig.build());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId) throws Exception {
|
|
||||||
String configName = "solrCloudCollectionConfig";
|
|
||||||
File configDir = new File(SolrTestCaseJ4.TEST_HOME() + File.separator + "collection1" + File.separator + "conf");
|
|
||||||
miniCluster.uploadConfigDir(configDir, configName);
|
|
||||||
|
|
||||||
Map<String, String> collectionProperties = new HashMap<>();
|
|
||||||
collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml");
|
|
||||||
collectionProperties.put("solr.tests.maxBufferedDocs", "100000");
|
|
||||||
collectionProperties.put("solr.tests.ramBufferSizeMB", "100");
|
|
||||||
// use non-test classes so RandomizedRunner isn't necessary
|
|
||||||
if (random().nextBoolean()) {
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName());
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true");
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false");
|
|
||||||
} else {
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName());
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true");
|
|
||||||
collectionProperties.put(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false");
|
|
||||||
}
|
|
||||||
collectionProperties.put("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
|
|
||||||
collectionProperties.put("solr.directoryFactory", "solr.RAMDirectoryFactory");
|
|
||||||
|
|
||||||
miniCluster.createCollection(collectionName, NUM_SHARDS, REPLICATION_FACTOR, configName, createNodeSet, asyncId, collectionProperties);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void testCollectionCreateSearchDelete(String collectionName) throws Exception {
|
|
||||||
|
|
||||||
MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
|
|
||||||
|
|
||||||
final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
|
|
||||||
|
|
||||||
try {
|
|
||||||
assertNotNull(miniCluster.getZkServer());
|
|
||||||
List<JettySolrRunner> jettys = miniCluster.getJettySolrRunners();
|
|
||||||
assertEquals(NUM_SERVERS, jettys.size());
|
|
||||||
for (JettySolrRunner jetty : jettys) {
|
|
||||||
assertTrue(jetty.isRunning());
|
|
||||||
}
|
|
||||||
|
|
||||||
// shut down a server
|
|
||||||
JettySolrRunner stoppedServer = miniCluster.stopJettySolrRunner(0);
|
|
||||||
assertTrue(stoppedServer.isStopped());
|
|
||||||
assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
|
|
||||||
|
|
||||||
// create a server
|
|
||||||
JettySolrRunner startedServer = miniCluster.startJettySolrRunner();
|
|
||||||
assertTrue(startedServer.isRunning());
|
|
||||||
      assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());

      // create collection
      final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
      createCollection(miniCluster, collectionName, null, asyncId);
      if (asyncId != null) {
        final RequestStatusState state = AbstractFullDistribZkTestBase.getRequestStateAfterCompletion(asyncId, 330,
            cloudSolrClient);
        assertSame("did not see async createCollection completion", RequestStatusState.COMPLETED, state);
      }

      try (SolrZkClient zkClient = new SolrZkClient
          (miniCluster.getZkServer().getZkAddress(), AbstractZkTestCase.TIMEOUT, AbstractZkTestCase.TIMEOUT, null);
          ZkStateReader zkStateReader = new ZkStateReader(zkClient)) {
        zkStateReader.createClusterStateWatchersAndUpdate();
        AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);

        // modify/query collection
        cloudSolrClient.setDefaultCollection(collectionName);
        SolrInputDocument doc = new SolrInputDocument();
        doc.setField("id", "1");
        cloudSolrClient.add(doc);
        cloudSolrClient.commit();
        SolrQuery query = new SolrQuery();
        query.setQuery("*:*");
        QueryResponse rsp = cloudSolrClient.query(query);
        assertEquals(1, rsp.getResults().getNumFound());

        // remove a server not hosting any replicas
        zkStateReader.forceUpdateCollection(collectionName);
        ClusterState clusterState = zkStateReader.getClusterState();
        HashMap<String, JettySolrRunner> jettyMap = new HashMap<String, JettySolrRunner>();
        for (JettySolrRunner jetty : miniCluster.getJettySolrRunners()) {
          String key = jetty.getBaseUrl().toString().substring((jetty.getBaseUrl().getProtocol() + "://").length());
          jettyMap.put(key, jetty);
        }
        Collection<Slice> slices = clusterState.getSlices(collectionName);
        // track the servers that do not host replicas
        for (Slice slice : slices) {
          jettyMap.remove(slice.getLeader().getNodeName().replace("_solr", "/solr"));
          for (Replica replica : slice.getReplicas()) {
            jettyMap.remove(replica.getNodeName().replace("_solr", "/solr"));
          }
        }
        assertTrue("Expected to find a node without a replica", jettyMap.size() > 0);
        JettySolrRunner jettyToStop = jettyMap.entrySet().iterator().next().getValue();
        jettys = miniCluster.getJettySolrRunners();
        for (int i = 0; i < jettys.size(); ++i) {
          if (jettys.get(i).equals(jettyToStop)) {
            miniCluster.stopJettySolrRunner(i);
            assertEquals(NUM_SERVERS - 1, miniCluster.getJettySolrRunners().size());
          }
        }

        // now restore the original state so that this function could be called multiple times

        // re-create a server (to restore original NUM_SERVERS count)
        startedServer = miniCluster.startJettySolrRunner();
        assertTrue(startedServer.isRunning());
        assertEquals(NUM_SERVERS, miniCluster.getJettySolrRunners().size());

        doExtraTests(miniCluster, zkClient, zkStateReader, cloudSolrClient, collectionName);
      }
    }
    finally {
      miniCluster.shutdown();
    }
  }

  protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader,
      CloudSolrClient cloudSolrClient, String defaultCollName) throws Exception { /*do nothing*/ }

}
@@ -128,20 +128,6 @@ public class TestMiniSolrCloudClusterKerberos extends TestMiniSolrCloudCluster {
     super.testCollectionCreateSearchDelete();
   }
 
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
-  @Test
-  @Override
-  public void testErrorsInShutdown() throws Exception {
-    super.testErrorsInShutdown();
-  }
-
-  @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/HADOOP-9893")
-  @Test
-  @Override
-  public void testErrorsInStartup() throws Exception {
-    super.testErrorsInStartup();
-  }
-
   @Override
   public void tearDown() throws Exception {
     System.clearProperty("java.security.auth.login.config");
@@ -25,6 +25,7 @@ import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.lucene.util.Constants;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -57,9 +58,10 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
   private static SolrClient solrClient;
 
   private static String getUsersFirstGroup() throws Exception {
+    String group = "*"; // accept any group if a group can't be found
+    if (!Constants.WINDOWS) { // does not work on Windows!
       org.apache.hadoop.security.Groups hGroups =
           new org.apache.hadoop.security.Groups(new Configuration());
-    String group = "*"; // accept any group if a group can't be found
       try {
         List<String> g = hGroups.getGroups(System.getProperty("user.name"));
         if (g != null && g.size() > 0) {
@@ -68,6 +70,7 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
       } catch (NullPointerException npe) {
         // if user/group doesn't exist on test box
       }
+    }
     return group;
   }
 
@@ -92,6 +95,8 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
 
   @BeforeClass
   public static void startup() throws Exception {
+    assumeFalse("Hadoop does not work on Windows", Constants.WINDOWS);
+
     System.setProperty("authenticationPlugin", HttpParamDelegationTokenPlugin.class.getName());
     System.setProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED, "true");
 
@@ -151,7 +156,9 @@ public class TestSolrCloudWithSecureImpersonation extends SolrTestCaseJ4 {
       miniCluster.shutdown();
     }
     miniCluster = null;
+    if (solrClient != null) {
       solrClient.close();
+    }
     solrClient = null;
     System.clearProperty("authenticationPlugin");
     System.clearProperty(KerberosPlugin.DELEGATION_TOKEN_ENABLED);
@@ -1,113 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.core;

import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.Properties;

import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.handler.IndexFetcher;
import org.apache.solr.util.AbstractSolrTestCase;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;
import org.xml.sax.SAXException;

/**
 *
 */
public class TestArbitraryIndexDir extends AbstractSolrTestCase {

  @Rule
  public TestRule testRules = new SystemPropertiesRestoreRule();

  // TODO: fix this test to not require FSDirectory

  @BeforeClass
  public static void beforeClass() {
    // this test wants to start solr, and then open a separate indexwriter of its own on the same dir.
    System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
    System.setProperty("solr.directoryFactory", "org.apache.solr.core.MockFSDirectoryFactory");
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    initCore("solrconfig.xml", "schema12.xml");
  }

  @Test
  public void testLoadNewIndexDir() throws IOException, ParserConfigurationException, SAXException {
    //add a doc in original index dir
    assertU(adoc("id", String.valueOf(1),
        "name", "name"+String.valueOf(1)));
    //create a new index dir and index.properties file
    File idxprops = new File(h.getCore().getDataDir() + IndexFetcher.INDEX_PROPERTIES);
    Properties p = new Properties();
    File newDir = new File(h.getCore().getDataDir() + "index_temp");
    newDir.mkdirs();
    p.put("index", newDir.getName());
    Writer os = null;
    try {
      os = new OutputStreamWriter(new FileOutputStream(idxprops), StandardCharsets.UTF_8);
      p.store(os, "index properties");
    } catch (Exception e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "Unable to write " + IndexFetcher.INDEX_PROPERTIES, e);
    } finally {
      IOUtils.closeWhileHandlingException(os);
    }

    //add a doc in the new index dir
    Directory dir = newFSDirectory(newDir.toPath());
    IndexWriter iw = new IndexWriter(
        dir,
        new IndexWriterConfig(new StandardAnalyzer())
    );
    Document doc = new Document();
    doc.add(new TextField("id", "2", Field.Store.YES));
    doc.add(new TextField("name", "name2", Field.Store.YES));
    iw.addDocument(doc);
    iw.commit();
    iw.close();

    //commit will cause searcher to open with the new index dir
    assertU(commit());
    h.getCoreContainer().reload(h.getCore().getName());
    //new index dir contains just 1 doc.
    assertQ("return doc with id 2",
        req("id:2"),
        "*[count(//doc)=1]"
    );
    dir.close();
  }
}
@@ -278,11 +278,11 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
     assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
     assertEquals("Query has only one token", 1, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
-    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
     assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
     assertEquals("Query has only one token", 1, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
-    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
     assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
     assertEquals("Query has only one token", 1, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
@@ -311,7 +311,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
     assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
     assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
     assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
-    tokenList = valueResult.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
     assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
     assertEquals("Expecting 6 tokens", 6, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@@ -320,7 +320,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
     assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
     assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
     assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
-    tokenList = valueResult.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
     assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
     assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
@@ -209,7 +209,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
     assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
     assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
     assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
     assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
     assertEquals(tokenList.size(), 10);
     assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@@ -222,7 +222,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
     assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
     assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
     assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
     assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
     assertEquals(tokenList.size(), 8);
     assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
@@ -258,12 +258,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
     assertEquals(2, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
     assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
-    tokenList = queryPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
     assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
     assertEquals(2, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
     assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
-    tokenList = queryPart.get("org.apache.lucene.analysis.StopFilter");
+    tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
     assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
     assertEquals(2, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
@@ -416,7 +416,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
     assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
     assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
     assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
-    tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
+    tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
     assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
     assertEquals(6, tokenList.size());
     assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));
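The assertions above now look the filters up under their org.apache.lucene.analysis.core package names. A minimal, self-contained sketch (not part of this commit; the input string and stop-word set are illustrative assumptions) of a token stream built from those same core classes:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class CoreFilterChainSketch {
  public static void main(String[] args) throws Exception {
    // Tokenize, lower-case, then remove English stop words - the same chain the tests inspect.
    StandardTokenizer tokenizer = new StandardTokenizer();
    tokenizer.setReader(new StringReader("The Quick Fox Jumped Over The Lazy Dogs"));
    TokenStream ts = new StopFilter(new LowerCaseFilter(tokenizer), StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // e.g. "quick", "fox", "jumped", ...
    }
    ts.end();
    ts.close();
  }
}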
@@ -0,0 +1,236 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import static org.hamcrest.CoreMatchers.is;

import java.io.IOException;
import java.util.List;
import java.util.Random;

import org.apache.solr.BaseDistributedSearchTestCase;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.junit.Before;

public class DistributedFacetExistsSmallTest extends BaseDistributedSearchTestCase {

  public static final String FLD = "t_s";
  private int maxId;

  public DistributedFacetExistsSmallTest() {
  }

  @Before
  public void prepareIndex() throws Exception {
    del("*:*");

    final Random rnd = random();
    index(id, maxId=rnd.nextInt(5), FLD, "AAA");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "B");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "BB");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "BBB");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "CC");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");
    index(id, maxId+=1+rnd.nextInt(5), FLD, "CCC");

    final SolrClient shard0 = clients.get(0);
    // expectedly fails the test
    //shard0.add(sdoc("id", 13, FLD, "DDD"));
    commit();

    handle.clear();
    handle.put("QTime", SKIPVAL);
    handle.put("timestamp", SKIPVAL);
    handle.put("maxScore", SKIPVAL);
    handle.put("_version_", SKIPVAL);
  }

  @ShardsFixed(num=4)
  public void test() throws Exception{
    checkBasicRequest();
    checkWithMinCountEqOne();
    checkWithSortCount();
    checkWithMethodSetPerField();

    {
      // empty enum for checking npe
      final ModifiableSolrParams params = buildParams();
      params.remove("facet.exists");
      QueryResponse rsp = query(params);
    }

    checkRandomParams();

    checkInvalidMincount();
  }

  private void checkRandomParams() throws Exception {
    final ModifiableSolrParams params = buildParams();
    Random rand = random();

    if (rand.nextBoolean()) {
      int from;
      params.set("q", "["+(from = rand.nextInt(maxId/2))+
          " TO "+((from-1)+(rand.nextInt(maxId)))+"]");
    }

    int offset = 0;
    int indexSize = 6;
    if (rand.nextInt(100) < 20) {
      if (rand.nextBoolean()) {
        offset = rand.nextInt(100) < 10 ? rand.nextInt(indexSize *2) : rand.nextInt(indexSize/3+1);
      }
      params.add("facet.offset", Integer.toString(offset));
    }

    int limit = 100;
    if (rand.nextInt(100) < 20) {
      if (rand.nextBoolean()) {
        limit = rand.nextInt(100) < 10 ? rand.nextInt(indexSize/2+1) : rand.nextInt(indexSize*2);
      }
      params.add("facet.limit", Integer.toString(limit));
    }

    if (rand.nextBoolean()) {
      params.add("facet.sort", rand.nextBoolean() ? "index" : "count");
    }

    if ( rand.nextInt(100) < 20) {
      final String[] prefixes = new String[] {"A","B","C"};
      params.add("facet.prefix", prefixes[rand.nextInt(prefixes.length)]);
    }

    if (rand.nextInt(100) < 20) {
      params.add("facet.missing", "true");
    }

    if (rand.nextInt(100) < 20) { // assigning only valid vals
      params.add("facet.mincount", rand.nextBoolean() ? "0": "1" );
    }

    final boolean shardRespondsWithMissingEvenLimitIsZero =
        params.getBool("facet.missing", false) && params.getInt("facet.limit", 100)==0;
    // skip miss count check, here cloud is different to non-distrib
    if (shardRespondsWithMissingEvenLimitIsZero ) {
      handle.put(null, SKIP);
    }
    query(params);
    if (shardRespondsWithMissingEvenLimitIsZero ) {
      handle.remove(null);
    }
  }

  private void checkInvalidMincount() throws SolrServerException, IOException {
    final ModifiableSolrParams params = buildParams();
    if (random().nextBoolean()) {
      params.remove("facet.exists");
      params.set("f."+FLD+".facet.exists","true");
    }

    if (random().nextBoolean()) {
      params.set("facet.mincount", ""+(2+random().nextInt(100)) );
    } else {
      params.set("f."+FLD+".facet.mincount", ""+(2+random().nextInt(100)) );
    }

    try {
      if (random().nextBoolean()) {
        setDistributedParams(params);
        queryServer(params);
      } else {
        params.set("distrib", "false");
        controlClient.query(params);
      }
      fail();
    } catch(SolrException e) { // check that distrib and single index search fail the same
      assertEquals(e.code(), ErrorCode.BAD_REQUEST.code);
      assertTrue(e.getMessage().contains("facet.exists"));
      assertTrue(e.getMessage().contains("facet.mincount"));
      assertTrue(e.getMessage().contains(FLD));
    }
  }

  private void checkBasicRequest() throws Exception {
    final ModifiableSolrParams params = buildParams();
    QueryResponse rsp = query(params);
    assertResponse(rsp);
  }

  private void checkWithMinCountEqOne() throws Exception {
    final ModifiableSolrParams params = buildParams("facet.mincount","1");
    QueryResponse rsp = query(params);
    assertResponse(rsp);
  }

  private void checkWithSortCount() throws Exception {
    final ModifiableSolrParams params = buildParams("facet.sort","count");
    QueryResponse rsp = query(params);
    assertResponse(rsp);
  }

  private void checkWithMethodSetPerField() throws Exception {
    final ModifiableSolrParams params = buildParams("f." + FLD + ".facet.exists", "true");
    params.remove("facet.exists");
    QueryResponse rsp = query(params);
    assertResponse(rsp);
  }

  private ModifiableSolrParams buildParams(String... additionalParams) {
    final ModifiableSolrParams params = new ModifiableSolrParams();

    params.add("q", "*:*");
    params.add("rows", "0");
    //params.add("debugQuery", "true");
    params.add("facet", "true");
    params.add("sort", "id asc");

    if(random().nextBoolean()){
      params.add("facet.method", "enum");
    }

    params.add("facet.exists", "true");
    params.add("facet.field", FLD);
    for(int i = 0; i < additionalParams.length;) {
      params.add(additionalParams[i++], additionalParams[i++]);
    }
    return params;
  }

  private void assertResponse(QueryResponse rsp) {
    final FacetField facetField = rsp.getFacetField(FLD);

    assertThat(facetField.getValueCount(), is(6));
    final List<FacetField.Count> counts = facetField.getValues();
    for (FacetField.Count count : counts) {
      assertThat("Count for: " + count.getName(), count.getCount(), is(1L));
    }
    assertThat(counts.get(0).getName(), is("AAA"));
    assertThat(counts.get(1).getName(), is("B"));
    assertThat(counts.get(2).getName(), is("BB"));
  }
}
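For orientation, a standalone SolrJ sketch (not part of the commit; the base URL, collection, and field name are illustrative assumptions) of the kind of request this new test exercises: with facet.exists=true every bucket reports 1 if the term occurs at all, instead of a full count.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class FacetExistsQuerySketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      SolrQuery q = new SolrQuery("*:*");
      q.setRows(0);
      q.setFacet(true);
      q.addFacetField("t_s");
      q.set("facet.exists", "true"); // cap each facet count at 1 ("does the term exist?")
      q.set("facet.method", "enum"); // facet.exists is tied to the enum method
      QueryResponse rsp = client.query(q);
      rsp.getFacetField("t_s").getValues()
          .forEach(c -> System.out.println(c.getName() + " -> " + c.getCount()));
    }
  }
}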
@@ -38,7 +38,6 @@ import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.util.TimeZoneUtils;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.noggit.ObjectBuilder;
 import org.slf4j.Logger;
@@ -494,11 +493,9 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
 
     ModifiableSolrParams params = params("q","*:*", "rows","0", "facet","true", "facet.field","{!key=myalias}"+field);
 
-    String[] methods = {null, "fc","enum","fcs", "uif"
-    };
+    String[] methods = {null, "fc","enum","fcs", "uif"};
     if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
-      methods = new String[]{null, "fc","enum", "uif"
-      };
+      methods = new String[]{null, "fc","enum", "uif"};
     }
 
     prefixes = prefixes==null ? new String[]{null} : prefixes;
@@ -2017,6 +2014,49 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
     doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "3");
     doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "100");
     doFacetPrefix("t_s", null, "", "facet.method", "fc");
+    doFacetExistsPrefix("t_s", null, "");
+    doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "3");
+    doFacetExistsPrefix("t_s", null, "", "facet.enum.cache.minDf", "100");
+  }
+
+  @Test
+  public void testFacetExistsShouldThrowExceptionForMincountGreaterThanOne () throws Exception {
+    final String f = "t_s";
+    final List<String> msg = Arrays.asList("facet.mincount", "facet.exists", f);
+    Collections.shuffle(msg, random());
+    assertQEx("checking global method or per field", msg.get(0),
+        req("q", "id:[* TO *]"
+            ,"indent","on"
+            ,"facet","true"
+            , random().nextBoolean() ? "facet.exists": "f."+f+".facet.exists", "true"
+            ,"facet.field", f
+            , random().nextBoolean() ? "facet.mincount" : "f."+f+".facet.mincount" ,
+               "" + (2+random().nextInt(Integer.MAX_VALUE-2))
+        )
+        , ErrorCode.BAD_REQUEST);
+
+    assertQ("overriding per field",
+        req("q", "id:[* TO *]"
+            ,"indent","on"
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"f."+f+".facet.exists", "false"
+            ,"facet.field", f
+            ,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
+        ),
+        "//lst[@name='facet_fields']/lst[@name='"+f+"']");
+
+    assertQ("overriding per field",
+        req("q", "id:[* TO *]"
+            ,"indent","on"
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", f
+            ,"facet.mincount",""+(2+random().nextInt(Integer.MAX_VALUE-2))
+            ,"f."+f+".facet.mincount", random().nextBoolean() ? "0":"1"
+        ),
+        "//lst[@name='facet_fields']/lst[@name='"+f+"']");
+
   }
 
   static void indexFacetPrefixSingleValued() {
@@ -2037,7 +2077,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
   }
 
   @Test
-  @Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
+  //@Ignore("SOLR-8466 - facet.method=uif ignores facet.contains")
   public void testFacetContainsUif() {
     doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "uif");
     doFacetPrefix("contains_s1", null, "Astra", "facet.method", "uif", "facet.contains", "Ast");
@@ -2063,6 +2103,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
     doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "aSt", "facet.contains.ignoreCase", "true");
     doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "asT", "facet.contains.ignoreCase", "true");
     doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "aST", "facet.contains.ignoreCase", "true");
+    doFacetExistsPrefix("contains_s1", null, "Astra", "facet.contains", "Ast");
   }
 
   static void indexFacetPrefix(String idPrefix, String f, String termSuffix, String g) {
@@ -2313,6 +2354,239 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
     );
   }
 
+  public void doFacetExistsPrefix(String f, String local, String termSuffix, String... params) {
+    String indent="on";
+    String pre = "//lst[@name='"+f+"']";
+    String lf = local==null ? f : local+f;
+
+    assertQ("test field facet.method",
+        req(params, "q", "id:[* TO *]"
+            ,"indent", indent
+            ,"facet", "true"
+            ,"f."+lf+".facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount", "0"
+            ,"facet.offset", "0"
+            ,"facet.limit", "100"
+            ,"facet.sort", "count"
+            ,"facet.prefix", "B"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
+        ,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
+        ,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix middle, exact match first term",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","B"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
+        ,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
+        ,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix middle, exact match first term, unsorted",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","index"
+            ,"facet.prefix","B"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
+        ,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='BB"+termSuffix+"'][.='1']"
+        ,pre+"/int[3][@name='BBB"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix middle, paging",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","1"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","B"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
+        ,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='BBB"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix middle, paging",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","1"
+            ,"facet.limit","1"
+            ,"facet.sort","count"
+            ,"facet.prefix","B"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
+        ,pre+"/int[1][@name='BB"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix end, not exact match",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","C"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
+        ,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix end, exact match",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","CC"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
+        ,pre+"/int[1][@name='CC"+termSuffix+"'][.='1']"
+        ,pre+"/int[2][@name='CCC"+termSuffix+"'][.='1']"
+    );
+
+    assertQ("test facet.prefix past end",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","X"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
+    );
+
+    assertQ("test facet.prefix past end",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","1"
+            ,"facet.limit","-1"
+            ,"facet.sort","count"
+            ,"facet.prefix","X"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
+    );
+
+    assertQ("test facet.prefix at start, exact match",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","AAA"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
+        ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
+    );
+    assertQ("test facet.prefix at Start, not exact match",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","AA"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
+        ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
+    );
+    assertQ("test facet.prefix before start",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","0"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","999"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
+    );
+
+    assertQ("test facet.prefix before start",
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","0"
+            ,"facet.offset","2"
+            ,"facet.limit","100"
+            ,"facet.sort","count"
+            ,"facet.prefix","999"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
+    );
+
+    // test offset beyond what is collected internally in queue
+    assertQ(
+        req(params, "q", "id:[* TO *]"
+            ,"indent",indent
+            ,"facet","true"
+            ,"facet.exists", "true"
+            ,"facet.field", lf
+            ,"facet.mincount","1"
+            ,"facet.offset","5"
+            ,"facet.limit","10"
+            ,"facet.sort","count"
+            ,"facet.prefix","CC"
+        )
+        ,"*[count(//lst[@name='facet_fields']/lst/int)=0]"
+    );
+  }
+
   public void doFacetContains(String f, String g, String termSuffix, String contains, String groupContains, String... params) {
     String indent="on";
     String pre = "//lst[@name='"+f+"']";
@@ -263,6 +263,23 @@ public class TestCollapseQParserPlugin extends SolrTestCaseJ4 {
 
   }
 
+  @Test // https://issues.apache.org/jira/browse/SOLR-9494
+  public void testNeedsScoreBugFixed() throws Exception {
+    String[] doc = {"id","1", "group_s", "xyz", "text_ws", "hello xxx world"};
+    assertU(adoc(doc));
+    assertU(commit());
+
+    ModifiableSolrParams params = params(
+        "q", "{!surround df=text_ws} 2W(hello, world)", // a SpanQuery that matches
+        "fq", "{!collapse field=group_s}", // collapse on some field
+        // note: rows= whatever; doesn't matter
+        "facet", "true", // facet on something
+        "facet.field", "group_s"
+    );
+    assertQ(req(params));
+    assertQ(req(params)); // fails *second* time!
+  }
+
   @Test
   public void testMergeBoost() throws Exception {
 
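A hedged standalone sketch (not part of the commit; the base URL and field names are illustrative) of issuing the same combination from SolrJ: a surround span query, the CollapsingQParser as a filter, and a facet on the collapse field, which is the scenario SOLR-9494 covers.

import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;

public class CollapseWithFacetSketch {
  public static void main(String[] args) throws Exception {
    try (HttpSolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      SolrQuery q = new SolrQuery();
      q.setQuery("{!surround df=text_ws} 2W(hello, world)"); // span-query-producing parser
      q.addFilterQuery("{!collapse field=group_s}");         // keep one document per group
      q.setFacet(true);
      q.addFacetField("group_s");
      System.out.println(client.query(q).getResults().getNumFound());
    }
  }
}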
@@ -40,20 +40,16 @@ import org.apache.http.message.BasicHeader;
 import org.apache.http.util.EntityUtils;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.embedded.JettySolrRunner;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
 import org.apache.solr.client.solrj.request.GenericSolrRequest;
 import org.apache.solr.client.solrj.request.UpdateRequest;
-import org.apache.solr.cloud.MiniSolrCloudCluster;
-import org.apache.solr.cloud.TestMiniSolrCloudClusterBase;
+import org.apache.solr.cloud.SolrCloudTestCase;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
-import org.apache.solr.common.cloud.SolrZkClient;
-import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.ContentStreamBase;
@@ -61,50 +57,50 @@ import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.util.Utils;
 import org.apache.solr.util.SolrCLI;
+import org.junit.BeforeClass;
+import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static java.util.Collections.singletonMap;
-import static org.apache.solr.SolrTestCaseJ4.getHttpSolrClient;
-import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
 
-public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
+public class BasicAuthIntegrationTest extends SolrCloudTestCase {
 
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  @Override
-  protected void doExtraTests(MiniSolrCloudCluster miniCluster, SolrZkClient zkClient, ZkStateReader zkStateReader,
-                              CloudSolrClient cloudSolrClient, String defaultCollName) throws Exception {
+  private static final String COLLECTION = "authCollection";
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(3)
+        .addConfig("conf", configset("cloud-minimal"))
+        .configure();
+
+    CollectionAdminRequest.createCollection(COLLECTION, "conf", 3, 1).process(cluster.getSolrClient());
+  }
+
+  @Test
+  public void testBasicAuth() throws Exception {
+
     String authcPrefix = "/admin/authentication";
     String authzPrefix = "/admin/authorization";
 
-    String old = cloudSolrClient.getDefaultCollection();
-    cloudSolrClient.setDefaultCollection(null);
-
     NamedList<Object> rsp;
     HttpClient cl = null;
     try {
       cl = HttpClientUtil.createClient(null);
-      String baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
+
+      JettySolrRunner randomJetty = cluster.getRandomJetty(random());
+      String baseUrl = randomJetty.getBaseUrl().toString();
       verifySecurityStatus(cl, baseUrl + authcPrefix, "/errorMessages", null, 20);
-      zkClient.setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
+      zkClient().setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true);
       verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
 
-      boolean found = false;
-      for (JettySolrRunner jettySolrRunner : miniCluster.getJettySolrRunners()) {
-        if(baseUrl.contains(String.valueOf(jettySolrRunner.getLocalPort()))){
-          found = true;
-          jettySolrRunner.stop();
-          jettySolrRunner.start();
-          verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
-          break;
-        }
-      }
-
-      assertTrue("No server found to restart , looking for : "+baseUrl , found);
+      randomJetty.stop();
+      randomJetty.start(false);
+      baseUrl = randomJetty.getBaseUrl().toString();
+      verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20);
 
       String command = "{\n" +
           "'set-user': {'harry':'HarryIsCool'}\n" +
@@ -112,11 +108,12 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
 
       GenericSolrRequest genericReq = new GenericSolrRequest(SolrRequest.METHOD.POST, authcPrefix, new ModifiableSolrParams());
       genericReq.setContentStreams(Collections.singletonList(new ContentStreamBase.ByteArrayStream(command.getBytes(UTF_8), "")));
-      try {
-        cloudSolrClient.request(genericReq);
-        fail("Should have failed with a 401");
-      } catch (HttpSolrClient.RemoteSolrException e) {
-      }
+      HttpSolrClient.RemoteSolrException exp = expectThrows(HttpSolrClient.RemoteSolrException.class, () -> {
+        cluster.getSolrClient().request(genericReq);
+      });
+      assertEquals(401, exp.code());
 
       command = "{\n" +
           "'set-user': {'harry':'HarryIsUberCool'}\n" +
          "}";
@@ -130,7 +127,8 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
       int statusCode = r.getStatusLine().getStatusCode();
       Utils.consumeFully(r.getEntity());
       assertEquals("proper_cred sent, but access denied", 200, statusCode);
-      baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
+      baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
+
       verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/credentials/harry", NOT_NULL_PREDICATE, 20);
       command = "{\n" +
@@ -139,7 +137,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
 
       executeCommand(baseUrl + authzPrefix, cl,command, "solr", "SolrRocks");
 
-      baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP);
+      baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString();
       verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/user-role/harry", NOT_NULL_PREDICATE, 20);
 
       executeCommand(baseUrl + authzPrefix, cl, Utils.toJSONString(singletonMap("set-permission", Utils.makeMap
@@ -153,7 +151,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
           ("name", "collection-admin-edit", "role", "admin"))), "harry", "HarryIsUberCool" );
       verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[2]/name", "collection-admin-edit", 20);
 
-      CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(defaultCollName);
+      CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(COLLECTION);
 
       try (HttpSolrClient solrClient = getHttpSolrClient(baseUrl)) {
         try {
@@ -170,18 +168,17 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
 
         }
       }
-      cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
+      cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
           .setBasicAuthCredentials("harry", "HarryIsUberCool"));
 
       try {
-        cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName)
+        cluster.getSolrClient().request(CollectionAdminRequest.reloadCollection(COLLECTION)
             .setBasicAuthCredentials("harry", "Cool12345"));
         fail("This should not succeed");
       } catch (HttpSolrClient.RemoteSolrException e) {
 
       }
 
-      cloudSolrClient.setDefaultCollection(old);
       executeCommand(baseUrl + authzPrefix, cl,"{set-permission : { name : update , role : admin}}", "harry", "HarryIsUberCool");
 
       SolrInputDocument doc = new SolrInputDocument();
@@ -190,7 +187,7 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase {
       update.setBasicAuthCredentials("harry","HarryIsUberCool");
       update.add(doc);
       update.setCommitWithin(100);
-      cloudSolrClient.request(update);
+      cluster.getSolrClient().request(update, COLLECTION);
 
 
       executeCommand(baseUrl + authcPrefix, cl, "{set-property : { blockUnknown: true}}", "harry", "HarryIsUberCool");
@@ -116,23 +116,7 @@
          persistent, and doesn't work with replication.
          -->
     <directoryFactory name="DirectoryFactory"
-                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
-
-
-    <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
-         otherwise they will be ignored. If you don't plan on using hdfs,
-         you can safely remove this section. -->
-    <!-- The root directory that collection data should be written to. -->
-    <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
-    <!-- The hadoop configuration files to use for the hdfs client. -->
-    <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
-    <!-- Enable/Disable the hdfs cache. -->
-    <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
-    <!-- Enable/Disable using one global cache for all SolrCores.
-         The settings used will be from the first HdfsDirectoryFactory created. -->
-    <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
-
-    </directoryFactory>
+                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

     <!-- The CodecFactory for defining the format of the inverted index.
          The default implementation is SchemaCodecFactory, which is the official Lucene
@@ -119,23 +119,7 @@
          persistent, and doesn't work with replication.
          -->
     <directoryFactory name="DirectoryFactory"
-                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
-
-
-    <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
-         otherwise they will be ignored. If you don't plan on using hdfs,
-         you can safely remove this section. -->
-    <!-- The root directory that collection data should be written to. -->
-    <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
-    <!-- The hadoop configuration files to use for the hdfs client. -->
-    <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
-    <!-- Enable/Disable the hdfs cache. -->
-    <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
-    <!-- Enable/Disable using one global cache for all SolrCores.
-         The settings used will be from the first HdfsDirectoryFactory created. -->
-    <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
-
-    </directoryFactory>
+                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

     <!-- The CodecFactory for defining the format of the inverted index.
          The default implementation is SchemaCodecFactory, which is the official Lucene
@@ -116,23 +116,7 @@
          persistent, and doesn't work with replication.
          -->
     <directoryFactory name="DirectoryFactory"
-                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
-
-
-    <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
-         otherwise they will be ignored. If you don't plan on using hdfs,
-         you can safely remove this section. -->
-    <!-- The root directory that collection data should be written to. -->
-    <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
-    <!-- The hadoop configuration files to use for the hdfs client. -->
-    <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
-    <!-- Enable/Disable the hdfs cache. -->
-    <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
-    <!-- Enable/Disable using one global cache for all SolrCores.
-         The settings used will be from the first HdfsDirectoryFactory created. -->
-    <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
-
-    </directoryFactory>
+                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

     <!-- The CodecFactory for defining the format of the inverted index.
          The default implementation is SchemaCodecFactory, which is the official Lucene
@@ -116,23 +116,7 @@
          persistent, and doesn't work with replication.
          -->
     <directoryFactory name="DirectoryFactory"
-                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
-
-
-    <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
-         otherwise they will be ignored. If you don't plan on using hdfs,
-         you can safely remove this section. -->
-    <!-- The root directory that collection data should be written to. -->
-    <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
-    <!-- The hadoop configuration files to use for the hdfs client. -->
-    <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
-    <!-- Enable/Disable the hdfs cache. -->
-    <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
-    <!-- Enable/Disable using one global cache for all SolrCores.
-         The settings used will be from the first HdfsDirectoryFactory created. -->
-    <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
-
-    </directoryFactory>
+                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

     <!-- The CodecFactory for defining the format of the inverted index.
          The default implementation is SchemaCodecFactory, which is the official Lucene
@@ -117,23 +117,7 @@
          persistent, and doesn't work with replication.
          -->
     <directoryFactory name="DirectoryFactory"
-                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}">
-
-
-    <!-- These will be used if you are using the solr.HdfsDirectoryFactory,
-         otherwise they will be ignored. If you don't plan on using hdfs,
-         you can safely remove this section. -->
-    <!-- The root directory that collection data should be written to. -->
-    <str name="solr.hdfs.home">${solr.hdfs.home:}</str>
-    <!-- The hadoop configuration files to use for the hdfs client. -->
-    <str name="solr.hdfs.confdir">${solr.hdfs.confdir:}</str>
-    <!-- Enable/Disable the hdfs cache. -->
-    <str name="solr.hdfs.blockcache.enabled">${solr.hdfs.blockcache.enabled:true}</str>
-    <!-- Enable/Disable using one global cache for all SolrCores.
-         The settings used will be from the first HdfsDirectoryFactory created. -->
-    <str name="solr.hdfs.blockcache.global">${solr.hdfs.blockcache.global:true}</str>
-
-    </directoryFactory>
+                      class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

     <!-- The CodecFactory for defining the format of the inverted index.
          The default implementation is SchemaCodecFactory, which is the official Lucene
@@ -81,7 +81,16 @@ public class Slice extends ZkNodeProps implements Iterable<Replica> {
      * shard in that state still receives update requests from the parent shard
      * leader, however does not participate in distributed search.
      */
-    RECOVERY;
+    RECOVERY,
+
+    /**
+     * Sub-shards of a split shard are put in that state when the split is deemed failed
+     * by the overseer even though all replicas are active because either the leader node is
+     * no longer live or has a different ephemeral owner (zk session id). Such conditions can potentially
+     * lead to data loss. See SOLR-9438 for details. A shard in that state will neither receive
+     * update requests from the parent shard leader, nor participate in distributed search.
+     */
+    RECOVERY_FAILED;

     @Override
     public String toString() {
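RECOVERY_FAILED matters to anything that routes work by shard state. As a rough, hypothetical sketch (not code from the patch) of how cluster-state-reading code might skip such sub-shards; the collection name and method are placeholders:

import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;

class SliceStateSketch {
  // Skip sub-shards whose split was marked failed; visit everything else.
  void routeToHealthySlices(ClusterState clusterState, String collection) {
    for (Slice slice : clusterState.getCollection(collection).getSlices()) {
      if (slice.getState() == Slice.State.RECOVERY_FAILED) {
        continue; // neither receives updates from the parent leader nor serves queries
      }
      for (Replica replica : slice.getReplicas()) {
        // ... send work to replica.getCoreUrl() here (omitted from this sketch)
      }
    }
  }
}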
@@ -32,6 +32,7 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Path;
 import java.util.List;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.RejectedExecutionException;
 import java.util.regex.Pattern;

 import org.apache.commons.io.FileUtils;
@@ -263,7 +264,14 @@ public class SolrZkClient implements Closeable {
       @Override
       public void process(final WatchedEvent event) {
         log.debug("Submitting job to respond to event " + event);
+        try {
           zkCallbackExecutor.submit(() -> watcher.process(event));
+        } catch (RejectedExecutionException e) {
+          // If not a graceful shutdown
+          if (!isClosed()) {
+            throw e;
+          }
+        }
       }
     };
   }
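The guard above only rethrows a RejectedExecutionException when the client is not already closed, so watcher callbacks that race with a graceful shutdown are dropped quietly. A small self-contained sketch of the same pattern using plain java.util.concurrent types (none of these names are Solr API):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;

class ShutdownAwareSubmit {
  private final ExecutorService executor = Executors.newSingleThreadExecutor();
  private final AtomicBoolean closed = new AtomicBoolean(false);

  void submitOrDropIfClosing(Runnable task) {
    try {
      executor.submit(task);
    } catch (RejectedExecutionException e) {
      if (!closed.get()) {
        throw e; // rejection was not caused by our own shutdown: surface it
      }
      // otherwise we are closing; silently drop the late callback
    }
  }

  void close() {
    closed.set(true);
    executor.shutdown();
  }
}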
@@ -185,6 +185,14 @@ public interface FacetParams {
    * only use the filterCache for terms with a df >= to this parameter.
    */
   public static final String FACET_ENUM_CACHE_MINDF = FACET + ".enum.cache.minDf";

+  /**
+   * A boolean parameter that caps the facet counts at 1.
+   * With this set, a returned count will only be 0 or 1.
+   * For apps that don't need the count, this should be an optimization
+   */
+  public static final String FACET_EXISTS = FACET+".exists";
+
   /**
    * Any field whose terms the user wants to enumerate over for
    * Facet Contraint Counts (multi-value)
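For context, facet.exists is a per-request switch. A hypothetical SolrJ query that turns it on; the collection and field names below are invented for the example and are not part of the patch:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.FacetParams;

class FacetExistsSketch {
  // Ask only whether each facet term occurs: counts come back as 0 or 1.
  QueryResponse facetExistence(SolrClient client) throws Exception {
    SolrQuery q = new SolrQuery("*:*");
    q.setFacet(true);
    q.addFacetField("category");             // hypothetical field
    q.set(FacetParams.FACET_EXISTS, true);   // facet.exists=true
    return client.query("techproducts", q);  // hypothetical collection
  }
}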
@@ -182,11 +182,14 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
     // test a second query, test making a copy of the main query
     SolrQuery query2 = query.getCopy();
     query2.addFilterQuery("inStock:true");
+    Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
     response = client.query( query2 );
     Assert.assertEquals(1, query2.getFilterQueries().length);
     Assert.assertEquals(0, response.getStatus());
     Assert.assertEquals(2, response.getResults().getNumFound() );
-    Assert.assertFalse(query.getFilterQueries() == query2.getFilterQueries());
+    for (SolrDocument outDoc : response.getResults()) {
+      assertEquals(true, outDoc.getFieldValue("inStock"));
+    }

     // sanity check round tripping of params...
     query = new SolrQuery("foo");
@@ -175,31 +175,34 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
       InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
       List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
       List<Object> matchObj = generateAllDataTypes();
+      compareObjects(unmarshaledObj, matchObj);
+    } catch (IOException e) {
+      throw e;
+    }
+
+  }
+
+  private void compareObjects(List unmarshaledObj, List matchObj) {
     assertEquals(unmarshaledObj.size(), matchObj.size());
-    for(int i=0; i < unmarshaledObj.size(); i++) {
+    for (int i = 0; i < unmarshaledObj.size(); i++) {

-      if(unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
+      if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
         byte[] b1 = (byte[]) unmarshaledObj.get(i);
         byte[] b2 = (byte[]) matchObj.get(i);
         assertTrue(Arrays.equals(b1, b2));
-      } else if(unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument ) {
+      } else if (unmarshaledObj.get(i) instanceof SolrDocument && matchObj.get(i) instanceof SolrDocument) {
         assertTrue(compareSolrDocument(unmarshaledObj.get(i), matchObj.get(i)));
-      } else if(unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList ) {
+      } else if (unmarshaledObj.get(i) instanceof SolrDocumentList && matchObj.get(i) instanceof SolrDocumentList) {
         assertTrue(compareSolrDocumentList(unmarshaledObj.get(i), matchObj.get(i)));
-      } else if(unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
+      } else if (unmarshaledObj.get(i) instanceof SolrInputDocument && matchObj.get(i) instanceof SolrInputDocument) {
         assertTrue(compareSolrInputDocument(unmarshaledObj.get(i), matchObj.get(i)));
-      } else if(unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
+      } else if (unmarshaledObj.get(i) instanceof SolrInputField && matchObj.get(i) instanceof SolrInputField) {
         assertTrue(assertSolrInputFieldEquals(unmarshaledObj.get(i), matchObj.get(i)));
       } else {
         assertEquals(unmarshaledObj.get(i), matchObj.get(i));
       }

     }
-    } catch (IOException e) {
-      throw e;
-    }

   }

   @Test
@@ -267,14 +270,33 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
   }

   @Test
-  public void testResponseChildDocuments() throws IOException {
+  public void testAllTypes() throws IOException {
+    List<Object> obj = generateAllDataTypes();
+    compareObjects(
+        (List) getObject(getBytes(obj)),
+        (List) obj
+    );
+  }
+
+
+  private static Object serializeAndDeserialize(Object o) throws IOException {
+    return getObject(getBytes(o));
+  }
+  private static byte[] getBytes(Object o) throws IOException {
     JavaBinCodec javabin = new JavaBinCodec();
     ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    javabin.marshal(generateSolrDocumentWithChildDocs(), baos);
+    javabin.marshal(o, baos);
+    return baos.toByteArray();
+  }

-    SolrDocument result = (SolrDocument) javabin.unmarshal(new ByteArrayInputStream(baos.toByteArray()));
+  private static Object getObject(byte[] bytes) throws IOException {
+    return new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
+  }
+
+
+  @Test
+  public void testResponseChildDocuments() throws IOException {
+    SolrDocument result = (SolrDocument) serializeAndDeserialize(generateSolrDocumentWithChildDocs());
     assertEquals(2, result.size());
     assertEquals("1", result.getFieldValue("id"));
     assertEquals("parentDocument", result.getFieldValue("subject"));
@@ -305,13 +327,11 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
   @Test
   public void testStringCaching() throws Exception {
     Map<String, Object> m = Utils.makeMap("key1", "val1", "key2", "val2");
+    byte[] b1 = getBytes(m);//copy 1
+    byte[] b2 = getBytes(m);//copy 2
+    Map m1 = (Map) getObject(b1);
+    Map m2 = (Map) getObject(b1);

-    ByteArrayOutputStream os1 = new ByteArrayOutputStream();
-    new JavaBinCodec().marshal(m, os1);
-    Map m1 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os1.toByteArray()));
-    ByteArrayOutputStream os2 = new ByteArrayOutputStream();
-    new JavaBinCodec().marshal(m, os2);
-    Map m2 = (Map) new JavaBinCodec().unmarshal(new ByteArrayInputStream(os2.toByteArray()));
     List l1 = new ArrayList<>(m1.keySet());
     List l2 = new ArrayList<>(m2.keySet());

@@ -346,8 +366,8 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
     });


-    m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os1.toByteArray()));
-    m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(os2.toByteArray()));
+    m1 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b1));
+    m2 = (Map) new JavaBinCodec(null, stringCache).unmarshal(new ByteArrayInputStream(b2));
     l1 = new ArrayList<>(m1.keySet());
     l2 = new ArrayList<>(m2.keySet());
     assertTrue(l1.get(0).equals(l2.get(0)));
@@ -359,26 +379,19 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
   }

   public void genBinaryFiles() throws IOException {
-    JavaBinCodec javabin = new JavaBinCodec();
-    ByteArrayOutputStream os = new ByteArrayOutputStream();

     Object data = generateAllDataTypes();
+    byte[] out = getBytes(data);
-    javabin.marshal(data, os);
-    byte[] out = os.toByteArray();
     FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
     BufferedOutputStream bos = new BufferedOutputStream(fs);
     bos.write(out);
     bos.close();

     //Binary file with child documents
-    javabin = new JavaBinCodec();
     SolrDocument sdoc = generateSolrDocumentWithChildDocs();
-    os = new ByteArrayOutputStream();
-    javabin.marshal(sdoc, os);
     fs = new FileOutputStream(new File(BIN_FILE_LOCATION_CHILD_DOCS));
     bos = new BufferedOutputStream(fs);
-    bos.write(os.toByteArray());
+    bos.write(getBytes(sdoc));
     bos.close();

   }
@@ -553,12 +566,7 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
     sdoc.put("some_boolean", ""+r.nextBoolean());
     sdoc.put("another_boolean", ""+r.nextBoolean());

+    buffers[bufnum] = getBytes(sdoc);
-    JavaBinCodec javabin = new JavaBinCodec();
-    ByteArrayOutputStream os = new ByteArrayOutputStream();
-    javabin.marshal(sdoc, os);
-    os.toByteArray();
-    buffers[bufnum] = os.toByteArray();
   }

   int ret = 0;
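The refactor funnels every serialization in the test through the new getBytes/getObject helpers. As a standalone illustration of the same JavaBinCodec round trip (a sketch, not code from the patch; the field value is made up):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.util.JavaBinCodec;

class JavaBinRoundTripSketch {
  // Serialize any JavaBin-supported object to bytes and read it back.
  static Object roundTrip(Object o) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    new JavaBinCodec().marshal(o, baos);
    return new JavaBinCodec().unmarshal(new ByteArrayInputStream(baos.toByteArray()));
  }

  public static void main(String[] args) throws IOException {
    SolrDocument doc = new SolrDocument();
    doc.setField("id", "1");
    SolrDocument copy = (SolrDocument) roundTrip(doc);
    System.out.println(copy.getFieldValue("id")); // prints: 1
  }
}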
@@ -52,9 +52,6 @@
   <!-- redefine the clover setup, because we dont want to run clover for the test-framework -->
   <target name="-clover.setup" if="run.clover"/>

-  <!-- redefine the test compilation, so it's just a no-op -->
-  <target name="compile-test"/>
-
   <!-- redefine the forbidden apis for tests, as we check ourselves -->
   <target name="-check-forbidden-tests" depends="-init-forbidden-apis,compile-core">
     <forbidden-apis suppressAnnotation="**.SuppressForbidden" signaturesFile="${common.dir}/tools/forbiddenApis/tests.txt" classpathref="forbidden-apis.allclasses.classpath">
@@ -374,7 +374,7 @@ public class MiniSolrCloudCluster {
    * @throws Exception on error
    */
   public JettySolrRunner startJettySolrRunner(JettySolrRunner jetty) throws Exception {
-    jetty.start();
+    jetty.start(false);
     jettys.add(jetty);
     return jetty;
   }
@@ -27,6 +27,7 @@ import java.util.List;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.client.solrj.embedded.JettyConfig;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.common.cloud.SolrZkClient;
 import org.junit.AfterClass;
 import org.junit.Before;

@@ -143,6 +144,10 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
   /** The cluster */
   protected static MiniSolrCloudCluster cluster;

+  protected SolrZkClient zkClient() {
+    return cluster.getSolrClient().getZkStateReader().getZkClient();
+  }
+
   /**
    * Call this to configure a cluster of n nodes.
    *
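The new zkClient() accessor saves subclasses from reaching through the CloudSolrClient for the SolrZkClient. A hypothetical use inside a SolrCloudTestCase subclass (cluster setup is omitted, and the znode path is only an example that must exist in the test cluster):

import org.apache.solr.cloud.SolrCloudTestCase;
import org.junit.Test;

public class ZkClientSketchTest extends SolrCloudTestCase {
  @Test
  public void readSecurityJson() throws Exception {
    // Read a znode directly through the shared cluster's ZooKeeper client.
    byte[] data = zkClient().getData("/security.json", null, null, true);
    assertNotNull(data);
  }
}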
solr/test-framework/src/test-files/log4j.properties (new file)
@@ -0,0 +1,11 @@
+# Logging level
+log4j.rootLogger=INFO, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.Target=System.err
+log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
+log4j.logger.org.apache.zookeeper=WARN
+log4j.logger.org.apache.hadoop=WARN
+log4j.logger.org.apache.directory=WARN
+log4j.logger.org.apache.solr.hadoop=INFO
JettySolrRunnerTest.java (new file)
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.junit.Test;
+
+import static org.hamcrest.core.IsNot.not;
+
+public class JettySolrRunnerTest extends SolrTestCaseJ4 {
+
+  @Test
+  public void testRestartPorts() throws Exception {
+
+    Path solrHome = createTempDir();
+    Files.write(solrHome.resolve("solr.xml"), MiniSolrCloudCluster.DEFAULT_CLOUD_SOLR_XML.getBytes(Charset.defaultCharset()));
+
+    JettyConfig config = JettyConfig.builder().build();
+
+    JettySolrRunner jetty = new JettySolrRunner(solrHome.toString(), config);
+    try {
+      jetty.start();
+
+      URL url = jetty.getBaseUrl();
+      int usedPort = url.getPort();
+
+      jetty.stop();
+      jetty.start();
+
+      assertEquals("After restart, jetty port should be the same", usedPort, jetty.getBaseUrl().getPort());
+
+      jetty.stop();
+      jetty.start(false);
+
+      assertThat("After restart, jetty port should be different", jetty.getBaseUrl().getPort(), not(usedPort));
+    }
+    finally {
+      if (jetty.isRunning())
+        jetty.stop();
+    }
+
+  }
+
+}
MiniSolrCloudClusterTest.java (new file)
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.client.solrj.embedded.JettyConfig;
+import org.apache.solr.client.solrj.embedded.JettySolrRunner;
+import org.apache.solr.util.RevertDefaultThreadHandlerRule;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.rules.RuleChain;
+import org.junit.rules.TestRule;
+
+@LuceneTestCase.SuppressSysoutChecks(bugUrl = "Solr logs to JUL")
+public class MiniSolrCloudClusterTest extends LuceneTestCase {
+
+  @ClassRule
+  public static TestRule solrClassRules = RuleChain.outerRule(
+      new SystemPropertiesRestoreRule()).around(
+      new RevertDefaultThreadHandlerRule());
+
+  @Test
+  public void testErrorsInStartup() throws Exception {
+
+    AtomicInteger jettyIndex = new AtomicInteger();
+
+    MiniSolrCloudCluster cluster = null;
+    try {
+      cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
+        @Override
+        public JettySolrRunner startJettySolrRunner(String name, String context, JettyConfig config) throws Exception {
+          if (jettyIndex.incrementAndGet() != 2)
+            return super.startJettySolrRunner(name, context, config);
+          throw new IOException("Fake exception on startup!");
+        }
+      };
+      fail("Expected an exception to be thrown from MiniSolrCloudCluster");
+    }
+    catch (Exception e) {
+      assertEquals("Error starting up MiniSolrCloudCluster", e.getMessage());
+      assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
+      assertEquals("Fake exception on startup!", e.getSuppressed()[0].getMessage());
+    }
+    finally {
+      if (cluster != null)
+        cluster.shutdown();
+    }
+  }
+
+  @Test
+  public void testErrorsInShutdown() throws Exception {
+
+    AtomicInteger jettyIndex = new AtomicInteger();
+
+    MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(3, createTempDir(), JettyConfig.builder().build()) {
+      @Override
+      protected JettySolrRunner stopJettySolrRunner(JettySolrRunner jetty) throws Exception {
+        JettySolrRunner j = super.stopJettySolrRunner(jetty);
+        if (jettyIndex.incrementAndGet() == 2)
+          throw new IOException("Fake IOException on shutdown!");
+        return j;
+      }
+    };
+
+    try {
+      cluster.shutdown();
+      fail("Expected an exception to be thrown on MiniSolrCloudCluster shutdown");
+    }
+    catch (Exception e) {
+      assertEquals("Error shutting down MiniSolrCloudCluster", e.getMessage());
+      assertEquals("Expected one suppressed exception", 1, e.getSuppressed().length);
+      assertEquals("Fake IOException on shutdown!", e.getSuppressed()[0].getMessage());
+    }
+
+  }
+
+  @Test
+  public void testExtraFilters() throws Exception {
+    JettyConfig.Builder jettyConfig = JettyConfig.builder();
+    jettyConfig.waitForLoadingCoresToFinish(null);
+    jettyConfig.withFilter(JettySolrRunner.DebugFilter.class, "*");
+    MiniSolrCloudCluster cluster = new MiniSolrCloudCluster(random().nextInt(3) + 1, createTempDir(), jettyConfig.build());
+    cluster.shutdown();
+  }
+
+}