mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
commit 97693234de
@@ -296,7 +296,7 @@ def checkSummary(fullPath):
print()
print(fullPath)
printed = True
print(' missing: %s' % unescapeHTML(lastHREF))
print(' missing description: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:
@@ -57,6 +57,8 @@ API Changes
instead, which derived from the UH. WholeBreakIterator and
CustomSeparatorBreakIterator were moved to UH's package. (David Smiley)

* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)

Bug Fixes

* LUCENE-7626: IndexWriter will no longer accept broken token offsets
@@ -88,6 +90,10 @@ Optimizations
values using different numbers of bits per value if this proves to save
storage. (Adrien Grand)

* LUCENE-7845: Enhance spatial-extras RecursivePrefixTreeStrategy queries when the
query is a point (for 2D) or is a simple date interval (e.g. 1 month). When
the strategy is marked as pointsOnly, the result is a TermQuery. (David Smiley)

Other

* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
@@ -99,6 +105,8 @@ Other
* LUCENE-7753: Make fields static when possible.
(Daniel Jelinski via Adrien Grand)

* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi)

======================= Lucene 6.7.0 =======================

Other
@@ -107,6 +115,10 @@ Other
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
Uwe Schindler, Dawid Weiss)

Improvements

* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)

======================= Lucene 6.6.0 =======================

New Features
@@ -74,3 +74,9 @@ collecting TopDocs for each group, but instead takes a GroupReducer that will
perform any type of reduction on the top groups collected on a first-pass. To
reproduce the old behaviour of SecondPassGroupingCollector, you should instead
use TopGroupsCollector.

## Removed legacy numerics (LUCENE-7850)

Support for legacy numerics has been removed since legacy numerics had been
deprecated since Lucene 6.0. Points should be used instead, see
org.apache.lucene.index.PointValues for an introduction.
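
As a side note for migrating readers (a minimal sketch, not part of the commit itself; the field name "price" is illustrative), the points replacement for a legacy trie field looks roughly like this:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.search.Query;

// indexing: replaces doc.add(new LegacyIntField("price", 42, Field.Store.NO))
Document doc = new Document();
doc.add(new IntPoint("price", 42));

// querying: replaces LegacyNumericRangeQuery.newIntRange(...)
Query exact = IntPoint.newExactQuery("price", 42);
Query range = IntPoint.newRangeQuery("price", 10, 100); // bounds are inclusive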
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Emits the entire input as a single token.
*/
@@ -41,16 +43,16 @@ public final class KeywordTokenizer extends Tokenizer {
}

public KeywordTokenizer(int bufferSize) {
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}

public KeywordTokenizer(AttributeFactory factory, int bufferSize) {
super(factory);
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}
@@ -16,26 +16,39 @@
*/
package org.apache.lucene.analysis.core;


import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import java.util.Map;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link KeywordTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.KeywordTokenizerFactory"/>
* <tokenizer class="solr.KeywordTokenizerFactory" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
*
* Options:
* <ul>
* <li>maxTokenLen: max token length, should be greater than 0 and less than
* MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rare to need to change this;
* if unset, {@link KeywordTokenizer#DEFAULT_BUFFER_SIZE} is used</li>
* </ul>
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;

/** Creates a new KeywordTokenizerFactory */
public KeywordTokenizerFactory(Map<String,String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -43,6 +56,6 @@ public class KeywordTokenizerFactory extends TokenizerFactory {

@Override
public KeywordTokenizer create(AttributeFactory factory) {
return new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
return new KeywordTokenizer(factory, maxTokenLen);
}
}
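
For reference, a minimal sketch of driving this factory programmatically, in the style of the tests added later in this commit (argument values are illustrative):

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;

Map<String, String> args = new HashMap<>();
args.put("maxTokenLen", "256"); // the map is consumed; unknown keys throw "Unknown parameters"
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args);
Tokenizer tokenizer = factory.create(); // uses the default AttributeFactory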
@@ -50,6 +50,20 @@ public class LetterTokenizer extends CharTokenizer {
super(factory);
}

/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.

*/
public LetterTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which satisfy
* {@link Character#isLetter(int)}.*/
@Override
@@ -17,25 +17,40 @@
package org.apache.lucene.analysis.core;


import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import java.util.Map;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link LetterTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.LetterTokenizerFactory"/>
* <tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
*
* Options:
* <ul>
* <li>maxTokenLen: max token length, must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rare to need to change this;
* if unset, {@link CharTokenizer#DEFAULT_MAX_WORD_LEN} is used</li>
* </ul>
*/
public class LetterTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;

/** Creates a new LetterTokenizerFactory */
public LetterTokenizerFactory(Map<String,String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -43,6 +58,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {

@Override
public LetterTokenizer create(AttributeFactory factory) {
return new LetterTokenizer(factory);
return new LetterTokenizer(factory, maxTokenLen);
}
}
@@ -50,6 +50,19 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
super(factory);
}

/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Converts char to lower case
* {@link Character#toLowerCase(int)}.*/
@Override
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;


import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
@@ -25,20 +26,36 @@ import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link LowerCaseTokenizer}.
* Factory for {@link LowerCaseTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.LowerCaseTokenizerFactory"/>
* </analyzer>
* <analyzer>
* <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
* <p>
* Options:
* <ul>
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rare to need to change this;
* if unset, {@link CharTokenizer#DEFAULT_MAX_WORD_LEN} is used</li>
* </ul>
*/
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {

/** Creates a new LowerCaseTokenizerFactory */
public LowerCaseTokenizerFactory(Map<String,String> args) {
private final int maxTokenLen;

/**
* Creates a new LowerCaseTokenizerFactory
*/
public LowerCaseTokenizerFactory(Map<String, String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -46,11 +63,13 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi

@Override
public LowerCaseTokenizer create(AttributeFactory factory) {
return new LowerCaseTokenizer(factory);
return new LowerCaseTokenizer(factory, maxTokenLen);
}

@Override
public AbstractAnalysisFactory getMultiTermComponent() {
return new LowerCaseFilterFactory(new HashMap<>(getOriginalArgs()));
Map map = new HashMap<>(getOriginalArgs());
map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
return new LowerCaseFilterFactory(map);
}
}
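
A note on the map.remove("maxTokenLen") line above: analysis factories reject arguments they do not recognize, so forwarding the tokenizer-only option would break getMultiTermComponent(). A sketch of the failure this avoids (hypothetical map contents):

Map<String, String> leftover = new HashMap<>();
leftover.put("maxTokenLen", "256");
// LowerCaseFilterFactory does not define maxTokenLen, so this would throw
// IllegalArgumentException("Unknown parameters: {maxTokenLen=256}")
new LowerCaseFilterFactory(leftover);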
@@ -58,7 +58,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <ul>
* <li><code>wordset</code> - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
* begining with the "#" character. Blank lines are ignored. See
* beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
* </li>
* <li><code>snowball</code> - This format allows for multiple words specified on each
@@ -47,6 +47,19 @@ public final class UnicodeWhitespaceTokenizer extends CharTokenizer {
public UnicodeWhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}

/**
* Construct a new UnicodeWhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public UnicodeWhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which do not satisfy Unicode's WHITESPACE property. */
@Override
@@ -46,6 +46,19 @@ public final class WhitespaceTokenizer extends CharTokenizer {
public WhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}

/**
* Construct a new WhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public WhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}

/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(int)}.*/
@@ -22,15 +22,18 @@ import java.util.Collection;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint">
* <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
* <analyzer>
* <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode"/>
* <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode" maxTokenLen="256"/>
* </analyzer>
* </fieldType></pre>
*
@@ -38,6 +41,9 @@ import org.apache.lucene.util.AttributeFactory;
* <ul>
* <li>rule: either "java" for {@link WhitespaceTokenizer}
* or "unicode" for {@link UnicodeWhitespaceTokenizer}</li>
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rare to need to change this;
* if unset, {@link CharTokenizer#DEFAULT_MAX_WORD_LEN} is used</li>
* </ul>
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
@@ -46,13 +52,17 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
private static final Collection<String> RULE_NAMES = Arrays.asList(RULE_JAVA, RULE_UNICODE);

private final String rule;
private final int maxTokenLen;

/** Creates a new WhitespaceTokenizerFactory */
public WhitespaceTokenizerFactory(Map<String,String> args) {
super(args);

rule = get(args, "rule", RULE_NAMES, RULE_JAVA);

maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -62,9 +72,9 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
public Tokenizer create(AttributeFactory factory) {
switch (rule) {
case RULE_JAVA:
return new WhitespaceTokenizer(factory);
return new WhitespaceTokenizer(factory, maxTokenLen);
case RULE_UNICODE:
return new UnicodeWhitespaceTokenizer(factory);
return new UnicodeWhitespaceTokenizer(factory, maxTokenLen);
default:
throw new AssertionError();
}
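
Combining the two options, a small usage sketch mirroring the tests added later in this commit (values are illustrative):

Map<String, String> args = new HashMap<>();
args.put("rule", "unicode");  // selects UnicodeWhitespaceTokenizer
args.put("maxTokenLen", "5"); // tokens longer than 5 chars are split
Tokenizer tokenizer = new WhitespaceTokenizerFactory(args).create();
// "Tokenizer test" would tokenize as "Token", "izer", "test"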
@@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;

import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;

/**
* An abstract base class for simple, character-oriented tokenizers.
* <p>
@@ -50,6 +52,7 @@ public abstract class CharTokenizer extends Tokenizer {
* Creates a new {@link CharTokenizer} instance
*/
public CharTokenizer() {
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}

/**
@@ -60,6 +63,23 @@ public abstract class CharTokenizer extends Tokenizer {
*/
public CharTokenizer(AttributeFactory factory) {
super(factory);
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}

/**
* Creates a new {@link CharTokenizer} instance
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public CharTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
this.maxTokenLen = maxTokenLen;
}

/**
@@ -193,9 +213,10 @@ public abstract class CharTokenizer extends Tokenizer {
}

private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
public static final int DEFAULT_MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;

private final int maxTokenLen;

private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@@ -256,7 +277,7 @@ public abstract class CharTokenizer extends Tokenizer {
}
end += charCount;
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) { // buffer overflow! make sure to check for >= surrogate pair could break == test
if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
}
} else if (length > 0) { // at non-Letter w/ chars
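
To ground the change above: CharTokenizer subclasses only decide which code points belong in a token, while this length check enforces the cap for all of them. A minimal sketch of a custom subclass (hypothetical DigitTokenizer, not part of this commit):

import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.AttributeFactory;

public final class DigitTokenizer extends CharTokenizer {
  public DigitTokenizer(AttributeFactory factory) {
    super(factory, 64); // runs of digits longer than 64 chars are split, per the check above
  }

  @Override
  protected boolean isTokenChar(int c) {
    return Character.isDigit(c); // token chars are decimal digits
  }
}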
@@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet;

/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
* The data was created using ICU4J v56.1.0.0
* The data was created using ICU4J v59.1.0.0
* <p>
* Unicode version: 8.0.0.0
* Unicode version: 9.0.0.0
*/
public final class UnicodeProps {
private UnicodeProps() {}

/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "8.0.0.0";
public static final String UNICODE_VERSION = "9.0.0.0";

/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(
@@ -53,7 +53,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>

@@ -54,7 +54,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>

@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeFactory;

public class TestKeywordTokenizer extends BaseTokenStreamTestCase {

public void testSimple() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
KeywordTokenizer tokenizer = new KeywordTokenizer();
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer \ud801\udc1ctest"});
}

public void testFactory() {
Map<String, String> args = new HashMap<>();
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args);
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
assertEquals(KeywordTokenizer.class, tokenizer.getClass());
}

private Map<String, String> makeArgs(String... args) {
Map<String, String> ret = new HashMap<>();
for (int idx = 0; idx < args.length; idx += 2) {
ret.put(args[idx], args[idx + 1]);
}
return ret;
}

public void testParamsFactory() throws IOException {
// negative maxTokenLen
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "-1")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());

// zero maxTokenLen
iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "0")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());

// Added random param, should throw illegal error
iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "255", "randomParam", "rValue")));
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());

// tokeniser will never split, no matter what is passed,
// but the buffer will not be more than the length of the token

KeywordTokenizerFactory factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "5"));
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
StringReader reader = new StringReader("Tokenizertest");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizertest"});

// tokeniser will never split, no matter what is passed,
// but the buffer will not be more than the length of the token
factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "2"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer\u00A0test"});
}
}
@@ -54,4 +54,55 @@ public class TestUnicodeWhitespaceTokenizer extends BaseTokenStreamTestCase {
assertEquals(UnicodeWhitespaceTokenizer.class, tokenizer.getClass());
}

private Map<String, String> makeArgs(String... args) {
Map<String, String> ret = new HashMap<>();
for (int idx = 0; idx < args.length; idx += 2) {
ret.put(args[idx], args[idx + 1]);
}
return ret;
}

public void testParamsFactory() throws IOException {

// negative maxTokenLen
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "-1")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());

// zero maxTokenLen
iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "0")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());

// Added random param, should throw illegal error
iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "255", "randomParam", "rValue")));
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());

// tokeniser will split at 5, Token | izer, no matter what happens
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "5"));
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Token", "izer", "\ud801\udc1ctes", "t"});

// tokeniser will split at 2, To | ke | ni | ze | r, no matter what happens
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "2"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"To", "ke", "ni", "ze", "r", "te", "st"});

// tokeniser will split at 10, no matter what happens,
// but the tokens' lengths are less than that
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "10"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer", "test"});
}
}
@@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.TestUtil;

@@ -89,6 +91,99 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
}

/*
* tests the max token length passed as a parameter - the tokenizer will split at that length no matter what
*/
public void testCustomMaxTokenLength() throws IOException {

StringBuilder builder = new StringBuilder();
for (int i = 0; i < 100; i++) {
builder.append("A");
}
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
// Tricky, passing two copies of the string to the reader....
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT),
builder.toString().toLowerCase(Locale.ROOT) });

Exception e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), -1));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());

tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString(), builder.toString()});


// Let's test that we can get a token longer than 255 through.
builder.setLength(0);
for (int i = 0; i < 500; i++) {
builder.append("Z");
}
tokenizer = new LetterTokenizer(newAttributeFactory(), 500);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});


// Token lengths of zero make no sense,
// so let's also try the edge case of a token longer than the I/O buffer (4096)
builder.setLength(0);
for (int i = 0; i < 600; i++) {
builder.append("aUrOkIjq"); // 600 * 8 = 4800 chars.
}

e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());

tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});


e = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizer(newAttributeFactory(), 10_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());


tokenizer = new KeywordTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});

e = expectThrows(IllegalArgumentException.class, () ->
new LetterTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () ->
new LetterTokenizer(newAttributeFactory(), 2_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 2000000", e.getMessage());

tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});

e = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizer(newAttributeFactory(), 3_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 3000000", e.getMessage());

tokenizer = new WhitespaceTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});

}

/*
* tests the max word length of 255 with a surrogate pair at position 255
@@ -168,11 +168,14 @@ FFE3>
1134D>
11366..1136C>
11370..11374>
11442>
11446>
114C2..114C3>
115BF..115C0>
1163F>
116B6..116B7>
1172B>
11C3F>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
@@ -181,6 +184,8 @@ FFE3>
1D185..1D18B>
1D1AA..1D1AD>
1E8D0..1E8D6>
1E944..1E946>
1E948..1E94A>

# Latin script "composed" that do not further decompose, so decompose here
# These are from AsciiFoldingFilter
@@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
112F7>0037 # KHUDAWADI DIGIT SEVEN
112F8>0038 # KHUDAWADI DIGIT EIGHT
112F9>0039 # KHUDAWADI DIGIT NINE
11450>0030 # NEWA DIGIT ZERO
11451>0031 # NEWA DIGIT ONE
11452>0032 # NEWA DIGIT TWO
11453>0033 # NEWA DIGIT THREE
11454>0034 # NEWA DIGIT FOUR
11455>0035 # NEWA DIGIT FIVE
11456>0036 # NEWA DIGIT SIX
11457>0037 # NEWA DIGIT SEVEN
11458>0038 # NEWA DIGIT EIGHT
11459>0039 # NEWA DIGIT NINE
114D0>0030 # TIRHUTA DIGIT ZERO
114D1>0031 # TIRHUTA DIGIT ONE
114D2>0032 # TIRHUTA DIGIT TWO
@@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
118E7>0037 # WARANG CITI DIGIT SEVEN
118E8>0038 # WARANG CITI DIGIT EIGHT
118E9>0039 # WARANG CITI DIGIT NINE
11C50>0030 # BHAIKSUKI DIGIT ZERO
11C51>0031 # BHAIKSUKI DIGIT ONE
11C52>0032 # BHAIKSUKI DIGIT TWO
11C53>0033 # BHAIKSUKI DIGIT THREE
11C54>0034 # BHAIKSUKI DIGIT FOUR
11C55>0035 # BHAIKSUKI DIGIT FIVE
11C56>0036 # BHAIKSUKI DIGIT SIX
11C57>0037 # BHAIKSUKI DIGIT SEVEN
11C58>0038 # BHAIKSUKI DIGIT EIGHT
11C59>0039 # BHAIKSUKI DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
@@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
16B59>0039 # PAHAWH HMONG DIGIT NINE
1E950>0030 # ADLAM DIGIT ZERO
1E951>0031 # ADLAM DIGIT ONE
1E952>0032 # ADLAM DIGIT TWO
1E953>0033 # ADLAM DIGIT THREE
1E954>0034 # ADLAM DIGIT FOUR
1E955>0035 # ADLAM DIGIT FIVE
1E956>0036 # ADLAM DIGIT SIX
1E957>0037 # ADLAM DIGIT SEVEN
1E958>0038 # ADLAM DIGIT EIGHT
1E959>0039 # ADLAM DIGIT NINE

@@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt
@@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.

* Unicode 7.0.0
* Unicode 9.0.0

# Canonical_Combining_Class (ccc) values
0300..0314:230
@@ -129,6 +129,8 @@
0825..0827:230
0829..082D:230
0859..085B:220
08D4..08E1:230
08E3:220
08E4..08E5:230
08E6:220
08E7..08E8:230
@@ -232,6 +234,7 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
1DFB:230
1DFC:233
1DFD:220
1DFE:230
@@ -260,7 +263,7 @@
3099..309A:8
A66F:230
A674..A67D:230
A69F:230
A69E..A69F:230
A6F0..A6F1:230
A806:9
A8C4:9
@@ -280,6 +283,7 @@ ABED:9
FB1E:26
FE20..FE26:230
FE27..FE2D:220
FE2E..FE2F:230
101FD:220
102E0:220
10376..1037A:230
@@ -299,6 +303,7 @@ FE27..FE2D:220
11133..11134:9
11173:7
111C0:9
111CA:7
11235:9
11236:7
112E9:7
@@ -307,6 +312,8 @@ FE27..FE2D:220
1134D:9
11366..1136C:230
11370..11374:230
11442:9
11446:7
114C2:9
114C3:7
115BF:9
@@ -314,6 +321,8 @@ FE27..FE2D:220
1163F:9
116B6:9
116B7:7
1172B:9
11C3F:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
@@ -326,7 +335,14 @@ FE27..FE2D:220
1D18A..1D18B:220
1D1AA..1D1AD:230
1D242..1D244:230
1E000..1E006:230
1E008..1E018:230
1E01B..1E021:230
1E023..1E024:230
1E026..1E02A:230
1E8D0..1E8D6:220
1E944..1E949:230
1E94A:7

# Canonical decomposition mappings
00C0>0041 0300 # one-way: diacritic 0300
@@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt
@@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.

* Unicode 7.0.0
* Unicode 9.0.0

00A0>0020
00A8>0020 0308
@@ -3675,6 +3675,7 @@ FFEE>25CB
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
@@ -1,5 +1,5 @@
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# Copyright (c) 1991-2016 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.

* Unicode 7.0.0
* Unicode 9.0.0

0041>0061
0042>0062
@@ -632,8 +632,22 @@
10CD>2D2D
10FC>10DC
115F..1160>
13F8>13F0
13F9>13F1
13FA>13F2
13FB>13F3
13FC>13F4
13FD>13F5
17B4..17B5>
180B..180E>
1C80>0432
1C81>0434
1C82>043E
1C83>0441
1C84..1C85>0442
1C86>044A
1C87>0463
1C88>A64B
1D2C>0061
1D2D>00E6
1D2E>0062
@@ -2382,14 +2396,99 @@ A7AA>0266
A7AB>025C
A7AC>0261
A7AD>026C
A7AE>026A
A7B0>029E
A7B1>0287
A7B2>029D
A7B3>AB53
A7B4>A7B5
A7B6>A7B7
A7F8>0127
A7F9>0153
AB5C>A727
AB5D>AB37
AB5E>026B
AB5F>AB52
AB70>13A0
AB71>13A1
AB72>13A2
AB73>13A3
AB74>13A4
AB75>13A5
AB76>13A6
AB77>13A7
AB78>13A8
AB79>13A9
AB7A>13AA
AB7B>13AB
AB7C>13AC
AB7D>13AD
AB7E>13AE
AB7F>13AF
AB80>13B0
AB81>13B1
AB82>13B2
AB83>13B3
AB84>13B4
AB85>13B5
AB86>13B6
AB87>13B7
AB88>13B8
AB89>13B9
AB8A>13BA
AB8B>13BB
AB8C>13BC
AB8D>13BD
AB8E>13BE
AB8F>13BF
AB90>13C0
AB91>13C1
AB92>13C2
AB93>13C3
AB94>13C4
AB95>13C5
AB96>13C6
AB97>13C7
AB98>13C8
AB99>13C9
AB9A>13CA
AB9B>13CB
AB9C>13CC
AB9D>13CD
AB9E>13CE
AB9F>13CF
ABA0>13D0
ABA1>13D1
ABA2>13D2
ABA3>13D3
ABA4>13D4
ABA5>13D5
ABA6>13D6
ABA7>13D7
ABA8>13D8
ABA9>13D9
ABAA>13DA
ABAB>13DB
ABAC>13DC
ABAD>13DD
ABAE>13DE
ABAF>13DF
ABB0>13E0
ABB1>13E1
ABB2>13E2
ABB3>13E3
ABB4>13E4
ABB5>13E5
ABB6>13E6
ABB7>13E7
ABB8>13E8
ABB9>13E9
ABBA>13EA
ABBB>13EB
ABBC>13EC
ABBD>13ED
ABBE>13EE
ABBF>13EF
F900>8C48
F901>66F4
F902>8ECA
@@ -3766,6 +3865,93 @@ FFF0..FFF8>
10425>1044D
10426>1044E
10427>1044F
104B0>104D8
104B1>104D9
104B2>104DA
104B3>104DB
104B4>104DC
104B5>104DD
104B6>104DE
104B7>104DF
104B8>104E0
104B9>104E1
104BA>104E2
104BB>104E3
104BC>104E4
104BD>104E5
104BE>104E6
104BF>104E7
104C0>104E8
104C1>104E9
104C2>104EA
104C3>104EB
104C4>104EC
104C5>104ED
104C6>104EE
104C7>104EF
104C8>104F0
104C9>104F1
104CA>104F2
104CB>104F3
104CC>104F4
104CD>104F5
104CE>104F6
104CF>104F7
104D0>104F8
104D1>104F9
104D2>104FA
104D3>104FB
10C80>10CC0
10C81>10CC1
10C82>10CC2
10C83>10CC3
10C84>10CC4
10C85>10CC5
10C86>10CC6
10C87>10CC7
10C88>10CC8
10C89>10CC9
10C8A>10CCA
10C8B>10CCB
10C8C>10CCC
10C8D>10CCD
10C8E>10CCE
10C8F>10CCF
10C90>10CD0
10C91>10CD1
10C92>10CD2
10C93>10CD3
10C94>10CD4
10C95>10CD5
10C96>10CD6
10C97>10CD7
10C98>10CD8
10C99>10CD9
10C9A>10CDA
10C9B>10CDB
10C9C>10CDC
10C9D>10CDD
10C9E>10CDE
10C9F>10CDF
10CA0>10CE0
10CA1>10CE1
10CA2>10CE2
10CA3>10CE3
10CA4>10CE4
10CA5>10CE5
10CA6>10CE6
10CA7>10CE7
10CA8>10CE8
10CA9>10CE9
10CAA>10CEA
10CAB>10CEB
10CAC>10CEC
10CAD>10CED
10CAE>10CEE
10CAF>10CEF
10CB0>10CF0
10CB1>10CF1
10CB2>10CF2
118A0>118C0
118A1>118C1
118A2>118C2
@@ -4803,6 +4989,40 @@ FFF0..FFF8>
1D7FD>0037
1D7FE>0038
1D7FF>0039
1E900>1E922
1E901>1E923
1E902>1E924
1E903>1E925
1E904>1E926
1E905>1E927
1E906>1E928
1E907>1E929
1E908>1E92A
1E909>1E92B
1E90A>1E92C
1E90B>1E92D
1E90C>1E92E
1E90D>1E92F
1E90E>1E930
1E90F>1E931
1E910>1E932
1E911>1E933
1E912>1E934
1E913>1E935
1E914>1E936
1E915>1E937
1E916>1E938
1E917>1E939
1E918>1E93A
1E919>1E93B
1E91A>1E93C
1E91B>1E93D
1E91C>1E93E
1E91D>1E93F
1E91E>1E940
1E91F>1E941
1E920>1E942
1E921>1E943
1EE00>0627
1EE01>0628
1EE02>062C
@@ -5067,6 +5287,7 @@ FFF0..FFF8>
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -53,7 +53,14 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
new String[] { "我", "购买", "了", "道具", "和", "服装" }
);
}


public void testTraditionalChinese() throws Exception {
assertAnalyzesTo(a, "我購買了道具和服裝。",
new String[] { "我", "購買", "了", "道具", "和", "服裝"});
assertAnalyzesTo(a, "定義切分字串的基本單位是訂定分詞標準的首要工作", // From http://godel.iis.sinica.edu.tw/CKIP/paper/wordsegment_standard.pdf
new String[] { "定義", "切", "分", "字串", "的", "基本", "單位", "是", "訂定", "分詞", "標準", "的", "首要", "工作" });
}

public void testChineseNumerics() throws Exception {
assertAnalyzesTo(a, "9483", new String[] { "9483" });
assertAnalyzesTo(a, "院內分機9483。",
@@ -63,7 +63,7 @@ import java.util.regex.Pattern;
public class GenerateUTR30DataFiles {
private static final String ICU_SVN_TAG_URL
= "http://source.icu-project.org/repos/icu/icu/tags";
private static final String ICU_RELEASE_TAG = "release-54-1";
private static final String ICU_RELEASE_TAG = "release-58-1";
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
private static final String NFC_TXT = "nfc.txt";
private static final String NFKC_TXT = "nfkc.txt";
@@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
// ignored characters
builder.add("\u0301", "");
builder.add("\u00AD", "");
builder.add("ґ", "г");
builder.add("Ґ", "Г");

NormalizeCharMap normMap = builder.build();
reader = new MappingCharFilter(normMap, reader);
@@ -52,10 +52,17 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
public void testCapsTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Цих Чайковського і Ґете.",
new String[] { "Чайковське", "Чайковський", "Ґете" });
new String[] { "Чайковське", "Чайковський", "Гете" });
a.close();
}

public void testCharNormalization() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
new String[] { "Гюмрі", "Гюмрі" });
a.close();
}

public void testSampleSentence() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
@@ -60,10 +60,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyLongField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@@ -1114,9 +1110,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
// add numeric fields, to test if flex preserves encoding
doc.add(new LegacyIntField("trieInt", id, Field.Store.NO));
doc.add(new LegacyLongField("trieLong", (long) id, Field.Store.NO));

// add docvalues fields
doc.add(new NumericDocValuesField("dvByte", (byte) id));
@@ -1294,51 +1287,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}

public void testNumericFields() throws Exception {
for (String name : oldNames) {

Directory dir = oldIndexDirs.get(name);
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);

for (int id=10; id<15; id++) {
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
assertEquals("wrong number of hits", 1, hits.length);
Document d = searcher.doc(hits[0].doc);
assertEquals(String.valueOf(id), d.get("id"));

hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
assertEquals("wrong number of hits", 1, hits.length);
d = searcher.doc(hits[0].doc);
assertEquals(String.valueOf(id), d.get("id"));
}

// check that also lower-precision fields are ok
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);

hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);

// check decoding of terms
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt");
TermsEnum termsEnum = LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
while (termsEnum.next() != null) {
int val = LegacyNumericUtils.prefixCodedToInt(termsEnum.term());
assertTrue("value in id bounds", val >= 0 && val < 35);
}

terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong");
termsEnum = LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
while (termsEnum.next() != null) {
long val = LegacyNumericUtils.prefixCodedToLong(termsEnum.term());
assertTrue("value in id bounds", val >= 0L && val < 35L);
}

reader.close();
}
}

private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
if (VERBOSE) {
@@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.1.18
/com.healthmarketscience.jackcess/jackcess = 2.1.3
/com.ibm.icu/icu4j = 56.1
/com.ibm.icu/icu4j = 59.1
/com.pff/java-libpst = 0.8.1

com.sun.jersey.version = 1.9
@@ -276,7 +276,7 @@ org.slf4j.version = 1.7.7
/org.tukaani/xz = 1.5
/rome/rome = 1.0

ua.net.nlp.morfologik-ukrainian-search.version = 3.7.5
ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}

/xerces/xercesImpl = 2.9.1
@@ -1 +0,0 @@
8dd6671f52165a0419e6de5e1016400875a90fa9
@@ -0,0 +1 @@
6f06e820cf4c8968bbbaae66ae0b33f6a256b57f
@@ -1 +0,0 @@
2b8c8fbd740164d220ca7d18605b8b2092e163e9
@@ -0,0 +1 @@
8d2c4bf006f59227bcba8885b4602b3a8b5bd799
@@ -31,9 +31,7 @@
<path id="classpath">
<path refid="base.classpath"/>
<path refid="spatialjar"/>
<pathelement path="${backward-codecs.jar}" />
<pathelement path="${queries.jar}" />
<pathelement path="${misc.jar}" />
<pathelement path="${spatial3d.jar}" />
</path>

@@ -25,11 +25,6 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -41,8 +36,6 @@ import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@@ -88,8 +81,6 @@ public class BBoxStrategy extends SpatialStrategy {
*/
public static FieldType DEFAULT_FIELDTYPE;

@Deprecated
public static LegacyFieldType LEGACY_FIELDTYPE;
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
@@ -98,15 +89,6 @@ public class BBoxStrategy extends SpatialStrategy {
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
// Legacy default: legacyNumerics + docValues
LegacyFieldType legacyType = new LegacyFieldType();
legacyType.setIndexOptions(IndexOptions.DOCS);
legacyType.setNumericType(LegacyNumericType.DOUBLE);
legacyType.setNumericPrecisionStep(8);// same as solr default
legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
legacyType.setStored(false);
legacyType.freeze();
LEGACY_FIELDTYPE = legacyType;
}

public static final String SUFFIX_MINX = "__minX";
@@ -131,8 +113,6 @@ public class BBoxStrategy extends SpatialStrategy {
  private final boolean hasStored;
  private final boolean hasDocVals;
  private final boolean hasPointVals;
  // equiv to "hasLegacyNumerics":
  private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
  private final FieldType xdlFieldType;

  /**

@@ -142,15 +122,6 @@ public class BBoxStrategy extends SpatialStrategy {
    return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
  }

  /**
   * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility
   * @deprecated LegacyNumerics will be removed
   */
  @Deprecated
  public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
  }

  /**
   * Creates this strategy.
   * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL

@@ -179,23 +150,8 @@ public class BBoxStrategy extends SpatialStrategy {
    if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
      numQuads++;
    }
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
      if (hasPointVals) {
        throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
      }
      final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
      if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
        throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
      }
      numQuads++;
      legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
      legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
      legacyNumericFieldType.freeze();
    } else {
      legacyNumericFieldType = null;
    }

    if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
    if (hasPointVals) { // if we have an index...
      xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
      xdlFieldType.setIndexOptions(IndexOptions.DOCS);
      xdlFieldType.freeze();

@@ -242,12 +198,6 @@ public class BBoxStrategy extends SpatialStrategy {
      fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX());
      fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY());
    }
    if (legacyNumericFieldType != null) {
      fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
    }
    if (xdlFieldType != null) {
      fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
    }

@@ -664,17 +614,12 @@ public class BBoxStrategy extends SpatialStrategy {
  private Query makeNumberTermQuery(String field, double number) {
    if (hasPointVals) {
      return DoublePoint.newExactQuery(field, number);
    } else if (legacyNumericFieldType != null) {
      BytesRefBuilder bytes = new BytesRefBuilder();
      LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes);
      return new TermQuery(new Term(field, bytes.get()));
    }
    throw new UnsupportedOperationException("An index is required for this operation.");
  }

  /**
   * Returns a numeric range query based on FieldType
   * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
   * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
   *
   * @param fieldname field name. must not be <code>null</code>.

@@ -702,8 +647,6 @@ public class BBoxStrategy extends SpatialStrategy {
      }

      return DoublePoint.newRangeQuery(fieldname, min, max);
    } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
      return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
    }
    throw new UnsupportedOperationException("An index is required for this operation.");
  }

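With the legacy branch gone, only the point-based path remains in spatial-extras. A minimal sketch of what the surviving branch produces; the field name is hypothetical, following the SUFFIX_MINX convention shown earlier:

    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.search.Query;

    class BBoxRangeSketch {
      // "bbox__minX" is a hypothetical field name built from the SUFFIX_MINX convention.
      static Query minXRange(double min, double max) {
        return DoublePoint.newRangeQuery("bbox__minX", min, max);
      }
    }
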
@@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRefIterator;
/**
 * A TokenStream used internally by {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}.
 *
 * This is modelled after {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
 *
 * @lucene.internal
 */
class BytesRefIteratorTokenStream extends TokenStream {

@@ -18,18 +18,17 @@ package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;

import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;

@@ -57,9 +56,22 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
  }

  @Override
  protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
    //levels doesn't actually matter; NumberRange based Shapes have their own "level".
    return super.createCellIteratorToIndex(shape, grid.getMaxLevels(), reuse);
  protected boolean isPointShape(Shape shape) {
    if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
      return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() == grid.getMaxLevels();
    } else {
      return false;
    }
  }

  @Override
  protected boolean isGridAlignedShape(Shape shape) {
    // any UnitNRShape other than the world is a single cell/term
    if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
      return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() > 0;
    } else {
      return false;
    }
  }

  /** Unsupported. */

@@ -21,8 +21,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;

@@ -34,6 +32,10 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Circle;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;

/**
 * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two

@@ -163,7 +165,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
  }

  protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
    if (pointsOnly && !(shape instanceof Point)) {
    if (pointsOnly && !isPointShape(shape)) {
      throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
    }
    return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator

@@ -205,4 +207,16 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
      Shape inputShape, final int facetLevel, int maxCells) throws IOException {
    return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
  }

  protected boolean isPointShape(Shape shape) {
    if (shape instanceof Point) {
      return true;
    } else if (shape instanceof Circle) {
      return ((Circle) shape).getRadius() == 0.0;
    } else if (shape instanceof Rectangle) {
      Rectangle rect = (Rectangle) shape;
      return rect.getWidth() == 0.0 && rect.getHeight() == 0.0;
    }
    return false;
  }
}

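For reference, a small sketch of shapes the new predicate treats as points, using spatial4j factory methods; coordinates are arbitrary and the exact SpatialContext API is an assumption about the spatial4j version in use:

    import org.locationtech.spatial4j.context.SpatialContext;
    import org.locationtech.spatial4j.shape.Shape;

    class PointShapeSketch {
      static void examples() {
        SpatialContext ctx = SpatialContext.GEO;
        Shape point = ctx.makePoint(-80.0, 33.0);                  // trivially a point
        Shape circle = ctx.makeCircle(-80.0, 33.0, 0.0);           // zero radius => point
        Shape rect = ctx.makeRectangle(-80.0, -80.0, 33.0, 33.0);  // zero width/height => point
        // isPointShape(...) above returns true for all three.
      }
    }
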
@@ -20,9 +20,9 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.LegacyCell;

@@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.locationtech.spatial4j.shape.Shape;

/**
 * A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeQuery}.

@@ -121,7 +122,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {

  @Override
  protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
    if (shape instanceof Point || !pruneLeafyBranches)
    if (!pruneLeafyBranches || isGridAlignedShape(shape))
      return super.createCellIteratorToIndex(shape, detailLevel, reuse);

    List<Cell> cells = new ArrayList<>(4096);

@@ -177,6 +178,9 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
    int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));

    if (op == SpatialOperation.Intersects) {
      if (isGridAlignedShape(args.getShape())) {
        return makeGridShapeIntersectsQuery(args.getShape());
      }
      return new IntersectsPrefixTreeQuery(
          shape, getFieldName(), grid, detailLevel, prefixGridScanLevel);
    } else if (op == SpatialOperation.IsWithin) {

@@ -189,4 +193,35 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
    }
    throw new UnsupportedSpatialOperation(op);
  }

  /**
   * A quick check of the shape to see if it is perfectly aligned to a grid.
   * Points always are as they are indivisible. It's okay to return false
   * if the shape actually is aligned; this is an optimization hint.
   */
  protected boolean isGridAlignedShape(Shape shape) {
    return isPointShape(shape);
  }

  /** {@link #makeQuery(SpatialArgs)} specialized for the query being a grid square. */
  protected Query makeGridShapeIntersectsQuery(Shape gridShape) {
    assert isGridAlignedShape(gridShape);
    if (isPointsOnly()) {
      // Awesome; this will be equivalent to a TermQuery.
      Iterator<Cell> cellIterator = grid.getTreeCellIterator(gridShape, grid.getMaxLevels());
      // get last cell
      Cell cell = cellIterator.next();
      while (cellIterator.hasNext()) {
        int prevLevel = cell.getLevel();
        cell = cellIterator.next();
        assert prevLevel < cell.getLevel();
      }
      return new TermQuery(new Term(getFieldName(), cell.getTokenBytesWithLeaf(null)));
    } else {
      // Well there could be parent cells. But we can reduce the "scan level" which will be slower for a point query.
      // TODO: AVPTQ will still scan the bottom nonetheless; file an issue to eliminate that
      return new IntersectsPrefixTreeQuery(
          gridShape, getFieldName(), grid, getGrid().getMaxLevels(), getGrid().getMaxLevels() + 1);
    }
  }
}

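The effect of this fast path is easiest to see with a date-based strategy. A minimal sketch, assuming the spatial-extras APIs shown above; the field name and the fixed instant are hypothetical:

    import java.util.Calendar;

    import org.apache.lucene.search.Query;
    import org.apache.lucene.spatial.prefix.NumberRangePrefixTreeStrategy;
    import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;
    import org.apache.lucene.spatial.query.SpatialArgs;
    import org.apache.lucene.spatial.query.SpatialOperation;

    class GridAlignedQuerySketch {
      static Query singleInstantQuery() {
        DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
        NumberRangePrefixTreeStrategy strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
        strategy.setPointsOnly(true); // every indexed value is a single instant
        Calendar cal = tree.newCal();
        cal.setTimeInMillis(1496102400000L); // an arbitrary fixed instant
        // tree.toShape(cal) is a UnitNRShape at max level, so isGridAlignedShape()
        // holds and makeQuery(...) should reduce to a single TermQuery.
        return strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, tree.toShape(cal)));
      }
    }
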
@@ -22,11 +22,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;

@@ -86,8 +81,6 @@ public class PointVectorStrategy extends SpatialStrategy {
   */
  public static FieldType DEFAULT_FIELDTYPE;

  @Deprecated
  public static LegacyFieldType LEGACY_FIELDTYPE;
  static {
    // Default: pointValues + docValues
    FieldType type = new FieldType();

@@ -96,15 +89,6 @@ public class PointVectorStrategy extends SpatialStrategy {
    type.setStored(false);
    type.freeze();
    DEFAULT_FIELDTYPE = type;
    // Legacy default: legacyNumerics
    LegacyFieldType legacyType = new LegacyFieldType();
    legacyType.setIndexOptions(IndexOptions.DOCS);
    legacyType.setNumericType(LegacyNumericType.DOUBLE);
    legacyType.setNumericPrecisionStep(8);// same as solr default
    legacyType.setDocValuesType(DocValuesType.NONE);//no docValues!
    legacyType.setStored(false);
    legacyType.freeze();
    LEGACY_FIELDTYPE = legacyType;
  }

  public static final String SUFFIX_X = "__x";

@@ -117,8 +101,6 @@ public class PointVectorStrategy extends SpatialStrategy {
  private final boolean hasStored;
  private final boolean hasDocVals;
  private final boolean hasPointVals;
  // equiv to "hasLegacyNumerics":
  private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.

  /**
   * Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}

@@ -127,18 +109,6 @@ public class PointVectorStrategy extends SpatialStrategy {
    return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
  }

  /**
   * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility.
   * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)}
   * since that requires docValues (the legacy config didn't have that).
   *
   * @deprecated LegacyNumerics will be removed
   */
  @Deprecated
  public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
  }

  /**
   * Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
   * a field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics

@@ -159,21 +129,6 @@ public class PointVectorStrategy extends SpatialStrategy {
    if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
      numPairs++;
    }
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
      if (hasPointVals) {
        throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
      }
      final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
      if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
        throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
      }
      numPairs++;
      legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
      legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
      legacyNumericFieldType.freeze();
    } else {
      legacyNumericFieldType = null;
    }
    this.fieldsLen = numPairs * 2;
  }

@@ -209,10 +164,6 @@ public class PointVectorStrategy extends SpatialStrategy {
      fields[++idx] = new DoublePoint(fieldNameX, point.getX());
      fields[++idx] = new DoublePoint(fieldNameY, point.getY());
    }
    if (legacyNumericFieldType != null) {
      fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
    }
    assert idx == fields.length - 1;
    return fields;
  }

@@ -268,7 +219,6 @@ public class PointVectorStrategy extends SpatialStrategy {

  /**
   * Returns a numeric range query based on FieldType
   * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
   * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
   */
  private Query rangeQuery(String fieldName, Double min, Double max) {

@@ -283,8 +233,6 @@ public class PointVectorStrategy extends SpatialStrategy {

      return DoublePoint.newRangeQuery(fieldName, min, max);

    } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
      return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive
    }
    //TODO try doc-value range query?
    throw new UnsupportedOperationException("An index is required for this operation.");

@@ -68,9 +68,6 @@ public class DistanceStrategyTest extends StrategyTestCase {
    strategy = BBoxStrategy.newInstance(ctx, "bbox");
    ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});

    strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy");
    ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});

    strategy = new SerializedDVStrategy(ctx, "serialized");
    ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});

@@ -58,9 +58,7 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase {
    strategies.add(recursive_geohash);
    strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad"));
    strategies.add(PointVectorStrategy.newInstance(ctx, "pointvector"));
    strategies.add(PointVectorStrategy.newLegacyInstance(ctx, "pointvector_legacy"));
    strategies.add(BBoxStrategy.newInstance(ctx, "bbox"));
    strategies.add(BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy"));
    final SerializedDVStrategy serialized = new SerializedDVStrategy(ctx, "serialized");
    strategies.add(serialized);
    strategies.add(new CompositeSpatialStrategy("composite", recursive_geohash, serialized));

@@ -21,8 +21,6 @@ import java.io.IOException;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.SpatialMatchConcern;
import org.apache.lucene.spatial.prefix.RandomSpatialOpStrategyTestCase;

@@ -93,20 +91,10 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
      factory.worldBounds = new RectangleImpl(-300, 300, -100, 100, null);
      this.ctx = factory.newSpatialContext();
    }
    // randomly test legacy (numeric) and point based bbox strategy
    if (random().nextBoolean()) {
      this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
    } else {
      this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
    }
    this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
    //test we can disable docValues for predicate tests
    if (random().nextBoolean()) {
      FieldType fieldType = ((BBoxStrategy)strategy).getFieldType();
      if (fieldType instanceof LegacyFieldType) {
        fieldType = new LegacyFieldType((LegacyFieldType)fieldType);
      } else {
        fieldType = new FieldType(fieldType);
      }
      FieldType fieldType = new FieldType(((BBoxStrategy)strategy).getFieldType());
      fieldType.setDocValuesType(DocValuesType.NONE);
      strategy = new BBoxStrategy(ctx, strategy.getFieldName(), fieldType);
    }

@@ -194,11 +182,7 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {

  private void setupGeo() {
    this.ctx = SpatialContext.GEO;
    if (random().nextBoolean()) {
      this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
    } else {
      this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
    }
    this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
  }

  // OLD STATIC TESTS (worthless?)

@@ -239,16 +223,9 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
    FieldType fieldType;
    // random legacy or not legacy
    String FIELD_PREFIX = "bbox";
    fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
    if (random().nextBoolean()) {
      fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
      if (random().nextBoolean()) {
        fieldType.setDimensions(0, 0);
      }
    } else {
      fieldType = new FieldType(BBoxStrategy.LEGACY_FIELDTYPE);
      if (random().nextBoolean()) {
        fieldType.setIndexOptions(IndexOptions.NONE);
      }
      fieldType.setDimensions(0, 0);
    }

    strategy = new BBoxStrategy(ctx, FIELD_PREFIX, fieldType);

@@ -27,7 +27,7 @@ import org.junit.Before;
import org.junit.Test;
import org.locationtech.spatial4j.shape.Shape;

import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;

public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {

@@ -42,17 +42,8 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
  public void setUp() throws Exception {
    super.setUp();
    tree = DateRangePrefixTree.INSTANCE;
    if (randomBoolean()) {
      strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
    } else {
      //Test the format that existed <= Lucene 5.0
      strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange") {
        @Override
        protected CellToBytesRefIterator newCellToBytesRefIterator() {
          return new CellToBytesRefIterator50();
        }
      };
    }
    strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
    ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(randomInt() % 5 == 0);
    Calendar tmpCal = tree.newCal();
    int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND);
    tmpCal.add(randomCalWindowField, 2_000);

@@ -79,15 +70,16 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {

  @Test
  public void testWithinSame() throws IOException {
    final Calendar cal = tree.newCal();
    Shape shape = randomIndexedShape();
    testOperation(
        tree.toShape(cal),
        shape,
        SpatialOperation.IsWithin,
        tree.toShape(cal), true);//is within itself
        shape, true);//is within itself
  }

  @Test
  public void testWorld() throws IOException {
    ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
    testOperation(
        tree.toShape(tree.newCal()),//world matches everything
        SpatialOperation.Contains,

@@ -96,6 +88,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {

  @Test
  public void testBugInitIterOptimization() throws Exception {
    ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
    //bug due to fast path initIter() optimization
    testOperation(
        tree.parseShape("[2014-03-27T23 TO 2014-04-01T01]"),

@@ -114,6 +107,21 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {

  @Override
  protected Shape randomIndexedShape() {
    if (((NumberRangePrefixTreeStrategy)strategy).isPointsOnly()) {
      Calendar cal = tree.newCal();
      cal.setTimeInMillis(random().nextLong());
      return tree.toShape(cal);
    } else {
      return randomShape();
    }
  }

  @Override
  protected Shape randomQueryShape() {
    return randomShape();
  }

  private Shape randomShape() {
    Calendar cal1 = randomCalendar();
    UnitNRShape s1 = tree.toShape(cal1);
    if (rarely()) {

@@ -144,9 +152,4 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
    }
    return cal;
  }

  @Override
  protected Shape randomQueryShape() {
    return randomIndexedShape();
  }
}

@@ -63,12 +63,7 @@ public class TestPointVectorStrategy extends StrategyTestCase {
  @Test
  public void testCitiesIntersectsBBox() throws IOException {
    // note: does not require docValues
    if (random().nextBoolean()) {
      this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
    } else {
      // switch to legacy instance sometimes, which has no docValues
      this.strategy = PointVectorStrategy.newLegacyInstance(ctx, getClass().getSimpleName());
    }
    this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
    getAddAndVerifyIndexedDocuments(DATA_WORLD_CITIES_POINTS);
    executeQueries(SpatialMatchConcern.FILTER, QTEST_Cities_Intersects_BBox);
  }

@@ -60,7 +60,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
 * <ul>
 *  <li><code>wordset</code> - This is the default format, which supports one word per
 *      line (including any intra-word whitespace) and allows whole line comments
 *      begining with the "#" character. Blank lines are ignored. See
 *      beginning with the "#" character. Blank lines are ignored. See
 *      {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
 *  </li>
 *  <li><code>snowball</code> - This format allows for multiple words specified on each

@@ -194,6 +194,13 @@ Other Changes
* SOLR-10700: Deprecated and converted the PostingsSolrHighlighter to extend UnifiedSolrHighlighter and thus no
  longer use the PostingsHighlighter. It should behave mostly the same. (David Smiley)

* SOLR-10710: Fix LTR failing tests. (Diego Ceccarelli via Tomás Fernández Löbbe)

* SOLR-10755: delete/refactor many solrj deprecations (hossman)

* SOLR-10752: replicationFactor (nrtReplicas) default is 0 if tlogReplicas is specified when creating a collection
  (Tomás Fernández Löbbe)

==================  6.7.0 ==================

Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@@ -217,6 +224,8 @@ Upgrade Notes
  passwords via the env variables SOLR_SSL_KEY_STORE_PASSWORD and SOLR_SSL_TRUST_STORE_PASSWORD rather
  than system properties.

* SOLR-10379: ManagedSynonymFilterFactory has been deprecated in favor of ManagedSynonymGraphFilterFactory.

New Features
----------------------

@@ -224,12 +233,20 @@ New Features

* SOLR-10721: Provide a way to know when Core Discovery is finished and when all async cores are done loading
  (Erick Erickson)

* SOLR-10379: Add ManagedSynonymGraphFilterFactory, deprecate ManagedSynonymFilterFactory. (Steve Rowe)

* SOLR-10479: Adds support for HttpShardHandlerFactory.loadBalancerRequests(MinimumAbsolute|MaximumFraction)
  configuration. (Ramsey Haddad, Daniel Collins, Christine Poerschke)

Bug Fixes
----------------------
* SOLR-10723: JSON Facet API: resize() implemented incorrectly for CountSlotAcc, HllAgg.NumericAcc
  resulting in exceptions when using a hashing faceting method and sorting by hll(numeric_field).
  (yonik)

* SOLR-10719: Creating a core.properties fails if the parent of core.properties is a symlinked directory
  (Erick Erickson)

Optimizations
----------------------

@@ -238,7 +255,6 @@ Optimizations
  so that the second phase which would normally involve calculating the domain for the bucket
  can be skipped entirely, leading to large performance improvements. (yonik)

Other Changes
----------------------

@@ -250,6 +266,15 @@ Other Changes

* SOLR-10438: Assign explicit useDocValuesAsStored values to all points field types in
  schema-point.xml/TestPointFields. (hossman, Steve Rowe)

* LUCENE-7705: Allow CharTokenizer-derived tokenizers and KeywordTokenizer to configure the max token length.
  (Amrit Sarkar via Erick Erickson)

* SOLR-10659: Remove ResponseBuilder.getSortSpec use in SearchGroupShardResponseProcessor.
  (Judith Silverman via Christine Poerschke)

* SOLR-10741: Factor out createSliceShardsStr method from HttpShardHandler.prepDistributed.
  (Domenico Fabio Marino via Christine Poerschke)

==================  6.6.0 ==================

@@ -458,6 +483,14 @@ Bug Fixes
  "lucene"/standard query parser, should require " TO " in range queries,
  and accept "TO" as endpoints in range queries. (hossman, Steve Rowe)

* SOLR-10735: Windows script (solr.cmd) didn't work properly with directory containing spaces. Adding quotations
  to fix (Uwe Schindler, janhoy, Tomas Fernandez-Lobbe, Ishan Chattopadhyaya)

Ref Guide
----------------------

* SOLR-10758: Modernize the Solr ref guide's Chinese language analysis coverage. (Steve Rowe)

Other Changes
----------------------

@@ -20,7 +20,7 @@ import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;

import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.FieldType;

@@ -24,7 +24,7 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;

@@ -76,8 +76,7 @@ public class FieldLengthFeature extends Feature {
  static {
    NORM_TABLE[0] = 0;
    for (int i = 1; i < 256; i++) {
      float norm = SmallFloat.byte315ToFloat((byte) i);
      NORM_TABLE[i] = 1.0f / (norm * norm);
      NORM_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
    }
  }

@@ -88,15 +88,16 @@ public class TestLTRQParserPlugin extends TestRerankBase {
    query.add("rows", "4");
    query.add("fv", "true");

    String nonRerankedScore = "0.09271725";
    // FIXME: design better way to test this, we cannot check an absolute score
    // String nonRerankedScore = "0.09271725";

    // Normal solr order
    assertJQ("/query" + query.toQueryString(),
        "/response/docs/[0]/id=='9'",
        "/response/docs/[1]/id=='8'",
        "/response/docs/[2]/id=='7'",
        "/response/docs/[3]/id=='6'",
        "/response/docs/[3]/score=="+nonRerankedScore
        "/response/docs/[3]/id=='6'"
        // "/response/docs/[3]/score=="+nonRerankedScore
    );

    query.add("rq", "{!ltr model=6029760550880411648 reRankDocs=3}");

@@ -106,8 +107,8 @@ public class TestLTRQParserPlugin extends TestRerankBase {
        "/response/docs/[0]/id=='7'",
        "/response/docs/[1]/id=='8'",
        "/response/docs/[2]/id=='9'",
        "/response/docs/[3]/id=='6'",
        "/response/docs/[3]/score=="+nonRerankedScore
        "/response/docs/[3]/id=='6'"
        // "/response/docs/[3]/score=="+nonRerankedScore
    );
  }

@@ -42,8 +42,9 @@ public class TestParallelWeightCreation extends TestRerankBase{
    query.add("rows", "4");

    query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
    // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
    aftertest();
  }

@@ -210,14 +210,14 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
  @Test
  public void testSelectiveWeightsRequestFeaturesFromDifferentStore() throws Exception {

    final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
        "matchedTitle","1.0", "titlePhraseMatch","0.6103343");
    final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
        "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
    final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
        "popularity","3.0", "originalScore","1.0");

    final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);
    // final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
    //     "matchedTitle","1.0", "titlePhraseMatch","0.6103343");
    // final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
    //     "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
    // final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
    //     "popularity","3.0", "originalScore","1.0");
    //
    // final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);

    // extract all features in externalmodel's store (default store)
    // rerank using externalmodel (default store)

@@ -227,11 +227,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
    query.add("rows", "5");
    query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w2 efi.userTitlePhrase2=w1}");

    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
    // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
    // FIXME design better way to test this, we can't rely on absolute scores
    // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");

    // extract all features from fstore4
    // rerank using externalmodel (default store)

@@ -240,11 +241,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
    query.add("fl", "*,score,fv:[fv store=fstore4 efi.myPop=3]");
    query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");

    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
    // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
    // FIXME design better way to test this, we can't rely on absolute scores
    // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");

    // extract all features from fstore4
    // rerank using externalmodel2 (fstore2)

@@ -255,9 +257,9 @@ public class TestSelectiveWeightCreation extends TestRerankBase {

    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='5'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==2.5");
    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
    // FIXME design better way to test this, we can't rely on absolute scores
    // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
  }
}

@@ -95,9 +95,9 @@ public class CreateCollectionCmd implements Cmd {
    // look at the replication factor and see if it matches reality
    // if it does not, find best nodes to create more cores

    int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, 1));
    int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
    int numTlogReplicas = message.getInt(TLOG_REPLICAS, 0);
    int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1));
    int numPullReplicas = message.getInt(PULL_REPLICAS, 0);

    ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
    final String async = message.getStr(ASYNC);

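To make the SOLR-10752 default concrete, here is the fallback chain as a standalone sketch; the helper is hypothetical, not Solr API, and nulls stand in for parameters absent from the create-collection message:

    class NrtReplicasDefaultSketch {
      static int numNrtReplicas(Integer nrtReplicas, Integer replicationFactor, int tlogReplicas) {
        if (nrtReplicas != null) return nrtReplicas;
        if (replicationFactor != null) return replicationFactor;
        return tlogReplicas > 0 ? 0 : 1; // new: 0 NRT replicas when only tlog replicas were requested
      }
    }

So a collection created with only tlogReplicas=2 now gets 0 NRT replicas instead of the old default of 1.
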
@@ -39,6 +39,7 @@ import java.util.stream.Collectors;

import com.google.common.collect.Lists;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -85,13 +86,15 @@ public class CorePropertiesLocator implements CoresLocator {
  private void writePropertiesFile(CoreDescriptor cd, Path propfile) {
    Properties p = buildCoreProperties(cd);
    try {
      Files.createDirectories(propfile.getParent());
      FileUtils.createDirectories(propfile.getParent()); // Handling for symlinks.
      try (Writer os = new OutputStreamWriter(Files.newOutputStream(propfile), StandardCharsets.UTF_8)) {
        p.store(os, "Written by CorePropertiesLocator");
      }
    }
    catch (IOException e) {
      logger.error("Couldn't persist core properties to {}: {}", propfile, e.getMessage());
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "Couldn't persist core properties to " + propfile.toAbsolutePath().toString() + " : " + e.getMessage());
    }
  }

@@ -45,6 +45,9 @@ import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.CountMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;

@@ -185,6 +188,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
        .withFunctionName("percentile", PercentileEvaluator.class)
        .withFunctionName("empiricalDistribution", EmpiricalDistributionEvaluator.class)
        .withFunctionName("describe", DescribeEvaluator.class)
        .withFunctionName("finddelay", FindDelayEvaluator.class)
        .withFunctionName("sequence", SequenceEvaluator.class)
        .withFunctionName("array", ArrayEvaluator.class)
        .withFunctionName("hist", HistogramEvaluator.class)
        .withFunctionName("anova", AnovaEvaluator.class)
        .withFunctionName("movingAvg", MovingAverageEvaluator.class)

        // metrics
        .withFunctionName("min", MinMetric.class)

@@ -296,7 +305,14 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
    TupleStream tupleStream;

    try {
      tupleStream = this.streamFactory.constructStream(params.get("expr"));
      StreamExpression streamExpression = StreamExpressionParser.parse(params.get("expr"));
      if(this.streamFactory.isEvaluator(streamExpression)) {
        StreamExpression tupleExpression = new StreamExpression("tuple");
        tupleExpression.addParameter(new StreamExpressionNamedParameter("return-value", streamExpression));
        tupleStream = this.streamFactory.constructStream(tupleExpression);
      } else {
        tupleStream = this.streamFactory.constructStream(streamExpression);
      }
    } catch (Exception e) {
      //Catch exceptions that occur while the stream is being created. This will include streaming expression parse rules.
      SolrException.log(logger, e);

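In effect, a bare evaluator sent as expr is now wrapped so its result comes back as a single tuple. A sketch of the same wrapping in isolation, using the expression classes imported above; the example expression text is hypothetical:

    import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
    import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
    import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;

    class EvaluatorWrapSketch {
      // e.g. "anova(c1,c2)" effectively becomes "tuple(return-value=anova(c1,c2))"
      static StreamExpression wrap(String expr) {
        StreamExpression parsed = StreamExpressionParser.parse(expr);
        StreamExpression tuple = new StreamExpression("tuple");
        tuple.addParameter(new StreamExpressionNamedParameter("return-value", parsed));
        return tuple;
      }
    }
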
@@ -449,17 +449,7 @@ public class HttpShardHandler extends ShardHandler {
        }
      }
      // And now recreate the | delimited list of equivalent servers
      final StringBuilder sliceShardsStr = new StringBuilder();
      boolean first = true;
      for (String shardUrl : shardUrls) {
        if (first) {
          first = false;
        } else {
          sliceShardsStr.append('|');
        }
        sliceShardsStr.append(shardUrl);
      }
      rb.shards[i] = sliceShardsStr.toString();
      rb.shards[i] = createSliceShardsStr(shardUrls);
    }
  }
  String shards_rows = params.get(ShardParams.SHARDS_ROWS);

@@ -472,6 +462,20 @@ public class HttpShardHandler extends ShardHandler {
    }
  }

  private static String createSliceShardsStr(final List<String> shardUrls) {
    final StringBuilder sliceShardsStr = new StringBuilder();
    boolean first = true;
    for (String shardUrl : shardUrls) {
      if (first) {
        first = false;
      } else {
        sliceShardsStr.append('|');
      }
      sliceShardsStr.append(shardUrl);
    }
    return sliceShardsStr.toString();
  }


  private void addSlices(Map<String,Slice> target, ClusterState state, SolrParams params, String collectionName, String shardKeys, boolean multiCollection) {
    DocCollection coll = state.getCollection(collectionName);

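As an aside, on Java 8 the extracted helper is behaviorally equivalent to a single String.join; a sketch with hypothetical URLs:

    import java.util.Arrays;
    import java.util.List;

    class SliceShardsStrSketch {
      static String createSliceShardsStr(List<String> shardUrls) {
        return String.join("|", shardUrls); // '|'-delimited list of equivalent servers
      }

      public static void main(String[] args) {
        List<String> urls = Arrays.asList("http://host1/solr/c1", "http://host2/solr/c1");
        System.out.println(createSliceShardsStr(urls)); // http://host1/solr/c1|http://host2/solr/c1
      }
    }
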
@@ -97,6 +97,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
  int maximumPoolSize = Integer.MAX_VALUE;
  int keepAliveTime = 5;
  int queueSize = -1;
  int permittedLoadBalancerRequestsMinimumAbsolute = 0;
  float permittedLoadBalancerRequestsMaximumFraction = 1.0f;
  boolean accessPolicy = false;

  private String scheme = null;

@@ -122,6 +124,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
  // If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff
  static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue";

  // The minimum number of replicas that may be used
  static final String LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE = "loadBalancerRequestsMinimumAbsolute";

  // The maximum proportion of replicas to be used
  static final String LOAD_BALANCER_REQUESTS_MAX_FRACTION = "loadBalancerRequestsMaximumFraction";

  // Configure if the threadpool favours fairness over throughput
  static final String INIT_FAIRNESS_POLICY = "fairnessPolicy";

@@ -164,6 +172,16 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
    this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, maximumPoolSize,sb);
    this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, keepAliveTime,sb);
    this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, queueSize,sb);
    this.permittedLoadBalancerRequestsMinimumAbsolute = getParameter(
        args,
        LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE,
        permittedLoadBalancerRequestsMinimumAbsolute,
        sb);
    this.permittedLoadBalancerRequestsMaximumFraction = getParameter(
        args,
        LOAD_BALANCER_REQUESTS_MAX_FRACTION,
        permittedLoadBalancerRequestsMaximumFraction,
        sb);
    this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, accessPolicy,sb);
    log.debug("created with {}",sb);

@@ -252,7 +270,15 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
   */
  public LBHttpSolrClient.Rsp makeLoadBalancedRequest(final QueryRequest req, List<String> urls)
      throws SolrServerException, IOException {
    return loadbalancer.request(new LBHttpSolrClient.Req(req, urls));
    return loadbalancer.request(newLBHttpSolrClientReq(req, urls));
  }

  protected LBHttpSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List<String> urls) {
    int numServersToTry = (int)Math.floor(urls.size() * this.permittedLoadBalancerRequestsMaximumFraction);
    if (numServersToTry < this.permittedLoadBalancerRequestsMinimumAbsolute) {
      numServersToTry = this.permittedLoadBalancerRequestsMinimumAbsolute;
    }
    return new LBHttpSolrClient.Req(req, urls, numServersToTry);
  }

  /**

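A worked example of how the two new settings combine, with hypothetical values: given 10 candidate URLs, loadBalancerRequestsMaximumFraction=0.2 gives floor(10 * 0.2) = 2, which is below loadBalancerRequestsMinimumAbsolute=3, so 3 servers are tried. A standalone sketch mirroring the clamping above:

    class LoadBalancerLimitSketch {
      static int numServersToTry(int urlCount, float maxFraction, int minAbsolute) {
        return Math.max((int) Math.floor(urlCount * maxFraction), minAbsolute);
      }
    }
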
@@ -84,6 +84,12 @@ import org.slf4j.LoggerFactory;
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{

  /**
   * This constant was formerly part of HighlightParams. After deprecation it was removed so clients
   * would no longer use it, but we still support it server side.
   */
  private static final String USE_FVH = HighlightParams.HIGHLIGHT + ".useFastVectorHighlighter";

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  protected final SolrCore solrCore;

@@ -492,7 +498,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
    boolean methodFvh =
        HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
            params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
        || params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
        || params.getFieldBool(schemaField.getName(), USE_FVH, false);
    if (!methodFvh) return false;
    boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
    if (!termPosOff) {

@ -0,0 +1,706 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
import org.apache.lucene.document.DoubleDocValuesField;
|
||||
import org.apache.lucene.document.DoublePoint;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.spatial.SpatialStrategy;
|
||||
import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource;
|
||||
import org.apache.lucene.spatial.query.SpatialArgs;
|
||||
import org.apache.lucene.spatial.query.SpatialOperation;
|
||||
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
|
||||
import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.NumericUtils;
|
||||
import org.locationtech.spatial4j.context.SpatialContext;
|
||||
import org.locationtech.spatial4j.shape.Point;
|
||||
import org.locationtech.spatial4j.shape.Rectangle;
|
||||
import org.locationtech.spatial4j.shape.Shape;
|
||||
|
||||
|
||||
/**
|
||||
* A SpatialStrategy for indexing and searching Rectangles by storing its
|
||||
* coordinates in numeric fields. It supports all {@link SpatialOperation}s and
|
||||
* has a custom overlap relevancy. It is based on GeoPortal's <a
|
||||
* href="http://geoportal.svn.sourceforge.net/svnroot/geoportal/Geoportal/trunk/src/com/esri/gpt/catalog/lucene/SpatialClauseAdapter.java">SpatialClauseAdapter</a>.
|
||||
* <p>
|
||||
* <b>Characteristics:</b>
|
||||
* <br>
|
||||
* <ul>
|
||||
 * <li>Only indexes Rectangles; just one per field value. Other shapes can be provided
 * and the bounding box will be used.</li>
 * <li>Can query only by a Rectangle. Providing other shapes is an error.</li>
 * <li>Supports most {@link SpatialOperation}s but not Overlaps.</li>
 * <li>Uses the DocValues API for any sorting / relevancy.</li>
 * </ul>
 * <p>
 * <b>Implementation:</b>
 * <p>
 * This uses 4 double fields for minX, maxX, minY, maxY
 * and a boolean to mark a dateline cross. Depending on the particular {@link
 * SpatialOperation}s, there are a variety of range queries on {@link DoublePoint}s to be
 * done.
 * The {@link #makeOverlapRatioValueSource(org.locationtech.spatial4j.shape.Rectangle, double)}
 * works by calculating the query bbox overlap percentage against the indexed
 * shape overlap percentage. The indexed shape's coordinates are retrieved from
 * {@link org.apache.lucene.index.LeafReader#getNumericDocValues}.
 *
 * @lucene.experimental
 */
public class BBoxStrategy extends SpatialStrategy {

  // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we
  //  create more than one Field.

  /**
   * pointValues, docValues, and nothing else.
   */
  public static FieldType DEFAULT_FIELDTYPE;

  @Deprecated
  public static LegacyFieldType LEGACY_FIELDTYPE;

  static {
    // Default: pointValues + docValues
    FieldType type = new FieldType();
    type.setDimensions(1, Double.BYTES);//pointValues (assume Double)
    type.setDocValuesType(DocValuesType.NUMERIC);//docValues
    type.setStored(false);
    type.freeze();
    DEFAULT_FIELDTYPE = type;
    // Legacy default: legacyNumerics + docValues
    LegacyFieldType legacyType = new LegacyFieldType();
    legacyType.setIndexOptions(IndexOptions.DOCS);
    legacyType.setNumericType(LegacyNumericType.DOUBLE);
    legacyType.setNumericPrecisionStep(8);// same as solr default
    legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
    legacyType.setStored(false);
    legacyType.freeze();
    LEGACY_FIELDTYPE = legacyType;
  }

  public static final String SUFFIX_MINX = "__minX";
  public static final String SUFFIX_MAXX = "__maxX";
  public static final String SUFFIX_MINY = "__minY";
  public static final String SUFFIX_MAXY = "__maxY";
  public static final String SUFFIX_XDL = "__xdl";

  /*
   * The Bounding Box gets stored as four fields for x/y min/max and a flag
   * that says if the box crosses the dateline (xdl).
   */
  final String field_bbox;
  final String field_minX;
  final String field_minY;
  final String field_maxX;
  final String field_maxY;
  final String field_xdl; // crosses dateline

  private final FieldType optionsFieldType;//from constructor; aggregate field type used to express all options
  private final int fieldsLen;
  private final boolean hasStored;
  private final boolean hasDocVals;
  private final boolean hasPointVals;
  // equiv to "hasLegacyNumerics":
  private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
  private final FieldType xdlFieldType;

  /**
   * Creates a new {@link BBoxStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
   */
  public static BBoxStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
  }
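
  // A minimal usage sketch (not part of this class; "indexWriter" and the field
  // prefix are illustrative only): index one rectangle per document.
  //
  //   SpatialContext ctx = SpatialContext.GEO;
  //   BBoxStrategy strategy = BBoxStrategy.newInstance(ctx, "bbox");
  //   Document doc = new Document();
  //   for (Field f : strategy.createIndexableFields(ctx.makeRectangle(-10, 10, -5, 5))) {
  //     doc.add(f);
  //   }
  //   indexWriter.addDocument(doc);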

  /**
   * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility
   * @deprecated LegacyNumerics will be removed
   */
  @Deprecated
  public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
  }

  /**
   * Creates this strategy.
   * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
   * field too. Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features
   * aren't needed then disable them.
   */
  public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
    super(ctx, fieldNamePrefix);
    field_bbox = fieldNamePrefix;
    field_minX = fieldNamePrefix + SUFFIX_MINX;
    field_maxX = fieldNamePrefix + SUFFIX_MAXX;
    field_minY = fieldNamePrefix + SUFFIX_MINY;
    field_maxY = fieldNamePrefix + SUFFIX_MAXY;
    field_xdl = fieldNamePrefix + SUFFIX_XDL;

    fieldType.freeze();
    this.optionsFieldType = fieldType;

    int numQuads = 0;
    if ((this.hasStored = fieldType.stored())) {
      numQuads++;
    }
    if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
      numQuads++;
    }
    if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
      numQuads++;
    }
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
      if (hasPointVals) {
        throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
      }
      final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
      if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
        throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
      }
      numQuads++;
      legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
      legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
      legacyNumericFieldType.freeze();
    } else {
      legacyNumericFieldType = null;
    }

    if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
      xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
      xdlFieldType.setIndexOptions(IndexOptions.DOCS);
      xdlFieldType.freeze();
    } else {
      xdlFieldType = null;
    }

    this.fieldsLen = numQuads * 4 + (xdlFieldType != null ? 1 : 0);
  }

  /** Returns a field type representing the set of field options. This is identical to what was passed into the
   * constructor. It's frozen. */
  public FieldType getFieldType() {
    return optionsFieldType;
  }

  //---------------------------------
  // Indexing
  //---------------------------------

  @Override
  public Field[] createIndexableFields(Shape shape) {
    return createIndexableFields(shape.getBoundingBox());
  }

  private Field[] createIndexableFields(Rectangle bbox) {
    Field[] fields = new Field[fieldsLen];
    int idx = -1;
    if (hasStored) {
      fields[++idx] = new StoredField(field_minX, bbox.getMinX());
      fields[++idx] = new StoredField(field_minY, bbox.getMinY());
      fields[++idx] = new StoredField(field_maxX, bbox.getMaxX());
      fields[++idx] = new StoredField(field_maxY, bbox.getMaxY());
    }
    if (hasDocVals) {
      fields[++idx] = new DoubleDocValuesField(field_minX, bbox.getMinX());
      fields[++idx] = new DoubleDocValuesField(field_minY, bbox.getMinY());
      fields[++idx] = new DoubleDocValuesField(field_maxX, bbox.getMaxX());
      fields[++idx] = new DoubleDocValuesField(field_maxY, bbox.getMaxY());
    }
    if (hasPointVals) {
      fields[++idx] = new DoublePoint(field_minX, bbox.getMinX());
      fields[++idx] = new DoublePoint(field_minY, bbox.getMinY());
      fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX());
      fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY());
    }
    if (legacyNumericFieldType != null) {
      fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
    }
    if (xdlFieldType != null) {
      fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
    }
    assert idx == fields.length - 1;
    return fields;
  }
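
  // For example, with DEFAULT_FIELDTYPE and prefix "bbox", one rectangle yields
  // nine fields: four DoublePoints plus four DoubleDocValuesFields ("bbox__minX",
  // "bbox__maxX", "bbox__minY", "bbox__maxY") and the "bbox__xdl" StringField
  // holding "T" or "F". (A sketch of the expected output, not asserted here.)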

  //---------------------------------
  // Value Source / Relevancy
  //---------------------------------

  /**
   * Provides access to each rectangle per document as a ValueSource in which
   * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)} returns a {@link
   * Shape}.
   */ //TODO raise to SpatialStrategy
  public ValueSource makeShapeValueSource() {
    return new BBoxValueSource(this);
  }

  @Override
  public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
    //TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl.
    return new DistanceToShapeValueSource(makeShapeValueSource(), queryPoint, multiplier, ctx);
  }

  /** Returns a similarity based on {@link BBoxOverlapRatioValueSource}. This is just a
   * convenience method. */
  public ValueSource makeOverlapRatioValueSource(Rectangle queryBox, double queryTargetProportion) {
    return new BBoxOverlapRatioValueSource(
        makeShapeValueSource(), ctx.isGeo(), queryBox, queryTargetProportion, 0.0);
  }
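
  // A hedged example of wiring the overlap ratio into scoring (variable names
  // such as "strategy", "queryBox" and "args" are assumed to exist elsewhere):
  //
  //   ValueSource overlap = strategy.makeOverlapRatioValueSource(queryBox, 0.25);
  //   Query scored = new CustomScoreQuery(strategy.makeQuery(args), new FunctionQuery(overlap));
  //
  // Any mechanism that scores by a ValueSource works equally well; CustomScoreQuery
  // and FunctionQuery are just one option from the queries module.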

  //---------------------------------
  // Query Building
  //---------------------------------

  // Utility on SpatialStrategy?
//  public Query makeQueryWithValueSource(SpatialArgs args, ValueSource valueSource) {
//    return new CustomScoreQuery(makeQuery(args), new FunctionQuery(valueSource));
//or...
//  return new BooleanQuery.Builder()
//      .add(new FunctionQuery(valueSource), BooleanClause.Occur.MUST)//matches everything and provides score
//      .add(filterQuery, BooleanClause.Occur.FILTER)//filters (score isn't used)
//      .build();
//  }

  @Override
  public Query makeQuery(SpatialArgs args) {
    Shape shape = args.getShape();
    if (!(shape instanceof Rectangle))
      throw new UnsupportedOperationException("Can only query by Rectangle, not " + shape);

    Rectangle bbox = (Rectangle) shape;
    Query spatial;

    // Useful for understanding Relations:
    // http://edndoc.esri.com/arcsde/9.1/general_topics/understand_spatial_relations.htm
    SpatialOperation op = args.getOperation();
    if( op == SpatialOperation.BBoxIntersects ) spatial = makeIntersects(bbox);
    else if( op == SpatialOperation.BBoxWithin ) spatial = makeWithin(bbox);
    else if( op == SpatialOperation.Contains ) spatial = makeContains(bbox);
    else if( op == SpatialOperation.Intersects ) spatial = makeIntersects(bbox);
    else if( op == SpatialOperation.IsEqualTo ) spatial = makeEquals(bbox);
    else if( op == SpatialOperation.IsDisjointTo ) spatial = makeDisjoint(bbox);
    else if( op == SpatialOperation.IsWithin ) spatial = makeWithin(bbox);
    else { //no Overlaps support yet
      throw new UnsupportedSpatialOperation(op);
    }
    return new ConstantScoreQuery(spatial);
  }
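
  // Example (sketch): find documents whose indexed bbox intersects a query
  // rectangle. "ctx", "strategy" and "indexSearcher" are assumed to be set up
  // as shown earlier; none of these names are defined in this class.
  //
  //   Rectangle qRect = ctx.makeRectangle(0, 20, 0, 10);
  //   SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects, qRect);
  //   TopDocs hits = indexSearcher.search(strategy.makeQuery(args), 10);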

  /**
   * Constructs a query to retrieve documents that fully contain the input envelope.
   *
   * @return the spatial query
   */
  Query makeContains(Rectangle bbox) {

    // general case
    // docMinX <= queryExtent.getMinX() AND docMinY <= queryExtent.getMinY() AND docMaxX >= queryExtent.getMaxX() AND docMaxY >= queryExtent.getMaxY()

    // Y conditions
    // docMinY <= queryExtent.getMinY() AND docMaxY >= queryExtent.getMaxY()
    Query qMinY = this.makeNumericRangeQuery(field_minY, null, bbox.getMinY(), false, true);
    Query qMaxY = this.makeNumericRangeQuery(field_maxY, bbox.getMaxY(), null, true, false);
    Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);

    // X conditions
    Query xConditions;

    // queries that do not cross the date line
    if (!bbox.getCrossesDateLine()) {

      // X Conditions for documents that do not cross the date line,
      // documents that contain the min X and max X of the query envelope,
      // docMinX <= queryExtent.getMinX() AND docMaxX >= queryExtent.getMaxX()
      Query qMinX = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
      Query qMaxX = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
      Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);
      Query qNonXDL = this.makeXDL(false, qMinMax);

      if (!ctx.isGeo()) {
        xConditions = qNonXDL;
      } else {
        // X Conditions for documents that cross the date line,
        // the left portion of the document contains the min X of the query
        // OR the right portion of the document contains the max X of the query,
        // docMinXLeft <= queryExtent.getMinX() OR docMaxXRight >= queryExtent.getMaxX()
        Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
        Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
        Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qXDLLeft, qXDLRight);
        Query qXDL = this.makeXDL(true, qXDLLeftRight);

        Query qEdgeDL = null;
        if (bbox.getMinX() == bbox.getMaxX() && Math.abs(bbox.getMinX()) == 180) {
          double edge = bbox.getMinX() * -1;//opposite dateline edge
          qEdgeDL = makeQuery(BooleanClause.Occur.SHOULD,
              makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
        }

        // apply the non-XDL and XDL conditions
        xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL, qEdgeDL);
      }
    } else {
      // queries that cross the date line

      // No need to search for documents that do not cross the date line

      // X Conditions for documents that cross the date line,
      // the left portion of the document contains the min X of the query
      // AND the right portion of the document contains the max X of the query,
      // docMinXLeft <= queryExtent.getMinX() AND docMaxXRight >= queryExtent.getMaxX()
      Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
      Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
      Query qXDLLeftRight = this.makeXDL(true, this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight));

      Query qWorld = makeQuery(BooleanClause.Occur.MUST,
          makeNumberTermQuery(field_minX, -180), makeNumberTermQuery(field_maxX, 180));

      xConditions = makeQuery(BooleanClause.Occur.SHOULD, qXDLLeftRight, qWorld);
    }

    // both X and Y conditions must occur
    return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
  }

  /**
   * Constructs a query to retrieve documents that are disjoint to the input envelope.
   *
   * @return the spatial query
   */
  Query makeDisjoint(Rectangle bbox) {

    // general case
    // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX() OR docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()

    // Y conditions
    // docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()
    Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMaxY(), null, false, false);
    Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMinY(), false, false);
    Query yConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qMinY, qMaxY);

    // X conditions
    Query xConditions;

    // queries that do not cross the date line
    if (!bbox.getCrossesDateLine()) {

      // X Conditions for documents that do not cross the date line,
      // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()
      Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
      if (bbox.getMinX() == -180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(qMinX, BooleanClause.Occur.MUST);
        bq.add(makeNumberTermQuery(field_maxX, 180.0), BooleanClause.Occur.MUST_NOT);
        qMinX = bq.build();
      }
      Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);

      if (bbox.getMaxX() == 180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(qMaxX, BooleanClause.Occur.MUST);
        bq.add(makeNumberTermQuery(field_minX, -180.0), BooleanClause.Occur.MUST_NOT);
        qMaxX = bq.build();
      }
      Query qMinMax = this.makeQuery(BooleanClause.Occur.SHOULD, qMinX, qMaxX);
      Query qNonXDL = this.makeXDL(false, qMinMax);

      if (!ctx.isGeo()) {
        xConditions = qNonXDL;
      } else {
        // X Conditions for documents that cross the date line,

        // both the left and right portions of the document must be disjoint to the query
        // (docMinXLeft > queryExtent.getMaxX() OR docMaxXLeft < queryExtent.getMinX()) AND
        // (docMinXRight > queryExtent.getMaxX() OR docMaxXRight < queryExtent.getMinX())
        // where: docMaxXLeft = 180.0, docMinXRight = -180.0
        // (docMaxXLeft < queryExtent.getMinX()) equates to (180.0 < queryExtent.getMinX()) and is ignored
        // (docMinXRight > queryExtent.getMaxX()) equates to (-180.0 > queryExtent.getMaxX()) and is ignored
        Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
        Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
        Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXRight);
        Query qXDL = this.makeXDL(true, qLeftRight);

        // apply the non-XDL and XDL conditions
        xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
      }
      // queries that cross the date line
    } else {

      // X Conditions for documents that do not cross the date line,
      // the document must be disjoint to both the left and right query portions
      // (docMinX > queryExtent.getMaxX()Left OR docMaxX < queryExtent.getMinX()) AND (docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()Left)
      // where: queryExtent.getMaxX()Left = 180.0, queryExtent.getMinX()Left = -180.0
      Query qMinXLeft = this.makeNumericRangeQuery(field_minX, 180.0, null, false, false);
      Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
      Query qMinXRight = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
      Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, -180.0, false, false);
      Query qLeft = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXLeft, qMaxXLeft);
      Query qRight = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXRight, qMaxXRight);
      Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qLeft, qRight);

      // No need to search for documents that do not cross the date line

      xConditions = this.makeXDL(false, qLeftRight);
    }

    // either X or Y conditions should occur
    return this.makeQuery(BooleanClause.Occur.SHOULD, xConditions, yConditions);
  }

  /**
   * Constructs a query to retrieve documents that equal the input envelope.
   *
   * @return the spatial query
   */
  Query makeEquals(Rectangle bbox) {

    // docMinX = queryExtent.getMinX() AND docMinY = queryExtent.getMinY() AND docMaxX = queryExtent.getMaxX() AND docMaxY = queryExtent.getMaxY()
    Query qMinX = makeNumberTermQuery(field_minX, bbox.getMinX());
    Query qMinY = makeNumberTermQuery(field_minY, bbox.getMinY());
    Query qMaxX = makeNumberTermQuery(field_maxX, bbox.getMaxX());
    Query qMaxY = makeNumberTermQuery(field_maxY, bbox.getMaxY());
    return makeQuery(BooleanClause.Occur.MUST, qMinX, qMinY, qMaxX, qMaxY);
  }

  /**
   * Constructs a query to retrieve documents that intersect the input envelope.
   *
   * @return the spatial query
   */
  Query makeIntersects(Rectangle bbox) {

    // the original intersects query does not work for envelopes that cross the date line,
    // switch to a NOT Disjoint query

    // MUST_NOT causes a problem when it's the only clause type within a BooleanQuery,
    // to get around it we add all documents as a SHOULD

    // there must be an envelope, it must not be disjoint
    Query qHasEnv;
    if (ctx.isGeo()) {
      Query qIsNonXDL = this.makeXDL(false);
      Query qIsXDL = ctx.isGeo() ? this.makeXDL(true) : null;
      qHasEnv = this.makeQuery(BooleanClause.Occur.SHOULD, qIsNonXDL, qIsXDL);
    } else {
      qHasEnv = this.makeXDL(false);
    }

    BooleanQuery.Builder qNotDisjoint = new BooleanQuery.Builder();
    qNotDisjoint.add(qHasEnv, BooleanClause.Occur.MUST);
    Query qDisjoint = makeDisjoint(bbox);
    qNotDisjoint.add(qDisjoint, BooleanClause.Occur.MUST_NOT);

    //Query qDisjoint = makeDisjoint();
    //BooleanQuery qNotDisjoint = new BooleanQuery();
    //qNotDisjoint.add(new MatchAllDocsQuery(),BooleanClause.Occur.SHOULD);
    //qNotDisjoint.add(qDisjoint,BooleanClause.Occur.MUST_NOT);
    return qNotDisjoint.build();
  }

  /**
   * Makes a boolean query based upon a collection of queries and a logical operator.
   *
   * @param occur the logical operator
   * @param queries the query collection
   * @return the query
   */
  BooleanQuery makeQuery(BooleanClause.Occur occur, Query... queries) {
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    for (Query query : queries) {
      if (query != null)
        bq.add(query, occur);
    }
    return bq.build();
  }

  /**
   * Constructs a query to retrieve documents that are fully within the input envelope.
   *
   * @return the spatial query
   */
  Query makeWithin(Rectangle bbox) {

    // general case
    // docMinX >= queryExtent.getMinX() AND docMinY >= queryExtent.getMinY() AND docMaxX <= queryExtent.getMaxX() AND docMaxY <= queryExtent.getMaxY()

    // Y conditions
    // docMinY >= queryExtent.getMinY() AND docMaxY <= queryExtent.getMaxY()
    Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMinY(), null, true, false);
    Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMaxY(), false, true);
    Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);

    // X conditions
    Query xConditions;

    if (ctx.isGeo() && bbox.getMinX() == -180.0 && bbox.getMaxX() == 180.0) {
      //if query world-wraps, only the y condition matters
      return yConditions;

    } else if (!bbox.getCrossesDateLine()) {
      // queries that do not cross the date line

      // docMinX >= queryExtent.getMinX() AND docMaxX <= queryExtent.getMaxX()
      Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
      Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
      Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);

      double edge = 0;//none, otherwise opposite dateline of query
      if (bbox.getMinX() == -180.0)
        edge = 180;
      else if (bbox.getMaxX() == 180.0)
        edge = -180;
      if (edge != 0 && ctx.isGeo()) {
        Query edgeQ = makeQuery(BooleanClause.Occur.MUST,
            makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
        qMinMax = makeQuery(BooleanClause.Occur.SHOULD, qMinMax, edgeQ);
      }

      xConditions = this.makeXDL(false, qMinMax);

      // queries that cross the date line
    } else {

      // X Conditions for documents that do not cross the date line

      // the document should be within the left portion of the query
      // docMinX >= queryExtent.getMinX() AND docMaxX <= 180.0
      Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
      Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, 180.0, false, true);
      Query qLeft = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXLeft);

      // the document should be within the right portion of the query
      // docMinX >= -180.0 AND docMaxX <= queryExtent.getMaxX()
      Query qMinXRight = this.makeNumericRangeQuery(field_minX, -180.0, null, true, false);
      Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
      Query qRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXRight, qMaxXRight);

      // either left or right conditions should occur,
      // apply the left and right conditions to documents that do not cross the date line
      Query qLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qLeft, qRight);
      Query qNonXDL = this.makeXDL(false, qLeftRight);

      // X Conditions for documents that cross the date line,
      // the left portion of the document must be within the left portion of the query,
      // AND the right portion of the document must be within the right portion of the query
      // docMinXLeft >= queryExtent.getMinX() AND docMaxXLeft <= 180.0
      // AND docMinXRight >= -180.0 AND docMaxXRight <= queryExtent.getMaxX()
      Query qXDLLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
      Query qXDLRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
      Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight);
      Query qXDL = this.makeXDL(true, qXDLLeftRight);

      // apply the non-XDL and XDL conditions
      xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
    }

    // both X and Y conditions must occur
    return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
  }

  /**
   * Constructs a query to retrieve documents that do or do not cross the date line.
   *
   * @param crossedDateLine <code>true</code> for documents that cross the date line
   * @return the query
   */
  private Query makeXDL(boolean crossedDateLine) {
    // The 'T' and 'F' values match solr fields
    return new TermQuery(new Term(field_xdl, crossedDateLine ? "T" : "F"));
  }

  /**
   * Constructs a query to retrieve documents that do or do not cross the date line
   * and match the supplied spatial query.
   *
   * @param crossedDateLine <code>true</code> for documents that cross the date line
   * @param query the spatial query
   * @return the query
   */
  private Query makeXDL(boolean crossedDateLine, Query query) {
    if (!ctx.isGeo()) {
      assert !crossedDateLine;
      return query;
    }
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(this.makeXDL(crossedDateLine), BooleanClause.Occur.MUST);
    bq.add(query, BooleanClause.Occur.MUST);
    return bq.build();
  }

  private Query makeNumberTermQuery(String field, double number) {
    if (hasPointVals) {
      return DoublePoint.newExactQuery(field, number);
    } else if (legacyNumericFieldType != null) {
      BytesRefBuilder bytes = new BytesRefBuilder();
      LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes);
      return new TermQuery(new Term(field, bytes.get()));
    }
    throw new UnsupportedOperationException("An index is required for this operation.");
  }

  /**
   * Returns a numeric range query based on FieldType.
   * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType};
   * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields.
   *
   * @param fieldname field name. must not be <code>null</code>.
   * @param min minimum value of the range.
   * @param max maximum value of the range.
   * @param minInclusive include the minimum value if <code>true</code>.
   * @param maxInclusive include the maximum value if <code>true</code>.
   */
  private Query makeNumericRangeQuery(String fieldname, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
    if (hasPointVals) {
      if (min == null) {
        min = Double.NEGATIVE_INFINITY;
      }

      if (max == null) {
        max = Double.POSITIVE_INFINITY;
      }

      if (minInclusive == false) {
        min = Math.nextUp(min);
      }

      if (maxInclusive == false) {
        max = Math.nextDown(max);
      }

      return DoublePoint.newRangeQuery(fieldname, min, max);
    } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
      return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
    }
    throw new UnsupportedOperationException("An index is required for this operation.");
  }
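
  // Note: DoublePoint range queries are always inclusive, so exclusive bounds are
  // emulated above by nudging to the adjacent representable double via
  // Math.nextUp/nextDown; e.g. an exclusive min of 10.0 becomes an inclusive min
  // of Math.nextUp(10.0) == 10.000000000000002.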
}

@@ -0,0 +1,135 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.legacy;

import java.io.IOException;
import java.util.Map;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Explanation;
import org.locationtech.spatial4j.shape.Rectangle;

/**
 * A ValueSource in which the indexed Rectangle is returned from
 * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)}.
 *
 * @lucene.internal
 */
class BBoxValueSource extends ValueSource {

  private final BBoxStrategy strategy;

  public BBoxValueSource(BBoxStrategy strategy) {
    this.strategy = strategy;
  }

  @Override
  public String description() {
    return "bboxShape(" + strategy.getFieldName() + ")";
  }

  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    LeafReader reader = readerContext.reader();
    final NumericDocValues minX = DocValues.getNumeric(reader, strategy.field_minX);
    final NumericDocValues minY = DocValues.getNumeric(reader, strategy.field_minY);
    final NumericDocValues maxX = DocValues.getNumeric(reader, strategy.field_maxX);
    final NumericDocValues maxY = DocValues.getNumeric(reader, strategy.field_maxY);

    //reused
    final Rectangle rect = strategy.getSpatialContext().makeRectangle(0,0,0,0);

    return new FunctionValues() {
      private int lastDocID = -1;
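      // lastDocID enforces the iterator-style DocValues contract: advance() only
      // moves forward, so callers must request docIDs in increasing order.
      // getDocValue() below returns 0.0 for a doc with no value; callers tell the
      // cases apart by checking values.docID() == doc afterwards.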

      private double getDocValue(NumericDocValues values, int doc) throws IOException {
        int curDocID = values.docID();
        if (doc > curDocID) {
          curDocID = values.advance(doc);
        }
        if (doc == curDocID) {
          return Double.longBitsToDouble(values.longValue());
        } else {
          return 0.0;
        }
      }

      @Override
      public Object objectVal(int doc) throws IOException {
        if (doc < lastDocID) {
          throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc);
        }
        lastDocID = doc;

        double minXValue = getDocValue(minX, doc);
        if (minX.docID() != doc) {
          return null;
        } else {
          double minYValue = getDocValue(minY, doc);
          double maxXValue = getDocValue(maxX, doc);
          double maxYValue = getDocValue(maxY, doc);
          rect.reset(minXValue, maxXValue, minYValue, maxYValue);
          return rect;
        }
      }

      @Override
      public String strVal(int doc) throws IOException {//TODO support WKT output once Spatial4j does
        Object v = objectVal(doc);
        return v == null ? null : v.toString();
      }

      @Override
      public boolean exists(int doc) throws IOException {
        getDocValue(minX, doc);
        return minX.docID() == doc;
      }

      @Override
      public Explanation explain(int doc) throws IOException {
        return Explanation.match(Float.NaN, toString(doc));
      }

      @Override
      public String toString(int doc) throws IOException {
        return description() + '=' + strVal(doc);
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    BBoxValueSource that = (BBoxValueSource) o;

    if (!strategy.equals(that.strategy)) return false;

    return true;
  }

  @Override
  public int hashCode() {
    return strategy.hashCode();
  }
}

@@ -0,0 +1,133 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.legacy;

import org.apache.lucene.index.NumericDocValues;
import org.locationtech.spatial4j.distance.DistanceCalculator;
import org.locationtech.spatial4j.shape.Point;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;

import java.io.IOException;
import java.util.Map;

/**
 * An implementation of the Lucene ValueSource model that returns the distance
 * for a {@link PointVectorStrategy}.
 *
 * @lucene.internal
 */
public class DistanceValueSource extends ValueSource {

  private PointVectorStrategy strategy;
  private final Point from;
  private final double multiplier;

  /**
   * Constructor.
   */
  public DistanceValueSource(PointVectorStrategy strategy, Point from, double multiplier) {
    this.strategy = strategy;
    this.from = from;
    this.multiplier = multiplier;
  }

  /**
   * Returns the ValueSource description.
   */
  @Override
  public String description() {
    return "DistanceValueSource("+strategy+", "+from+")";
  }

  /**
   * Returns the FunctionValues used by the function query.
   */
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    LeafReader reader = readerContext.reader();

    final NumericDocValues ptX = DocValues.getNumeric(reader, strategy.getFieldNameX());
    final NumericDocValues ptY = DocValues.getNumeric(reader, strategy.getFieldNameY());

    return new FunctionValues() {

      private int lastDocID = -1;

      private final Point from = DistanceValueSource.this.from;
      private final DistanceCalculator calculator = strategy.getSpatialContext().getDistCalc();
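      // distance reported for documents with no indexed point: the worst case,
      // i.e. half the globe away (180 degrees, scaled by the multiplier) for geo
      // contexts, otherwise Double.MAX_VALUE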
      private final double nullValue =
          (strategy.getSpatialContext().isGeo() ? 180 * multiplier : Double.MAX_VALUE);

      private double getDocValue(NumericDocValues values, int doc) throws IOException {
        int curDocID = values.docID();
        if (doc > curDocID) {
          curDocID = values.advance(doc);
        }
        if (doc == curDocID) {
          return Double.longBitsToDouble(values.longValue());
        } else {
          return 0.0;
        }
      }

      @Override
      public float floatVal(int doc) throws IOException {
        return (float) doubleVal(doc);
      }

      @Override
      public double doubleVal(int doc) throws IOException {
        // make sure it has minX and area
        double x = getDocValue(ptX, doc);
        if (ptX.docID() == doc) {
          double y = getDocValue(ptY, doc);
          assert ptY.docID() == doc;
          return calculator.distance(from, x, y) * multiplier;
        }
        return nullValue;
      }

      @Override
      public String toString(int doc) throws IOException {
        return description() + "=" + floatVal(doc);
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    DistanceValueSource that = (DistanceValueSource) o;

    if (!from.equals(that.from)) return false;
    if (!strategy.equals(that.strategy)) return false;
    if (multiplier != that.multiplier) return false;

    return true;
  }

  @Override
  public int hashCode() {
    return from.hashCode();
  }
}
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyFloatField}.
|
||||
*
|
||||
* <p>To perform range querying or filtering against a
|
||||
* <code>LegacyDoubleField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* <code>LegacyDoubleField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* To sort according to a
|
||||
* <code>LegacyDoubleField</code>, use the normal numeric sort types, eg
|
||||
* {@link org.apache.lucene.search.SortField.Type#DOUBLE}. <code>LegacyDoubleField</code>
|
||||
|
@ -85,7 +85,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyFieldType#setNumericPrecisionStep} method if you'd
|
||||
* like to change the value. Note that you must also
|
||||
* specify a congruent value when creating {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* For low cardinality fields larger precision steps are good.
|
||||
* If the cardinality is < 100, it is fair
|
||||
* to use {@link Integer#MAX_VALUE}, which produces one
|
||||
|
@ -94,8 +94,8 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* <p>For more information on the internals of numeric trie
|
||||
* indexing, including the <a
|
||||
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
|
||||
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
|
||||
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
|
||||
*
|
||||
* <p>If you only need to sort by numeric value, and never
|
||||
* run range querying/filtering, you can index using a
|
||||
|
@ -103,7 +103,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* This will minimize disk space consumed. </p>
|
||||
*
|
||||
* <p>More advanced users can instead use {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* class is a wrapper around this token stream type for
|
||||
* easier, more intuitive usage.</p>
|
||||
*
|
||||
|
@ -144,7 +144,7 @@ public final class LegacyDoubleField extends LegacyField {
|
|||
|
||||
/** Creates a stored or un-stored LegacyDoubleField with the provided value
|
||||
* and default <code>precisionStep</code> {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* @param name field name
|
||||
* @param value 64-bit double value
|
||||
* @param stored Store.YES if the content should also be stored
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
|
@ -60,7 +60,7 @@ public final class LegacyFieldType extends FieldType {
|
|||
|
||||
/**
|
||||
* LegacyNumericType: if non-null then the field's value will be indexed
|
||||
* numerically so that {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} can be used at
|
||||
* numerically so that {@link org.apache.solr.legacy.LegacyNumericRangeQuery} can be used at
|
||||
* search time.
|
||||
* <p>
|
||||
* The default is <code>null</code> (no numeric type)
|
||||
|
@ -97,7 +97,7 @@ public final class LegacyFieldType extends FieldType {
|
|||
* <p>
|
||||
* This has no effect if {@link #numericType()} returns null.
|
||||
* <p>
|
||||
* The default is {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
|
||||
* The default is {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
|
||||
* @see #setNumericPrecisionStep(int)
|
||||
*
|
||||
* @deprecated Please switch to {@link org.apache.lucene.index.PointValues} instead
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FloatPoint;
|
||||
|
@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyDoubleField}.
|
||||
*
|
||||
* <p>To perform range querying or filtering against a
|
||||
* <code>LegacyFloatField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* <code>LegacyFloatField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* To sort according to a
|
||||
* <code>LegacyFloatField</code>, use the normal numeric sort types, eg
|
||||
* {@link org.apache.lucene.search.SortField.Type#FLOAT}. <code>LegacyFloatField</code>
|
||||
|
@ -83,7 +83,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyFieldType#setNumericPrecisionStep} method if you'd
|
||||
* like to change the value. Note that you must also
|
||||
* specify a congruent value when creating {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* For low cardinality fields larger precision steps are good.
|
||||
* If the cardinality is < 100, it is fair
|
||||
* to use {@link Integer#MAX_VALUE}, which produces one
|
||||
|
@ -92,8 +92,8 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* <p>For more information on the internals of numeric trie
|
||||
* indexing, including the <a
|
||||
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
|
||||
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
|
||||
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
|
||||
*
|
||||
* <p>If you only need to sort by numeric value, and never
|
||||
* run range querying/filtering, you can index using a
|
||||
|
@ -101,7 +101,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* This will minimize disk space consumed. </p>
|
||||
*
|
||||
* <p>More advanced users can instead use {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* class is a wrapper around this token stream type for
|
||||
* easier, more intuitive usage.</p>
|
||||
*
|
||||
|
@ -144,7 +144,7 @@ public final class LegacyFloatField extends LegacyField {
|
|||
|
||||
/** Creates a stored or un-stored LegacyFloatField with the provided value
|
||||
* and default <code>precisionStep</code> {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* @param name field name
|
||||
* @param value 32-bit double value
|
||||
* @param stored Store.YES if the content should also be stored
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -50,7 +50,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyDoubleField}.
|
||||
*
|
||||
* <p>To perform range querying or filtering against a
|
||||
* <code>LegacyIntField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* <code>LegacyIntField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* To sort according to a
|
||||
* <code>LegacyIntField</code>, use the normal numeric sort types, eg
|
||||
* {@link org.apache.lucene.search.SortField.Type#INT}. <code>LegacyIntField</code>
|
||||
|
@ -84,7 +84,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyFieldType#setNumericPrecisionStep} method if you'd
|
||||
* like to change the value. Note that you must also
|
||||
* specify a congruent value when creating {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* For low cardinality fields larger precision steps are good.
|
||||
* If the cardinality is < 100, it is fair
|
||||
* to use {@link Integer#MAX_VALUE}, which produces one
|
||||
|
@ -93,8 +93,8 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* <p>For more information on the internals of numeric trie
|
||||
* indexing, including the <a
|
||||
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
|
||||
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
|
||||
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
|
||||
*
|
||||
* <p>If you only need to sort by numeric value, and never
|
||||
* run range querying/filtering, you can index using a
|
||||
|
@ -102,7 +102,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* This will minimize disk space consumed. </p>
|
||||
*
|
||||
* <p>More advanced users can instead use {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* class is a wrapper around this token stream type for
|
||||
* easier, more intuitive usage.</p>
|
||||
*
|
||||
|
@ -145,7 +145,7 @@ public final class LegacyIntField extends LegacyField {
|
|||
|
||||
/** Creates a stored or un-stored LegacyIntField with the provided value
|
||||
* and default <code>precisionStep</code> {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* @param name field name
|
||||
* @param value 32-bit integer value
|
||||
* @param stored Store.YES if the content should also be stored
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -61,7 +61,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* <code>long</code> value.
|
||||
*
|
||||
* <p>To perform range querying or filtering against a
|
||||
* <code>LegacyLongField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* <code>LegacyLongField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* To sort according to a
|
||||
* <code>LegacyLongField</code>, use the normal numeric sort types, eg
|
||||
* {@link org.apache.lucene.search.SortField.Type#LONG}. <code>LegacyLongField</code>
|
||||
|
@ -95,7 +95,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* LegacyFieldType#setNumericPrecisionStep} method if you'd
|
||||
* like to change the value. Note that you must also
|
||||
* specify a congruent value when creating {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
|
||||
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
|
||||
* For low cardinality fields larger precision steps are good.
|
||||
* If the cardinality is < 100, it is fair
|
||||
* to use {@link Integer#MAX_VALUE}, which produces one
|
||||
|
@ -104,8 +104,8 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* <p>For more information on the internals of numeric trie
|
||||
* indexing, including the <a
|
||||
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
|
||||
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
|
||||
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
|
||||
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
|
||||
*
|
||||
* <p>If you only need to sort by numeric value, and never
|
||||
* run range querying/filtering, you can index using a
|
||||
|
@ -113,7 +113,7 @@ import org.apache.lucene.index.IndexOptions;
|
|||
* This will minimize disk space consumed.
|
||||
*
|
||||
* <p>More advanced users can instead use {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
|
||||
* class is a wrapper around this token stream type for
|
||||
* easier, more intuitive usage.</p>
|
||||
*
|
||||
|
@ -154,7 +154,7 @@ public final class LegacyLongField extends LegacyField {
|
|||
|
||||
/** Creates a stored or un-stored LegacyLongField with the provided value
|
||||
* and default <code>precisionStep</code> {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* @param name field name
|
||||
* @param value 64-bit long value
|
||||
* @param stored Store.YES if the content should also be stored
|
|
@ -14,7 +14,7 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.legacy;
|
||||
package org.apache.solr.legacy;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -41,9 +41,9 @@ import org.apache.lucene.index.Term; // for javadocs
|
|||
/**
|
||||
* <p>A {@link Query} that matches numeric values within a
|
||||
* specified range. To use this, you must first index the
|
||||
* numeric values using {@link org.apache.lucene.legacy.LegacyIntField}, {@link
|
||||
* org.apache.lucene.legacy.LegacyFloatField}, {@link org.apache.lucene.legacy.LegacyLongField} or {@link org.apache.lucene.legacy.LegacyDoubleField} (expert: {@link
|
||||
* org.apache.lucene.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
|
||||
* numeric values using {@link org.apache.solr.legacy.LegacyIntField}, {@link
|
||||
* org.apache.solr.legacy.LegacyFloatField}, {@link org.apache.solr.legacy.LegacyLongField} or {@link org.apache.solr.legacy.LegacyDoubleField} (expert: {@link
|
||||
* org.apache.solr.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
|
||||
* you should use {@link TermRangeQuery}.</p>
|
||||
*
|
||||
* <p>You create a new LegacyNumericRangeQuery with the static
|
||||
|
@ -97,7 +97,7 @@ import org.apache.lucene.index.Term; // for javadocs
|
|||
* (all numerical values like doubles, longs, floats, and ints are converted to
|
||||
* lexicographic sortable string representations and stored with different precisions
|
||||
* (for a more detailed description of how the values are stored,
|
||||
* see {@link org.apache.lucene.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
|
||||
* see {@link org.apache.solr.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
|
||||
* The center of the range is searched only with the lowest possible precision in the <em>trie</em>,
|
||||
* while the boundaries are matched more exactly. This reduces the number of terms dramatically.</p>
|
||||
*
|
||||
|
@ -113,7 +113,7 @@ import org.apache.lucene.index.Term; // for javadocs
|
|||
* <h3><a name="precisionStepDesc">Precision Step</a></h3>
|
||||
* <p>You can choose any <code>precisionStep</code> when encoding values.
|
||||
* Lower step values mean more precisions and so more terms in index (and index gets larger). The number
|
||||
* of indexed terms per value is (those are generated by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}):
|
||||
* of indexed terms per value is (those are generated by {@link org.apache.solr.legacy.LegacyNumericTokenStream}):
|
||||
* <p style="font-family:serif">
|
||||
* indexedTermsPerValue = <b>ceil</b><big>(</big>bitsPerValue / precisionStep<big>)</big>
|
||||
* </p>
|
||||
|
@ -149,8 +149,8 @@ import org.apache.lucene.index.Term; // for javadocs
|
|||
* <li>Steps <b>≥64</b> for <em>long/double</em> and <b>≥32</b> for <em>int/float</em> produces one token
|
||||
* per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used
|
||||
* to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as
|
||||
* <code>precisionStep</code>). Using {@link org.apache.lucene.legacy.LegacyIntField},
|
||||
* {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} for sorting
|
||||
* <code>precisionStep</code>). Using {@link org.apache.solr.legacy.LegacyIntField},
|
||||
* {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} for sorting
|
||||
* is ideal, because building the field cache is much faster than with text-only numbers.
|
||||
* These fields have one term per value and therefore also work with term enumeration for building distinct lists
|
||||
* (e.g. facets / preselected values to search for).
|
||||
|
@ -205,7 +205,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
|
|||
|
||||
/**
|
||||
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>long</code>
|
||||
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
|
||||
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||
|
@ -231,7 +231,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
|
|||
|
||||
/**
|
||||
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>int</code>
|
||||
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
|
||||
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
|
||||
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
|
||||
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
|
||||
|
@@ -259,7 +259,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQuery

  /**
   * Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>double</code>
   * range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
   * range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
   * You can have half-open ranges (which are in fact </&le; or >/&ge; queries)
   * by setting the min or max value to <code>null</code>.
   * {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query
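// Illustrative sketch (not part of this patch; the field name "price" is an assumption):
// per the note above, NaN only matches when supplied as an explicit, closed bound.
Query nanOnly = LegacyNumericRangeQuery.newDoubleRange("price", Double.NaN, Double.NaN, true, true);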
@@ -289,7 +289,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQuery

  /**
   * Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>float</code>
   * range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
   * range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
   * You can have half-open ranges (which are in fact </&le; or >/&ge; queries)
   * by setting the min or max value to <code>null</code>.
   * {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query
@@ -390,8 +390,8 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQuery
   * <p>
   * WARNING: This term enumeration is not guaranteed to be always ordered by
   * {@link Term#compareTo}.
   * The ordering depends on how {@link org.apache.lucene.legacy.LegacyNumericUtils#splitLongRange} and
   * {@link org.apache.lucene.legacy.LegacyNumericUtils#splitIntRange} generate the sub-ranges. For
   * The ordering depends on how {@link org.apache.solr.legacy.LegacyNumericUtils#splitLongRange} and
   * {@link org.apache.solr.legacy.LegacyNumericUtils#splitIntRange} generate the sub-ranges. For
   * {@link MultiTermQuery} ordering is not relevant.
   */
  private final class NumericRangeTermsEnum extends FilteredTermsEnum {
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.legacy;
package org.apache.solr.legacy;

import java.util.Objects;
@@ -35,10 +35,10 @@ import org.apache.lucene.util.NumericUtils;
/**
 * <b>Expert:</b> This class provides a {@link TokenStream}
 * for indexing numeric values that can be used by {@link
 * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
 * org.apache.solr.legacy.LegacyNumericRangeQuery}.
 *
 * <p>Note that for simple usage, {@link org.apache.lucene.legacy.LegacyIntField}, {@link
 * org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} is
 * <p>Note that for simple usage, {@link org.apache.solr.legacy.LegacyIntField}, {@link
 * org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} is
 * recommended. These fields disable norms and
 * term freqs, as they are not usually needed during
 * searching. If you need to change these settings, you
@@ -81,7 +81,7 @@ import org.apache.lucene.util.NumericUtils;
 * than one numeric field, use a separate <code>LegacyNumericTokenStream</code>
 * instance for each.</p>
 *
 * <p>See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details on the
 * <p>See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details on the
 * <a
 * href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
 * parameter as well as how numeric fields work under the hood.</p>
@@ -140,7 +140,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
    }
  }

  /** Implementation of {@link org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
  /** Implementation of {@link org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
   * @lucene.internal
   * @since 4.0
   */
@@ -240,7 +240,7 @@ public final class LegacyNumericTokenStream extends TokenStream {

  /**
   * Creates a token stream for numeric values using the default <code>precisionStep</code>
   * {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
   * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
   * before using set a value using the various set<em>???</em>Value() methods.
   */
  public LegacyNumericTokenStream() {
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.legacy;
package org.apache.solr.legacy;

/** Data type of the numeric value
 * @since 3.2
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.legacy;
package org.apache.solr.legacy;

import java.io.IOException;
|
|||
* during encoding.
|
||||
*
|
||||
* <p>For easy usage, the trie algorithm is implemented for indexing inside
|
||||
* {@link org.apache.lucene.legacy.LegacyNumericTokenStream} that can index <code>int</code>, <code>long</code>,
|
||||
* {@link org.apache.solr.legacy.LegacyNumericTokenStream} that can index <code>int</code>, <code>long</code>,
|
||||
* <code>float</code>, and <code>double</code>. For querying,
|
||||
* {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} implements the query part
|
||||
* {@link org.apache.solr.legacy.LegacyNumericRangeQuery} implements the query part
|
||||
* for the same data types.
|
||||
*
|
||||
* @lucene.internal
|
||||
|
@@ -61,15 +61,15 @@ public final class LegacyNumericUtils {
  private LegacyNumericUtils() {} // no instance!

  /**
   * The default precision step used by {@link org.apache.lucene.legacy.LegacyLongField},
   * {@link org.apache.lucene.legacy.LegacyDoubleField}, {@link org.apache.lucene.legacy.LegacyNumericTokenStream}, {@link
   * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
   * The default precision step used by {@link org.apache.solr.legacy.LegacyLongField},
   * {@link org.apache.solr.legacy.LegacyDoubleField}, {@link org.apache.solr.legacy.LegacyNumericTokenStream}, {@link
   * org.apache.solr.legacy.LegacyNumericRangeQuery}.
   */
  public static final int PRECISION_STEP_DEFAULT = 16;

  /**
   * The default precision step used by {@link org.apache.lucene.legacy.LegacyIntField} and
   * {@link org.apache.lucene.legacy.LegacyFloatField}.
   * The default precision step used by {@link org.apache.solr.legacy.LegacyIntField} and
   * {@link org.apache.solr.legacy.LegacyFloatField}.
   */
  public static final int PRECISION_STEP_DEFAULT_32 = 8;
@@ -101,7 +101,7 @@ public final class LegacyNumericUtils {

  /**
   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
   * This method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
   * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * After encoding, {@code bytes.offset} will always be 0.
   * @param val the numeric value
   * @param shift how many bits to strip from the right
@@ -128,7 +128,7 @@ public final class LegacyNumericUtils {

  /**
   * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
   * This method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
   * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * After encoding, {@code bytes.offset} will always be 0.
   * @param val the numeric value
   * @param shift how many bits to strip from the right
@@ -232,7 +232,7 @@ public final class LegacyNumericUtils {
   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
   * {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
   * method.
   * <p>This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
   * <p>This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
   */
  public static void splitLongRange(final LongRangeBuilder builder,
    final int precisionStep, final long minBound, final long maxBound
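// Illustrative sketch (not part of this patch): observing the sub-ranges that
// splitLongRange produces for a bounded range; the bounds and step are arbitrary.
LegacyNumericUtils.splitLongRange(new LegacyNumericUtils.LongRangeBuilder() {
  @Override
  public void addRange(long min, long max, int shift) {
    System.out.println("shift=" + shift + " [" + min + " TO " + max + "]");
  }
}, 16, 1L, 999L);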
@@ -246,7 +246,7 @@ public final class LegacyNumericUtils {
   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
   * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)}
   * method.
   * <p>This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
   * <p>This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
   */
  public static void splitIntRange(final IntRangeBuilder builder,
    final int precisionStep, final int minBound, final int maxBound
@@ -0,0 +1,292 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.legacy;

import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.solr.legacy.LegacyDoubleField;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Circle;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;

/**
 * Simple {@link SpatialStrategy} which represents Points in two numeric fields.
 * The Strategy's best feature is decent distance sort.
 *
 * <p>
 * <b>Characteristics:</b>
 * <br>
 * <ul>
 * <li>Only indexes points; just one per field value.</li>
 * <li>Can query by a rectangle or circle.</li>
 * <li>{@link
 * org.apache.lucene.spatial.query.SpatialOperation#Intersects} and {@link
 * SpatialOperation#IsWithin} are supported.</li>
 * <li>Requires DocValues for
 * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} and for
 * searching with a Circle.</li>
 * </ul>
 *
 * <p>
 * <b>Implementation:</b>
 * <p>
 * This is a simple Strategy. Search works with a pair of range queries on two {@link DoublePoint}s representing
 * x & y fields. A Circle query does the same bbox query but adds a
 * ValueSource filter on
 * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}.
 * <p>
 * One performance shortcoming with this strategy is that a scenario involving
 * both a search using a Circle and sort will result in calculations for the
 * spatial distance being done twice -- once for the filter and second for the
 * sort.
 *
 * @lucene.experimental
 */
public class PointVectorStrategy extends SpatialStrategy {

  // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we
  //   create more than one Field.

  /**
   * pointValues, docValues, and nothing else.
   */
  public static FieldType DEFAULT_FIELDTYPE;

  @Deprecated
  public static LegacyFieldType LEGACY_FIELDTYPE;
  static {
    // Default: pointValues + docValues
    FieldType type = new FieldType();
    type.setDimensions(1, Double.BYTES);//pointValues (assume Double)
    type.setDocValuesType(DocValuesType.NUMERIC);//docValues
    type.setStored(false);
    type.freeze();
    DEFAULT_FIELDTYPE = type;
    // Legacy default: legacyNumerics
    LegacyFieldType legacyType = new LegacyFieldType();
    legacyType.setIndexOptions(IndexOptions.DOCS);
    legacyType.setNumericType(LegacyNumericType.DOUBLE);
    legacyType.setNumericPrecisionStep(8);// same as solr default
    legacyType.setDocValuesType(DocValuesType.NONE);//no docValues!
    legacyType.setStored(false);
    legacyType.freeze();
    LEGACY_FIELDTYPE = legacyType;
  }

  public static final String SUFFIX_X = "__x";
  public static final String SUFFIX_Y = "__y";

  private final String fieldNameX;
  private final String fieldNameY;

  private final int fieldsLen;
  private final boolean hasStored;
  private final boolean hasDocVals;
  private final boolean hasPointVals;
  // equiv to "hasLegacyNumerics":
  private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.

  /**
   * Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
   */
  public static PointVectorStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
  }
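  // Illustrative usage sketch (not part of this patch; the "geo" prefix and the shape
  // coordinates are assumptions): index a point with this strategy, then query by bbox.
  static Query exampleUsageSketch(org.apache.lucene.document.Document doc) {
    SpatialContext ctx = SpatialContext.GEO;
    PointVectorStrategy strategy = PointVectorStrategy.newInstance(ctx, "geo");
    for (Field f : strategy.createIndexableFields(ctx.makePoint(-80.93, 33.77))) {
      doc.add(f); // the geo__x/geo__y DoublePoint and docValues fields
    }
    SpatialArgs args = new SpatialArgs(SpatialOperation.Intersects,
        ctx.makeRectangle(-81, -80, 33, 34)); // minX, maxX, minY, maxY
    return strategy.makeQuery(args);
  }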

  /**
   * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility.
   * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)}
   * since that requires docValues (the legacy config didn't have that).
   *
   * @deprecated LegacyNumerics will be removed
   */
  @Deprecated
  public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
    return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
  }

  /**
   * Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
   * A field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics
   * is configurable this way too.
   */
  public PointVectorStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
    super(ctx, fieldNamePrefix);
    this.fieldNameX = fieldNamePrefix+SUFFIX_X;
    this.fieldNameY = fieldNamePrefix+SUFFIX_Y;

    int numPairs = 0;
    if ((this.hasStored = fieldType.stored())) {
      numPairs++;
    }
    if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
      numPairs++;
    }
    if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
      numPairs++;
    }
    if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
      if (hasPointVals) {
        throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
      }
      final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
      if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
        throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
      }
      numPairs++;
      legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
      legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
      legacyNumericFieldType.freeze();
    } else {
      legacyNumericFieldType = null;
    }
    this.fieldsLen = numPairs * 2;
  }

  String getFieldNameX() {
    return fieldNameX;
  }

  String getFieldNameY() {
    return fieldNameY;
  }

  @Override
  public Field[] createIndexableFields(Shape shape) {
    if (shape instanceof Point)
      return createIndexableFields((Point) shape);
    throw new UnsupportedOperationException("Can only index Point, not " + shape);
  }

  /** @see #createIndexableFields(org.locationtech.spatial4j.shape.Shape) */
  public Field[] createIndexableFields(Point point) {
    Field[] fields = new Field[fieldsLen];
    int idx = -1;
    if (hasStored) {
      fields[++idx] = new StoredField(fieldNameX, point.getX());
      fields[++idx] = new StoredField(fieldNameY, point.getY());
    }
    if (hasDocVals) {
      fields[++idx] = new DoubleDocValuesField(fieldNameX, point.getX());
      fields[++idx] = new DoubleDocValuesField(fieldNameY, point.getY());
    }
    if (hasPointVals) {
      fields[++idx] = new DoublePoint(fieldNameX, point.getX());
      fields[++idx] = new DoublePoint(fieldNameY, point.getY());
    }
    if (legacyNumericFieldType != null) {
      fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
      fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
    }
    assert idx == fields.length - 1;
    return fields;
  }

  @Override
  public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
    return new DistanceValueSource(this, queryPoint, multiplier);
  }

  @Override
  public ConstantScoreQuery makeQuery(SpatialArgs args) {
    if(! SpatialOperation.is( args.getOperation(),
        SpatialOperation.Intersects,
        SpatialOperation.IsWithin ))
      throw new UnsupportedSpatialOperation(args.getOperation());
    Shape shape = args.getShape();
    if (shape instanceof Rectangle) {
      Rectangle bbox = (Rectangle) shape;
      return new ConstantScoreQuery(makeWithin(bbox));
    } else if (shape instanceof Circle) {
      Circle circle = (Circle)shape;
      Rectangle bbox = circle.getBoundingBox();
      Query approxQuery = makeWithin(bbox);
      BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
      FunctionRangeQuery vsRangeQuery =
          new FunctionRangeQuery(makeDistanceValueSource(circle.getCenter()), 0.0, circle.getRadius(), true, true);
      bqBuilder.add(approxQuery, BooleanClause.Occur.FILTER);//should have lowest "cost" value; will drive iteration
      bqBuilder.add(vsRangeQuery, BooleanClause.Occur.FILTER);
      return new ConstantScoreQuery(bqBuilder.build());
    } else {
      throw new UnsupportedOperationException("Only Rectangles and Circles are currently supported, " +
          "found [" + shape.getClass() + "]");//TODO
    }
  }

  /**
   * Constructs a query to retrieve documents that fully contain the input envelope.
   */
  private Query makeWithin(Rectangle bbox) {
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    BooleanClause.Occur MUST = BooleanClause.Occur.MUST;
    if (bbox.getCrossesDateLine()) {
      //use null as performance trick since no data will be beyond the world bounds
      bq.add(rangeQuery(fieldNameX, null/*-180*/, bbox.getMaxX()), BooleanClause.Occur.SHOULD );
      bq.add(rangeQuery(fieldNameX, bbox.getMinX(), null/*+180*/), BooleanClause.Occur.SHOULD );
      bq.setMinimumNumberShouldMatch(1);//must match at least one of the SHOULD
    } else {
      bq.add(rangeQuery(fieldNameX, bbox.getMinX(), bbox.getMaxX()), MUST);
    }
    bq.add(rangeQuery(fieldNameY, bbox.getMinY(), bbox.getMaxY()), MUST);
    return bq.build();
  }

  /**
   * Returns a numeric range query based on FieldType
   * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
   * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
   */
  private Query rangeQuery(String fieldName, Double min, Double max) {
    if (hasPointVals) {
      if (min == null) {
        min = Double.NEGATIVE_INFINITY;
      }

      if (max == null) {
        max = Double.POSITIVE_INFINITY;
      }

      return DoublePoint.newRangeQuery(fieldName, min, max);

    } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
      return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive
    }
    //TODO try doc-value range query?
    throw new UnsupportedOperationException("An index is required for this operation.");
  }
}
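// Illustrative sketch (not part of this patch): the null-to-infinity mapping that
// rangeQuery(...) above applies in the pointValues case, expressed via the public
// DoublePoint API; the field name "geo__x" follows the SUFFIX_X convention and is an assumption.
Query halfOpen = DoublePoint.newRangeQuery("geo__x", Double.NEGATIVE_INFINITY, -80.0);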

(two binary image files changed; sizes unchanged at 3.1 KiB and 3.6 KiB)
@@ -18,4 +18,4 @@
/**
 * Deprecated stuff!
 */
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.FlattenGraphFilterFactory; // javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
@@ -50,7 +51,11 @@ import org.slf4j.LoggerFactory;
/**
 * TokenFilterFactory and ManagedResource implementation for
 * doing CRUD on synonyms using the REST API.
 *
 * @deprecated Use {@link ManagedSynonymGraphFilterFactory} instead, but be sure to also
 * use {@link FlattenGraphFilterFactory} at index time (not at search time) as well.
 */
@Deprecated
public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@@ -0,0 +1,437 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.rest.schema.analysis;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.rest.BaseSolrResource;
import org.apache.solr.rest.ManagedResource;
import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
import org.restlet.data.Status;
import org.restlet.resource.ResourceException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * TokenFilterFactory and ManagedResource implementation for
 * doing CRUD on synonyms using the REST API.
 */
public class ManagedSynonymGraphFilterFactory extends BaseManagedTokenFilterFactory {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static final String SYNONYM_MAPPINGS = "synonymMappings";
  public static final String IGNORE_CASE_INIT_ARG = "ignoreCase";

  /**
   * Used internally to preserve the case of synonym mappings regardless
   * of the ignoreCase setting.
   */
  private static class CasePreservedSynonymMappings {
    Map<String,Set<String>> mappings = new TreeMap<>();

    /**
     * Provides a view of the mappings for a given term; specifically, if
     * ignoreCase is true, then the returned "view" contains the mappings
     * for all known cases of the term; if it is false, then only the
     * mappings for the specific case are returned.
     */
    Set<String> getMappings(boolean ignoreCase, String key) {
      Set<String> synMappings = null;
      if (ignoreCase) {
        // TODO: should we return the mapped values in all lower-case here?
        if (mappings.size() == 1) {
          // if only one in the map (which is common) just return it directly
          return mappings.values().iterator().next();
        }

        synMappings = new TreeSet<>();
        for (Set<String> next : mappings.values())
          synMappings.addAll(next);
      } else {
        synMappings = mappings.get(key);
      }
      return synMappings;
    }

    public String toString() {
      return mappings.toString();
    }
  }
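  // Illustrative sketch (not part of this patch): how the ignoreCase flag changes the
  // view returned by getMappings when two case-variants of the same term are stored.
  static Set<String> getMappingsSketch() {
    CasePreservedSynonymMappings cpsm = new CasePreservedSynonymMappings();
    cpsm.mappings.put("mad", new TreeSet<>(java.util.Arrays.asList("angry")));
    cpsm.mappings.put("MAD", new TreeSet<>(java.util.Arrays.asList("upset")));
    cpsm.getMappings(false, "MAD");       // exact case only -> [upset]
    return cpsm.getMappings(true, "mad"); // union across cases -> [angry, upset]
  }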

  /**
   * ManagedResource implementation for synonyms, which are so specialized that
   * it makes sense to implement this class as an inner class as it has little
   * application outside the SynonymFilterFactory use cases.
   */
  public static class SynonymManager extends ManagedResource
      implements ManagedResource.ChildResourceSupport
  {
    protected Map<String,CasePreservedSynonymMappings> synonymMappings;

    public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO)
        throws SolrException {
      super(resourceId, loader, storageIO);
    }

    @SuppressWarnings("unchecked")
    @Override
    protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData)
        throws SolrException
    {
      NamedList<Object> initArgs = (NamedList<Object>)managedInitArgs;

      String format = (String)initArgs.get("format");
      if (format != null && !"solr".equals(format)) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid format "+
            format+"! Only 'solr' is supported.");
      }

      // the default behavior is to not ignore case,
      // so if not supplied, then install the default
      if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) {
        initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE);
      }

      boolean ignoreCase = getIgnoreCase(managedInitArgs);
      synonymMappings = new TreeMap<>();
      if (managedData != null) {
        Map<String,Object> storedSyns = (Map<String,Object>)managedData;
        for (String key : storedSyns.keySet()) {

          String caseKey = applyCaseSetting(ignoreCase, key);
          CasePreservedSynonymMappings cpsm = synonymMappings.get(caseKey);
          if (cpsm == null) {
            cpsm = new CasePreservedSynonymMappings();
            synonymMappings.put(caseKey, cpsm);
          }

          // given the nature of our JSON parsing solution, we really have
          // no guarantees on what is in the file
          Object mapping = storedSyns.get(key);
          if (!(mapping instanceof List)) {
            throw new SolrException(ErrorCode.SERVER_ERROR,
                "Invalid synonym file format! Expected a list of synonyms for "+key+
                    " but got "+mapping.getClass().getName());
          }

          Set<String> sortedVals = new TreeSet<>();
          sortedVals.addAll((List<String>)storedSyns.get(key));
          cpsm.mappings.put(key, sortedVals);
        }
      }
      log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId());
    }

    @SuppressWarnings("unchecked")
    @Override
    protected Object applyUpdatesToManagedData(Object updates) {
      boolean ignoreCase = getIgnoreCase();
      boolean madeChanges = false;
      if (updates instanceof List) {
        madeChanges = applyListUpdates((List<String>)updates, ignoreCase);
      } else if (updates instanceof Map) {
        madeChanges = applyMapUpdates((Map<String,Object>)updates, ignoreCase);
      } else {
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST,
            "Unsupported data format (" + updates.getClass().getName() + "); expected a JSON object (Map or List)!");
      }
      return madeChanges ? getStoredView() : null;
    }

    protected boolean applyListUpdates(List<String> jsonList, boolean ignoreCase) {
      boolean madeChanges = false;
      for (String term : jsonList) {
        // find the mappings using the case aware key
        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null)
          cpsm = new CasePreservedSynonymMappings();

        Set<String> treeTerms = new TreeSet<>();
        treeTerms.addAll(jsonList);
        cpsm.mappings.put(origTerm, treeTerms);
        madeChanges = true;
        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }
      return madeChanges;
    }

    protected boolean applyMapUpdates(Map<String,Object> jsonMap, boolean ignoreCase) {
      boolean madeChanges = false;

      for (String term : jsonMap.keySet()) {

        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);

        // find the mappings using the case aware key
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null)
          cpsm = new CasePreservedSynonymMappings();

        Set<String> output = cpsm.mappings.get(origTerm);

        Object val = jsonMap.get(origTerm); // IMPORTANT: use the original
        if (val instanceof String) {
          String strVal = (String)val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          if (output.add(strVal)) {
            madeChanges = true;
          }
        } else if (val instanceof List) {
          List<String> vals = (List<String>)val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          for (String nextVal : vals) {
            if (output.add(nextVal)) {
              madeChanges = true;
            }
          }

        } else {
          throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+
              " for "+term+"; expected single value or a JSON array!");
        }

        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }

      return madeChanges;
    }
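    // Illustrative sketch (not part of this patch): the two JSON update shapes accepted
    // above, expressed as the equivalent Java values seen by applyUpdatesToManagedData.
    static List<Object> exampleUpdateShapes() {
      List<String> listUpdate = java.util.Arrays.asList("funny", "humorous"); // -> applyListUpdates
      Map<String,Object> mapUpdate = new HashMap<>();
      mapUpdate.put("mad", java.util.Arrays.asList("angry", "upset"));        // -> applyMapUpdates
      return java.util.Arrays.asList(listUpdate, mapUpdate);
    }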

    /**
     * Returns a Map of how we store and load data managed by this resource,
     * which is different than how it is managed at runtime in order to support
     * the ignoreCase setting.
     */
    protected Map<String,Set<String>> getStoredView() {
      Map<String,Set<String>> storedView = new TreeMap<>();
      for (CasePreservedSynonymMappings cpsm : synonymMappings.values()) {
        for (String key : cpsm.mappings.keySet()) {
          storedView.put(key, cpsm.mappings.get(key));
        }
      }
      return storedView;
    }

    protected String applyCaseSetting(boolean ignoreCase, String str) {
      return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str;
    }

    public boolean getIgnoreCase() {
      return getIgnoreCase(managedInitArgs);
    }

    public boolean getIgnoreCase(NamedList<?> initArgs) {
      Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG);
      // ignoreCase = false by default
      return null == ignoreCase ? false : ignoreCase;
    }

    @Override
    public void doGet(BaseSolrResource endpoint, String childId) {
      SolrQueryResponse response = endpoint.getSolrResponse();
      if (childId != null) {
        boolean ignoreCase = getIgnoreCase();
        String key = applyCaseSetting(ignoreCase, childId);

        // if ignoreCase==true, then we get the mappings using the lower-cased key
        // and then return a union of all case-sensitive keys, if false, then
        // we only return the mappings for the exact case requested
        CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
        Set<String> mappings = (cpsm != null) ? cpsm.getMappings(ignoreCase, childId) : null;
        if (mappings == null)
          throw new SolrException(ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

        response.add(childId, mappings);
      } else {
        response.add(SYNONYM_MAPPINGS, buildMapToStore(getStoredView()));
      }
    }

    @Override
    public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) {
      boolean ignoreCase = getIgnoreCase();
      String key = applyCaseSetting(ignoreCase, childId);

      CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
      if (cpsm == null)
        throw new SolrException(ErrorCode.NOT_FOUND,
            String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

      if (ignoreCase) {
        // delete all mappings regardless of case
        synonymMappings.remove(key);
      } else {
        // just delete the mappings for the specific case-sensitive key
        if (cpsm.mappings.containsKey(childId)) {
          cpsm.mappings.remove(childId);

          if (cpsm.mappings.isEmpty())
            synonymMappings.remove(key);
        } else {
          throw new SolrException(ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
        }
      }

      // store the updated data (using the stored view)
      storeManagedData(getStoredView());

      log.info("Removed synonym mappings for: {}", childId);
    }
  }

  /**
   * Custom SynonymMap.Parser implementation that provides synonym
   * mappings from the managed JSON in this class during SynonymMap
   * building.
   */
  private class ManagedSynonymParser extends SynonymMap.Parser {

    SynonymManager synonymManager;

    public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) {
      super(dedup, analyzer);
      this.synonymManager = synonymManager;
    }

    /**
     * Add the managed synonyms and their mappings into the SynonymMap builder.
     */
    @Override
    public void parse(Reader in) throws IOException, ParseException {
      boolean ignoreCase = synonymManager.getIgnoreCase();
      for (CasePreservedSynonymMappings cpsm : synonymManager.synonymMappings.values()) {
        for (String term : cpsm.mappings.keySet()) {
          for (String mapping : cpsm.mappings.get(term)) {
            // apply the case setting to match the behavior of the SynonymMap builder
            CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
            CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
            add(casedTerm, casedMapping, false);
          }
        }
      }
    }
  }

  protected SynonymGraphFilterFactory delegate;

  public ManagedSynonymGraphFilterFactory(Map<String,String> args) {
    super(args);
  }

  @Override
  public String getResourceId() {
    return "/schema/analysis/synonyms/"+handle;
  }

  protected Class<? extends ManagedResource> getManagedResourceImplClass() {
    return SynonymManager.class;
  }

  /**
   * Called once, during core initialization, to initialize any analysis components
   * that depend on the data managed by this resource. It is important that the
   * analysis component is only initialized once during core initialization so that
   * text analysis is consistent, especially in a distributed environment, as we
   * don't want one server applying a different set of stop words than other servers.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
      throws SolrException
  {
    NamedList<Object> args = (NamedList<Object>)initArgs;
    args.add("synonyms", getResourceId());
    args.add("expand", "false");
    args.add("format", "solr");

    Map<String,String> filtArgs = new HashMap<>();
    for (Map.Entry<String,?> entry : args) {
      filtArgs.put(entry.getKey(), entry.getValue().toString());
    }
    // create the actual filter factory that pulls the synonym mappings
    // from synonymMappings using a custom parser implementation
    delegate = new SynonymGraphFilterFactory(filtArgs) {
      @Override
      protected SynonymMap loadSynonyms
          (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
          throws IOException, ParseException {

        ManagedSynonymParser parser =
            new ManagedSynonymParser((SynonymManager)res, dedup, analyzer);
        // null is safe here because there's no actual parsing done against an input Reader
        parser.parse(null);
        return parser.build();
      }
    };
    try {
      delegate.inform(res.getResourceLoader());
    } catch (IOException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
  }

  @Override
  public TokenStream create(TokenStream input) {
    if (delegate == null)
      throw new IllegalStateException(this.getClass().getName()+
          " not initialized correctly! The SynonymFilterFactory delegate was not initialized.");

    return delegate.create(input);
  }
}
@@ -23,10 +23,10 @@ import java.util.List;
import java.util.Map;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource;
import org.apache.lucene.spatial.bbox.BBoxStrategy;
import org.apache.solr.legacy.BBoxStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeAreaValueSource;
import org.apache.solr.common.SolrException;
@@ -35,11 +35,11 @@ import javax.xml.xpath.XPathFactory;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyIntField;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
import org.apache.lucene.search.ConstantScoreQuery;
@@ -373,7 +373,7 @@ public class IndexSchema {
  void persist(Writer writer) throws IOException {
    final SolrQueryResponse response = new SolrQueryResponse();
    response.add(IndexSchema.SCHEMA, getNamedPropertyValues());
    final NamedList args = new NamedList(Arrays.<Object>asList("indent", "on"));
    final SolrParams args = (new ModifiableSolrParams()).set("indent", "on");
    final LocalSolrQueryRequest req = new LocalSolrQueryRequest(null, args);
    final SchemaXmlWriter schemaXmlWriter = new SchemaXmlWriter(writer, req, response);
    schemaXmlWriter.setEmitManagedSchemaDoNotEditWarning(true);
@@ -20,8 +20,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.spatial.vector.PointVectorStrategy;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.PointVectorStrategy;

/**
 * @see PointVectorStrategy
@@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
@@ -30,14 +30,14 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyFloatField;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyLongField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyDoubleField;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyFloatField;
import org.apache.solr.legacy.LegacyIntField;
import org.apache.solr.legacy.LegacyLongField;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
@@ -63,9 +63,9 @@ import org.slf4j.LoggerFactory;

/**
 * Provides field types to support Lucene's {@link
 * org.apache.lucene.legacy.LegacyIntField}, {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} and
 * {@link org.apache.lucene.legacy.LegacyDoubleField}.
 * See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details.
 * org.apache.solr.legacy.LegacyIntField}, {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} and
 * {@link org.apache.solr.legacy.LegacyDoubleField}.
 * See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details.
 * It supports integer, float, long, double and date types.
 * <p>
 * For each number being added to this field, multiple terms are generated as per the algorithm described in the above
@@ -78,7 +78,7 @@ import org.slf4j.LoggerFactory;
 * generated, range search will be no faster than any other number field, but sorting will still be possible.
 *
 *
 * @see org.apache.lucene.legacy.LegacyNumericRangeQuery
 * @see org.apache.solr.legacy.LegacyNumericRangeQuery
 * @since solr 1.4
 */
public class TrieField extends NumericFieldType {
@@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
@@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;
@@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
@@ -17,8 +17,8 @@
package org.apache.solr.search;

import org.apache.lucene.search.Query;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
@@ -26,10 +26,10 @@ import org.apache.lucene.queryparser.xml.builders.PointRangeQueryBuilder;
import org.w3c.dom.Element;

/**
 * Creates a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The table below specifies the required
 * Creates a {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The table below specifies the required
 * attributes and the defaults if optional attributes are omitted. For more
 * detail on what each of the attributes actually do, consult the documentation
 * for {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}:
 * for {@link org.apache.solr.legacy.LegacyNumericRangeQuery}:
 * <table summary="supported attributes">
 * <tr>
 * <th>Attribute name</th>
@@ -17,7 +17,7 @@
package org.apache.solr.search;

import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
@@ -34,7 +34,7 @@ import org.apache.lucene.util.Bits;
 * Constrains search results to only match those which also match a provided
 * query.
 *
 * <p> This could be used, for example, with a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} on a suitably
 * <p> This could be used, for example, with a {@link org.apache.solr.legacy.LegacyNumericRangeQuery} on a suitably
 * formatted date field to implement date filtering. One could re-use a single
 * CachingWrapperFilter(QueryWrapperFilter) that matches, e.g., only documents modified
 * within the last week. This would only need to be reconstructed once per day.
@@ -52,7 +52,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor
   */
  @Override
  public void process(ResponseBuilder rb, ShardRequest shardRequest) {
    SortSpec ss = rb.getSortSpec();
    SortSpec groupSortSpec = rb.getGroupingSpec().getGroupSortSpec();
    Sort groupSort = rb.getGroupingSpec().getGroupSort();
    final String[] fields = rb.getGroupingSpec().getFields();
    Sort withinGroupSort = rb.getGroupingSpec().getSortWithinGroup();
@@ -144,7 +144,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor
    rb.firstPhaseElapsedTime = maxElapsedTime;
    for (String groupField : commandSearchGroups.keySet()) {
      List<Collection<SearchGroup<BytesRef>>> topGroups = commandSearchGroups.get(groupField);
      Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(topGroups, ss.getOffset(), ss.getCount(), groupSort);
      Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(topGroups, groupSortSpec.getOffset(), groupSortSpec.getCount(), groupSort);
      if (mergedTopGroups == null) {
        continue;
      }
@@ -25,7 +25,7 @@ import java.util.regex.Pattern;

import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -16,7 +16,7 @@
 */
package org.apache.solr.search.mlt;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -27,7 +27,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -159,8 +159,8 @@ public interface FieldCache {
  };

  /**
   * A parser instance for int values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.lucene.legacy.LegacyIntField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
   * A parser instance for int values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.solr.legacy.LegacyIntField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
   */
  @Deprecated
@@ -182,8 +182,8 @@ public interface FieldCache {
  };

  /**
   * A parser instance for float values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.lucene.legacy.LegacyFloatField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
   * A parser instance for float values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.solr.legacy.LegacyFloatField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
   */
  @Deprecated
@@ -207,8 +207,8 @@ public interface FieldCache {
  };

  /**
   * A parser instance for long values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.lucene.legacy.LegacyLongField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
   * A parser instance for long values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
   * via {@link org.apache.solr.legacy.LegacyLongField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
   */
  @Deprecated
@@ -229,8 +229,8 @@ public interface FieldCache {
  };

  /**
-   * A parser instance for double values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
-   * via {@link org.apache.lucene.legacy.LegacyDoubleField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
+   * A parser instance for double values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
+   * via {@link org.apache.solr.legacy.LegacyDoubleField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
   */
  @Deprecated
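On the query side, the per-type static factory methods on the point classes replace legacy numeric range queries. A sketch using core Lucene APIs, again with illustrative field names:

// Query sketch: point range/exact queries replace LegacyNumericRangeQuery.
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.Query;

public class PointQueryExample {
  static Query weightBetween(double lo, double hi) {
    return DoublePoint.newRangeQuery("weight", lo, hi); // bounds are inclusive
  }
  static Query exactTimestamp(long ts) {
    return LongPoint.newExactQuery("timestamp", ts);
  }
}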
@@ -277,7 +277,7 @@ public interface FieldCache {
   * @param parser
   *          Computes long for string values. May be {@code null} if the
   *          requested field was indexed as {@link NumericDocValuesField} or
-   *          {@link org.apache.lucene.legacy.LegacyLongField}.
+   *          {@link org.apache.solr.legacy.LegacyLongField}.
   * @return The values in the given field for each document.
   * @throws IOException
   *           If any error occurs.
@@ -87,7 +87,7 @@ public class UninvertingReader extends FilterLeafReader {
     */
    DOUBLE_POINT,
    /**
-     * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
+     * Single-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
@@ -96,7 +96,7 @@ public class UninvertingReader extends FilterLeafReader {
    @Deprecated
    LEGACY_INTEGER,
    /**
-     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
+     * Single-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
@@ -105,7 +105,7 @@ public class UninvertingReader extends FilterLeafReader {
    @Deprecated
    LEGACY_LONG,
    /**
-     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
+     * Single-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
@@ -114,7 +114,7 @@ public class UninvertingReader extends FilterLeafReader {
    @Deprecated
    LEGACY_FLOAT,
    /**
-     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
+     * Single-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
@@ -144,28 +144,28 @@ public class UninvertingReader extends FilterLeafReader {
     */
    SORTED_SET_BINARY,
    /**
-     * Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
+     * Multi-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_INTEGER,
    /**
-     * Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
+     * Multi-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_FLOAT,
    /**
-     * Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
+     * Multi-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_LONG,
    /**
-     * Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
+     * Multi-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
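These Type constants drive UninvertingReader, which synthesizes doc values at search time for fields that were indexed without them. A sketch of the usual entry point, assuming the wrap(DirectoryReader, Map) variant carried over from the Lucene 6.x uninverting module; field names and type choices are illustrative:

// Sketch: synthesize doc values for fields indexed without them.
// Assumes UninvertingReader.wrap(DirectoryReader, Map) survives the move to
// org.apache.solr.uninverting with the same signature.
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.uninverting.UninvertingReader.Type;

public class UninvertExample {
  static DirectoryReader uninvert(DirectoryReader in) throws IOException {
    Map<String, Type> mapping = new HashMap<>();
    mapping.put("weight", Type.DOUBLE_POINT);  // single-valued, indexed as DoublePoint
    mapping.put("ids", Type.SORTED_SET_LONG);  // multi-valued legacy long field
    return UninvertingReader.wrap(in, mapping);
  }
}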
Some files were not shown because too many files have changed in this diff.