This commit is contained in:
Karl Wright 2017-05-31 07:56:14 -04:00
commit 97693234de
209 changed files with 4879 additions and 1310 deletions

View File

@ -296,7 +296,7 @@ def checkSummary(fullPath):
print()
print(fullPath)
printed = True
print(' missing: %s' % unescapeHTML(lastHREF))
print(' missing description: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:

View File

@ -57,6 +57,8 @@ API Changes
instead, which derived from the UH. WholeBreakIterator and
CustomSeparatorBreakIterator were moved to UH's package. (David Smiley)
* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)
Bug Fixes
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
@ -88,6 +90,10 @@ Optimizations
values using different numbers of bits per value if this proves to save
storage. (Adrien Grand)
* LUCENE-7845: Enhance spatial-extras RecursivePrefixTreeStrategy queries when the
query is a point (for 2D) or is a simple date interval (e.g. 1 month). When
the strategy is marked as pointsOnly, the result is a TermQuery. (David Smiley)
Other
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
@ -99,6 +105,8 @@ Other
* LUCENE-7753: Make fields static when possible.
(Daniel Jelinski via Adrien Grand)
* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi)
======================= Lucene 6.7.0 =======================
Other
@ -107,6 +115,10 @@ Other
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
Uwe Schindler, Dawid Weiss)
Improvements
* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
======================= Lucene 6.6.0 =======================
New Features

View File

@ -74,3 +74,9 @@ collecting TopDocs for each group, but instead takes a GroupReducer that will
perform any type of reduction on the top groups collected on a first-pass. To
reproduce the old behaviour of SecondPassGroupingCollector, you should instead
use TopGroupsCollector.
## Removed legacy numerics (LUCENE-7850)
Support for legacy numerics has been removed; they had been deprecated
since Lucene 6.0. Points should be used instead; see
org.apache.lucene.index.PointValues for an introduction.

View File

@ -24,6 +24,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* Emits the entire input as a single token.
*/
@ -41,16 +43,16 @@ public final class KeywordTokenizer extends Tokenizer {
}
public KeywordTokenizer(int bufferSize) {
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, int bufferSize) {
super(factory);
if (bufferSize <= 0) {
throw new IllegalArgumentException("bufferSize must be > 0");
if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}

View File

@ -16,26 +16,39 @@
*/
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* Factory for {@link KeywordTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory"/&gt;
* &lt;tokenizer class="solr.KeywordTokenizerFactory" maxTokenLen="256"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* Options:
* <ul>
* <li>maxTokenLen: max token length, should be greater than 0 and less than
* MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rarely necessary to change this;
* the default is {@link KeywordTokenizer#DEFAULT_BUFFER_SIZE}.</li>
* </ul>
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;
/** Creates a new KeywordTokenizerFactory */
public KeywordTokenizerFactory(Map<String,String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -43,6 +56,6 @@ public class KeywordTokenizerFactory extends TokenizerFactory {
@Override
public KeywordTokenizer create(AttributeFactory factory) {
return new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
return new KeywordTokenizer(factory, maxTokenLen);
}
}

View File

@ -50,6 +50,20 @@ public class LetterTokenizer extends CharTokenizer {
super(factory);
}
/**
* Construct a new LetterTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public LetterTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}
/** Collects only characters which satisfy
* {@link Character#isLetter(int)}.*/
@Override

View File

@ -17,25 +17,40 @@
package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* Factory for {@link LetterTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.LetterTokenizerFactory"/&gt;
* &lt;tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="256"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
* Options:
* <ul>
* <li>maxTokenLen: max token length, must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rarely necessary to change this;
* the default is {@link CharTokenizer#DEFAULT_MAX_WORD_LEN}.</li>
* </ul>
*/
public class LetterTokenizerFactory extends TokenizerFactory {
private final int maxTokenLen;
/** Creates a new LetterTokenizerFactory */
public LetterTokenizerFactory(Map<String,String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -43,6 +58,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
@Override
public LetterTokenizer create(AttributeFactory factory) {
return new LetterTokenizer(factory);
return new LetterTokenizer(factory, maxTokenLen);
}
}

View File

@ -50,6 +50,19 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
super(factory);
}
/**
* Construct a new LowerCaseTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}
/** Converts char to lower case
* {@link Character#toLowerCase(int)}.*/
@Override

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
@ -25,20 +26,36 @@ import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* Factory for {@link LowerCaseTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.LowerCaseTokenizerFactory"/&gt;
* &lt;/analyzer&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
* <p>
* Options:
* <ul>
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rarely necessary to change this;
* the default is {@link CharTokenizer#DEFAULT_MAX_WORD_LEN}.</li>
* </ul>
*/
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
private final int maxTokenLen;
/** Creates a new LowerCaseTokenizerFactory */
public LowerCaseTokenizerFactory(Map<String,String> args) {
/**
* Creates a new LowerCaseTokenizerFactory
*/
public LowerCaseTokenizerFactory(Map<String, String> args) {
super(args);
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -46,11 +63,13 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
@Override
public LowerCaseTokenizer create(AttributeFactory factory) {
return new LowerCaseTokenizer(factory);
return new LowerCaseTokenizer(factory, maxTokenLen);
}
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
return new LowerCaseFilterFactory(new HashMap<>(getOriginalArgs()));
Map map = new HashMap<>(getOriginalArgs());
map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
return new LowerCaseFilterFactory(map);
}
}

View File

@ -58,7 +58,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <ul>
* <li><code>wordset</code> - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
* begining with the "#" character. Blank lines are ignored. See
* beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
* </li>
* <li><code>snowball</code> - This format allows for multiple words specified on each

View File

@ -48,6 +48,19 @@ public final class UnicodeWhitespaceTokenizer extends CharTokenizer {
super(factory);
}
/**
* Construct a new UnicodeWhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public UnicodeWhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}
/** Collects only characters which do not satisfy Unicode's WHITESPACE property. */
@Override
protected boolean isTokenChar(int c) {

View File

@ -47,6 +47,19 @@ public final class WhitespaceTokenizer extends CharTokenizer {
super(factory);
}
/**
* Construct a new WhitespaceTokenizer using a given
* {@link org.apache.lucene.util.AttributeFactory}.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is invalid.
*/
public WhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory, maxTokenLen);
}
/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(int)}.*/
@Override

View File

@ -22,15 +22,18 @@ import java.util.Collection;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* Factory for {@link WhitespaceTokenizer}.
* <pre class="prettyprint">
* &lt;fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100"&gt;
* &lt;analyzer&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode"/&gt;
* &lt;tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode" maxTokenLen="256"/&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*
@ -38,6 +41,9 @@ import org.apache.lucene.util.AttributeFactory;
* <ul>
* <li>rule: either "java" for {@link WhitespaceTokenizer}
* or "unicode" for {@link UnicodeWhitespaceTokenizer}</li>
* <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
* It is rarely necessary to change this;
* the default is {@link CharTokenizer#DEFAULT_MAX_WORD_LEN}.</li>
* </ul>
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
@ -46,13 +52,17 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
private static final Collection<String> RULE_NAMES = Arrays.asList(RULE_JAVA, RULE_UNICODE);
private final String rule;
private final int maxTokenLen;
/** Creates a new WhitespaceTokenizerFactory */
public WhitespaceTokenizerFactory(Map<String,String> args) {
super(args);
rule = get(args, "rule", RULE_NAMES, RULE_JAVA);
maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@ -62,9 +72,9 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
public Tokenizer create(AttributeFactory factory) {
switch (rule) {
case RULE_JAVA:
return new WhitespaceTokenizer(factory);
return new WhitespaceTokenizer(factory, maxTokenLen);
case RULE_UNICODE:
return new UnicodeWhitespaceTokenizer(factory);
return new UnicodeWhitespaceTokenizer(factory, maxTokenLen);
default:
throw new AssertionError();
}

View File

@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
/**
* An abstract base class for simple, character-oriented tokenizers.
* <p>
@ -50,6 +52,7 @@ public abstract class CharTokenizer extends Tokenizer {
* Creates a new {@link CharTokenizer} instance
*/
public CharTokenizer() {
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}
/**
@ -60,6 +63,23 @@ public abstract class CharTokenizer extends Tokenizer {
*/
public CharTokenizer(AttributeFactory factory) {
super(factory);
this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}
/**
* Creates a new {@link CharTokenizer} instance with a caller-supplied
* maximum token length.
*
* @param factory the attribute factory to use for this {@link Tokenizer}
* @param maxTokenLen maximum token length the tokenizer will emit.
* Must be greater than 0 and no greater than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
* @throws IllegalArgumentException if maxTokenLen is zero, negative, or
* greater than MAX_TOKEN_LENGTH_LIMIT.
*/
public CharTokenizer(AttributeFactory factory, int maxTokenLen) {
super(factory);
// Reject out-of-range limits up front. Note that a value exactly equal to
// MAX_TOKEN_LENGTH_LIMIT passes this check even though the message says "less than".
if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
}
this.maxTokenLen = maxTokenLen;
}
/**
@ -193,8 +213,9 @@ public abstract class CharTokenizer extends Tokenizer {
}
private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
private static final int MAX_WORD_LEN = 255;
public static final int DEFAULT_MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
private final int maxTokenLen;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@ -256,7 +277,7 @@ public abstract class CharTokenizer extends Tokenizer {
}
end += charCount;
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
if (length >= MAX_WORD_LEN) { // buffer overflow! make sure to check for >= surrogate pair could break == test
if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
}
} else if (length > 0) { // at non-Letter w/ chars

View File

@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet;
/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
* The data was created using ICU4J v56.1.0.0
* The data was created using ICU4J v59.1.0.0
* <p>
* Unicode version: 8.0.0.0
* Unicode version: 9.0.0.0
*/
public final class UnicodeProps {
private UnicodeProps() {}
/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "8.0.0.0";
public static final String UNICODE_VERSION = "9.0.0.0";
/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(

View File

@ -53,7 +53,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>

View File

@ -54,7 +54,7 @@
<!-- The hyphenation patterns, space separated. A pattern is made of 'equivalent'
characters as described before, between any two word characters a digit
in the range 0 to 9 may be specified. The absence of a digit is equivalent
to zero. The '.' character is reserved to indicate begining or ending
to zero. The '.' character is reserved to indicate beginning or ending
of words. -->
<!ELEMENT patterns (#PCDATA)>

View File

@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeFactory;
/**
* Tests for {@link KeywordTokenizer} and {@link KeywordTokenizerFactory},
* including validation of the factory's maxTokenLen argument.
*/
public class TestKeywordTokenizer extends BaseTokenStreamTestCase {
/**
* The whole input, including the space and a supplementary character
* (written as a surrogate pair), must come back as one token.
*/
public void testSimple() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
KeywordTokenizer tokenizer = new KeywordTokenizer();
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer \ud801\udc1ctest"});
}
/** A factory built with no arguments must create a {@link KeywordTokenizer}. */
public void testFactory() {
Map<String, String> args = new HashMap<>();
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args);
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
assertEquals(KeywordTokenizer.class, tokenizer.getClass());
}
/**
* Builds a mutable argument map from alternating key/value strings.
*
* @param args keys at even indexes, each followed by its value; callers
* must pass an even number of strings
* @return a new map holding the given pairs
*/
private Map<String, String> makeArgs(String... args) {
Map<String, String> ret = new HashMap<>();
for (int idx = 0; idx < args.length; idx += 2) {
ret.put(args[idx], args[idx + 1]);
}
return ret;
}
/**
* Checks the factory's argument validation and that maxTokenLen never
* causes the keyword tokenizer to split its input.
*/
public void testParamsFactory() throws IOException {
// negative maxTokenLen is rejected
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "-1")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
// zero maxTokenLen is rejected
iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "0")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
// an unrecognized parameter is rejected with an "Unknown parameters" error
iae = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizerFactory(makeArgs("maxTokenLen", "255", "randomParam", "rValue")));
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
// The keyword tokenizer never splits its input: a 13-char input with
// maxTokenLen=5 is still expected back as a single token.
KeywordTokenizerFactory factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "5"));
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
StringReader reader = new StringReader("Tokenizertest");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizertest"});
// Same with maxTokenLen=2 and an embedded no-break space (U+00A0):
// the input is still expected back whole, as one token.
factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "2"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer\u00A0test"});
}
}

View File

@ -54,4 +54,55 @@ public class TestUnicodeWhitespaceTokenizer extends BaseTokenStreamTestCase {
assertEquals(UnicodeWhitespaceTokenizer.class, tokenizer.getClass());
}
/**
* Builds a mutable argument map from alternating key/value strings.
*
* @param args keys at even indexes, each followed by its value
* @return a new map holding the given pairs
*/
private Map<String, String> makeArgs(String... args) {
  final Map<String, String> pairs = new HashMap<>();
  int pos = 0;
  while (pos < args.length) {
    // args[pos] is the key, args[pos + 1] its value
    pairs.put(args[pos], args[pos + 1]);
    pos += 2;
  }
  return pairs;
}
/**
* Checks WhitespaceTokenizerFactory argument validation with rule="unicode",
* and that tokens are cut once they reach maxTokenLen chars.
*/
public void testParamsFactory() throws IOException {
// negative maxTokenLen is rejected
IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "-1")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
// zero maxTokenLen is rejected
iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "0")));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
// an unrecognized parameter is rejected with an "Unknown parameters" error
iae = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "255", "randomParam", "rValue")));
assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
// maxTokenLen=5: "Tokenizer" is cut into "Token" + "izer". In the second word
// the supplementary character takes 2 chars (a surrogate pair), so the first
// piece is that character plus "tes" (5 chars) and the rest is "t".
WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "5"));
AttributeFactory attributeFactory = newAttributeFactory();
Tokenizer tokenizer = factory.create(attributeFactory);
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Token", "izer", "\ud801\udc1ctes", "t"});
// maxTokenLen=2: To | ke | ni | ze | r, then te | st. The no-break space
// (U+00A0) acts as a separator here, so "test" is chunked on its own.
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "2"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"To", "ke", "ni", "ze", "r", "te", "st"});
// maxTokenLen=10: both words are shorter than the limit, so the only
// split is at the no-break space.
factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "10"));
attributeFactory = newAttributeFactory();
tokenizer = factory.create(attributeFactory);
reader = new StringReader("Tokenizer\u00A0test");
tokenizer.setReader(reader);
assertTokenStreamContents(tokenizer, new String[]{"Tokenizer", "test"});
}
}

View File

@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.TestUtil;
@ -90,6 +92,99 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
}
/*
* Tests the max token length passed as a constructor parameter: the tokenizer
* cuts a token once it reaches that many chars, and out-of-range limits
* (zero, negative, above 1048576) are rejected with IllegalArgumentException.
*/
public void testCustomMaxTokenLength() throws IOException {
StringBuilder builder = new StringBuilder();
for (int i = 0; i < 100; i++) {
builder.append("A");
}
Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
// Feed two copies of the 100-char string back to back (200 chars, no separator):
// with a limit of 100 the expected result is two 100-char lowercased tokens.
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT),
builder.toString().toLowerCase(Locale.ROOT) });
// A negative limit is rejected.
Exception e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), -1));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
// Same 200-char input through LetterTokenizer: two 100-char tokens, case preserved.
tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString(), builder.toString()});
// Let's test that we can get a token longer than 255 through.
builder.setLength(0);
for (int i = 0; i < 500; i++) {
builder.append("Z");
}
tokenizer = new LetterTokenizer(newAttributeFactory(), 500);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
// Edge cases: a limit of zero makes no sense and must be rejected, as must a
// limit above the maximum; also try a token longer than the I/O buffer (4096 chars).
builder.setLength(0);
for (int i = 0; i < 600; i++) {
builder.append("aUrOkIjq"); // 600 * 8 = 4800 chars.
}
// LowerCaseTokenizer: zero and too-large limits rejected, 4800-char token passes lowercased.
e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
e = expectThrows(IllegalArgumentException.class, () ->
new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});
// KeywordTokenizer: same range checks on its buffer-size argument; input comes back unchanged.
e = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
e = expectThrows(IllegalArgumentException.class, () ->
new KeywordTokenizer(newAttributeFactory(), 10_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
tokenizer = new KeywordTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
// LetterTokenizer: same range checks; 4800-char all-letter token passes unchanged.
e = expectThrows(IllegalArgumentException.class, () ->
new LetterTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
e = expectThrows(IllegalArgumentException.class, () ->
new LetterTokenizer(newAttributeFactory(), 2_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 2000000", e.getMessage());
tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
// WhitespaceTokenizer: same range checks; 4800-char whitespace-free token passes unchanged.
e = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizer(newAttributeFactory(), 0));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
e = expectThrows(IllegalArgumentException.class, () ->
new WhitespaceTokenizer(newAttributeFactory(), 3_000_000));
assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 3000000", e.getMessage());
tokenizer = new WhitespaceTokenizer(newAttributeFactory(), 4800);
tokenizer.setReader(new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
}
/*
* tests the max word length of 255 with a surrogate pair at position 255
*/

View File

@ -168,11 +168,14 @@ FFE3>
1134D>
11366..1136C>
11370..11374>
11442>
11446>
114C2..114C3>
115BF..115C0>
1163F>
116B6..116B7>
1172B>
11C3F>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
@ -181,6 +184,8 @@ FFE3>
1D185..1D18B>
1D1AA..1D1AD>
1E8D0..1E8D6>
1E944..1E946>
1E948..1E94A>
# Latin script "composed" that do not further decompose, so decompose here
# These are from AsciiFoldingFilter

View File

@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
112F7>0037 # KHUDAWADI DIGIT SEVEN
112F8>0038 # KHUDAWADI DIGIT EIGHT
112F9>0039 # KHUDAWADI DIGIT NINE
11450>0030 # NEWA DIGIT ZERO
11451>0031 # NEWA DIGIT ONE
11452>0032 # NEWA DIGIT TWO
11453>0033 # NEWA DIGIT THREE
11454>0034 # NEWA DIGIT FOUR
11455>0035 # NEWA DIGIT FIVE
11456>0036 # NEWA DIGIT SIX
11457>0037 # NEWA DIGIT SEVEN
11458>0038 # NEWA DIGIT EIGHT
11459>0039 # NEWA DIGIT NINE
114D0>0030 # TIRHUTA DIGIT ZERO
114D1>0031 # TIRHUTA DIGIT ONE
114D2>0032 # TIRHUTA DIGIT TWO
@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
118E7>0037 # WARANG CITI DIGIT SEVEN
118E8>0038 # WARANG CITI DIGIT EIGHT
118E9>0039 # WARANG CITI DIGIT NINE
11C50>0030 # BHAIKSUKI DIGIT ZERO
11C51>0031 # BHAIKSUKI DIGIT ONE
11C52>0032 # BHAIKSUKI DIGIT TWO
11C53>0033 # BHAIKSUKI DIGIT THREE
11C54>0034 # BHAIKSUKI DIGIT FOUR
11C55>0035 # BHAIKSUKI DIGIT FIVE
11C56>0036 # BHAIKSUKI DIGIT SIX
11C57>0037 # BHAIKSUKI DIGIT SEVEN
11C58>0038 # BHAIKSUKI DIGIT EIGHT
11C59>0039 # BHAIKSUKI DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
16B59>0039 # PAHAWH HMONG DIGIT NINE
1E950>0030 # ADLAM DIGIT ZERO
1E951>0031 # ADLAM DIGIT ONE
1E952>0032 # ADLAM DIGIT TWO
1E953>0033 # ADLAM DIGIT THREE
1E954>0034 # ADLAM DIGIT FOUR
1E955>0035 # ADLAM DIGIT FIVE
1E956>0036 # ADLAM DIGIT SIX
1E957>0037 # ADLAM DIGIT SEVEN
1E958>0038 # ADLAM DIGIT EIGHT
1E959>0039 # ADLAM DIGIT NINE

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt
@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.
* Unicode 7.0.0
* Unicode 9.0.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
@ -129,6 +129,8 @@
0825..0827:230
0829..082D:230
0859..085B:220
08D4..08E1:230
08E3:220
08E4..08E5:230
08E6:220
08E7..08E8:230
@ -232,6 +234,7 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
1DFB:230
1DFC:233
1DFD:220
1DFE:230
@ -260,7 +263,7 @@
3099..309A:8
A66F:230
A674..A67D:230
A69F:230
A69E..A69F:230
A6F0..A6F1:230
A806:9
A8C4:9
@ -280,6 +283,7 @@ ABED:9
FB1E:26
FE20..FE26:230
FE27..FE2D:220
FE2E..FE2F:230
101FD:220
102E0:220
10376..1037A:230
@ -299,6 +303,7 @@ FE27..FE2D:220
11133..11134:9
11173:7
111C0:9
111CA:7
11235:9
11236:7
112E9:7
@ -307,6 +312,8 @@ FE27..FE2D:220
1134D:9
11366..1136C:230
11370..11374:230
11442:9
11446:7
114C2:9
114C3:7
115BF:9
@ -314,6 +321,8 @@ FE27..FE2D:220
1163F:9
116B6:9
116B7:7
1172B:9
11C3F:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
@ -326,7 +335,14 @@ FE27..FE2D:220
1D18A..1D18B:220
1D1AA..1D1AD:230
1D242..1D244:230
1E000..1E006:230
1E008..1E018:230
1E01B..1E021:230
1E023..1E024:230
1E026..1E02A:230
1E8D0..1E8D6:220
1E944..1E949:230
1E94A:7
# Canonical decomposition mappings
00C0>0041 0300 # one-way: diacritic 0300

View File

@ -1,4 +1,4 @@
# Copyright (C) 1999-2014, International Business Machines
# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt
@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
* Unicode 7.0.0
* Unicode 9.0.0
00A0>0020
00A8>0020 0308
@ -3675,6 +3675,7 @@ FFEE>25CB
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View File

@ -1,5 +1,5 @@
# Unicode Character Database
# Copyright (c) 1991-2014 Unicode, Inc.
# Copyright (c) 1991-2016 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
* Unicode 7.0.0
* Unicode 9.0.0
0041>0061
0042>0062
@ -632,8 +632,22 @@
10CD>2D2D
10FC>10DC
115F..1160>
13F8>13F0
13F9>13F1
13FA>13F2
13FB>13F3
13FC>13F4
13FD>13F5
17B4..17B5>
180B..180E>
1C80>0432
1C81>0434
1C82>043E
1C83>0441
1C84..1C85>0442
1C86>044A
1C87>0463
1C88>A64B
1D2C>0061
1D2D>00E6
1D2E>0062
@ -2382,14 +2396,99 @@ A7AA>0266
A7AB>025C
A7AC>0261
A7AD>026C
A7AE>026A
A7B0>029E
A7B1>0287
A7B2>029D
A7B3>AB53
A7B4>A7B5
A7B6>A7B7
A7F8>0127
A7F9>0153
AB5C>A727
AB5D>AB37
AB5E>026B
AB5F>AB52
AB70>13A0
AB71>13A1
AB72>13A2
AB73>13A3
AB74>13A4
AB75>13A5
AB76>13A6
AB77>13A7
AB78>13A8
AB79>13A9
AB7A>13AA
AB7B>13AB
AB7C>13AC
AB7D>13AD
AB7E>13AE
AB7F>13AF
AB80>13B0
AB81>13B1
AB82>13B2
AB83>13B3
AB84>13B4
AB85>13B5
AB86>13B6
AB87>13B7
AB88>13B8
AB89>13B9
AB8A>13BA
AB8B>13BB
AB8C>13BC
AB8D>13BD
AB8E>13BE
AB8F>13BF
AB90>13C0
AB91>13C1
AB92>13C2
AB93>13C3
AB94>13C4
AB95>13C5
AB96>13C6
AB97>13C7
AB98>13C8
AB99>13C9
AB9A>13CA
AB9B>13CB
AB9C>13CC
AB9D>13CD
AB9E>13CE
AB9F>13CF
ABA0>13D0
ABA1>13D1
ABA2>13D2
ABA3>13D3
ABA4>13D4
ABA5>13D5
ABA6>13D6
ABA7>13D7
ABA8>13D8
ABA9>13D9
ABAA>13DA
ABAB>13DB
ABAC>13DC
ABAD>13DD
ABAE>13DE
ABAF>13DF
ABB0>13E0
ABB1>13E1
ABB2>13E2
ABB3>13E3
ABB4>13E4
ABB5>13E5
ABB6>13E6
ABB7>13E7
ABB8>13E8
ABB9>13E9
ABBA>13EA
ABBB>13EB
ABBC>13EC
ABBD>13ED
ABBE>13EE
ABBF>13EF
F900>8C48
F901>66F4
F902>8ECA
@ -3766,6 +3865,93 @@ FFF0..FFF8>
10425>1044D
10426>1044E
10427>1044F
104B0>104D8
104B1>104D9
104B2>104DA
104B3>104DB
104B4>104DC
104B5>104DD
104B6>104DE
104B7>104DF
104B8>104E0
104B9>104E1
104BA>104E2
104BB>104E3
104BC>104E4
104BD>104E5
104BE>104E6
104BF>104E7
104C0>104E8
104C1>104E9
104C2>104EA
104C3>104EB
104C4>104EC
104C5>104ED
104C6>104EE
104C7>104EF
104C8>104F0
104C9>104F1
104CA>104F2
104CB>104F3
104CC>104F4
104CD>104F5
104CE>104F6
104CF>104F7
104D0>104F8
104D1>104F9
104D2>104FA
104D3>104FB
10C80>10CC0
10C81>10CC1
10C82>10CC2
10C83>10CC3
10C84>10CC4
10C85>10CC5
10C86>10CC6
10C87>10CC7
10C88>10CC8
10C89>10CC9
10C8A>10CCA
10C8B>10CCB
10C8C>10CCC
10C8D>10CCD
10C8E>10CCE
10C8F>10CCF
10C90>10CD0
10C91>10CD1
10C92>10CD2
10C93>10CD3
10C94>10CD4
10C95>10CD5
10C96>10CD6
10C97>10CD7
10C98>10CD8
10C99>10CD9
10C9A>10CDA
10C9B>10CDB
10C9C>10CDC
10C9D>10CDD
10C9E>10CDE
10C9F>10CDF
10CA0>10CE0
10CA1>10CE1
10CA2>10CE2
10CA3>10CE3
10CA4>10CE4
10CA5>10CE5
10CA6>10CE6
10CA7>10CE7
10CA8>10CE8
10CA9>10CE9
10CAA>10CEA
10CAB>10CEB
10CAC>10CEC
10CAD>10CED
10CAE>10CEE
10CAF>10CEF
10CB0>10CF0
10CB1>10CF1
10CB2>10CF2
118A0>118C0
118A1>118C1
118A2>118C2
@ -4803,6 +4989,40 @@ FFF0..FFF8>
1D7FD>0037
1D7FE>0038
1D7FF>0039
1E900>1E922
1E901>1E923
1E902>1E924
1E903>1E925
1E904>1E926
1E905>1E927
1E906>1E928
1E907>1E929
1E908>1E92A
1E909>1E92B
1E90A>1E92C
1E90B>1E92D
1E90C>1E92E
1E90D>1E92F
1E90E>1E930
1E90F>1E931
1E910>1E932
1E911>1E933
1E912>1E934
1E913>1E935
1E914>1E936
1E915>1E937
1E916>1E938
1E917>1E939
1E918>1E93A
1E919>1E93B
1E91A>1E93C
1E91B>1E93D
1E91C>1E93E
1E91D>1E93F
1E91E>1E940
1E91F>1E941
1E920>1E942
1E921>1E943
1EE00>0627
1EE01>0628
1EE02>062C
@ -5067,6 +5287,7 @@ FFF0..FFF8>
1F238>7533
1F239>5272
1F23A>55B6
1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015

View File

@ -54,6 +54,13 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
);
}
/**
 * Checks segmentation of two Traditional Chinese sentences: the multi-character
 * words are kept together and every remaining character is emitted as its own
 * single-character token, in input order.
 */
public void testTraditionalChinese() throws Exception {
  // Expected terms must be non-empty: each single-character term is the input
  // character that sits between the neighbouring multi-character words.
  assertAnalyzesTo(a, "我購買了道具和服裝。",
      new String[] { "我", "購買", "了", "道具", "和", "服裝"});
  assertAnalyzesTo(a, "定義切分字串的基本單位是訂定分詞標準的首要工作", // From http://godel.iis.sinica.edu.tw/CKIP/paper/wordsegment_standard.pdf
      new String[] { "定義", "切", "分", "字串", "的", "基本", "單位", "是", "訂定", "分詞", "標準", "的", "首要", "工作" });
}
public void testChineseNumerics() throws Exception {
assertAnalyzesTo(a, "", new String[] { "" });
assertAnalyzesTo(a, "院內分機9483。",

View File

@ -63,7 +63,7 @@ import java.util.regex.Pattern;
public class GenerateUTR30DataFiles {
private static final String ICU_SVN_TAG_URL
= "http://source.icu-project.org/repos/icu/icu/tags";
private static final String ICU_RELEASE_TAG = "release-54-1";
private static final String ICU_RELEASE_TAG = "release-58-1";
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
private static final String NFC_TXT = "nfc.txt";
private static final String NFKC_TXT = "nfkc.txt";

View File

@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
// ignored characters
builder.add("\u0301", "");
builder.add("\u00AD", "");
builder.add("ґ", "г");
builder.add("Ґ", "Г");
NormalizeCharMap normMap = builder.build();
reader = new MappingCharFilter(normMap, reader);

View File

@ -52,7 +52,14 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
public void testCapsTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Цих Чайковського і Ґете.",
new String[] { "Чайковське", "Чайковський", "Ґете" });
new String[] { "Чайковське", "Чайковський", "Гете" });
a.close();
}
/**
 * Verifies character normalization in the Ukrainian analyzer: the same word
 * written with "Ґ" and with "Г" must produce the identical term "Гюмрі"
 * (the word "та" between them yields no term in the expected output).
 */
public void testCharNormalization() throws Exception {
  // try-with-resources: the analyzer is closed even when the assertion fails,
  // instead of only on the success path.
  try (Analyzer a = new UkrainianMorfologikAnalyzer()) {
    assertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
        new String[] { "Гюмрі", "Гюмрі" });
  }
}

View File

@ -60,10 +60,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyLongField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@ -1114,9 +1110,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
// add numeric fields, to test if flex preserves encoding
doc.add(new LegacyIntField("trieInt", id, Field.Store.NO));
doc.add(new LegacyLongField("trieLong", (long) id, Field.Store.NO));
// add docvalues fields
doc.add(new NumericDocValuesField("dvByte", (byte) id));
@ -1294,51 +1287,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
public void testNumericFields() throws Exception {
for (String name : oldNames) {
Directory dir = oldIndexDirs.get(name);
IndexReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = newSearcher(reader);
for (int id=10; id<15; id++) {
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
assertEquals("wrong number of hits", 1, hits.length);
Document d = searcher.doc(hits[0].doc);
assertEquals(String.valueOf(id), d.get("id"));
hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
assertEquals("wrong number of hits", 1, hits.length);
d = searcher.doc(hits[0].doc);
assertEquals(String.valueOf(id), d.get("id"));
}
// check that also lower-precision fields are ok
ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);
hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
assertEquals("wrong number of hits", 34, hits.length);
// check decoding of terms
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt");
TermsEnum termsEnum = LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
while (termsEnum.next() != null) {
int val = LegacyNumericUtils.prefixCodedToInt(termsEnum.term());
assertTrue("value in id bounds", val >= 0 && val < 35);
}
terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong");
termsEnum = LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
while (termsEnum.next() != null) {
long val = LegacyNumericUtils.prefixCodedToLong(termsEnum.term());
assertTrue("value in id bounds", val >= 0L && val < 35L);
}
reader.close();
}
}
private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
if (VERBOSE) {

View File

@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.1.18
/com.healthmarketscience.jackcess/jackcess = 2.1.3
/com.ibm.icu/icu4j = 56.1
/com.ibm.icu/icu4j = 59.1
/com.pff/java-libpst = 0.8.1
com.sun.jersey.version = 1.9
@ -276,7 +276,7 @@ org.slf4j.version = 1.7.7
/org.tukaani/xz = 1.5
/rome/rome = 1.0
ua.net.nlp.morfologik-ukrainian-search.version = 3.7.5
ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
/xerces/xercesImpl = 2.9.1

View File

@ -1 +0,0 @@
8dd6671f52165a0419e6de5e1016400875a90fa9

View File

@ -0,0 +1 @@
6f06e820cf4c8968bbbaae66ae0b33f6a256b57f

View File

@ -1 +0,0 @@
2b8c8fbd740164d220ca7d18605b8b2092e163e9

View File

@ -0,0 +1 @@
8d2c4bf006f59227bcba8885b4602b3a8b5bd799

View File

@ -31,9 +31,7 @@
<path id="classpath">
<path refid="base.classpath"/>
<path refid="spatialjar"/>
<pathelement path="${backward-codecs.jar}" />
<pathelement path="${queries.jar}" />
<pathelement path="${misc.jar}" />
<pathelement path="${spatial3d.jar}" />
</path>

View File

@ -25,11 +25,6 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -41,8 +36,6 @@ import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@ -88,8 +81,6 @@ public class BBoxStrategy extends SpatialStrategy {
*/
public static FieldType DEFAULT_FIELDTYPE;
@Deprecated
public static LegacyFieldType LEGACY_FIELDTYPE;
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
@ -98,15 +89,6 @@ public class BBoxStrategy extends SpatialStrategy {
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
// Legacy default: legacyNumerics + docValues
LegacyFieldType legacyType = new LegacyFieldType();
legacyType.setIndexOptions(IndexOptions.DOCS);
legacyType.setNumericType(LegacyNumericType.DOUBLE);
legacyType.setNumericPrecisionStep(8);// same as solr default
legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
legacyType.setStored(false);
legacyType.freeze();
LEGACY_FIELDTYPE = legacyType;
}
public static final String SUFFIX_MINX = "__minX";
@ -131,8 +113,6 @@ public class BBoxStrategy extends SpatialStrategy {
private final boolean hasStored;
private final boolean hasDocVals;
private final boolean hasPointVals;
// equiv to "hasLegacyNumerics":
private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
private final FieldType xdlFieldType;
/**
@ -142,15 +122,6 @@ public class BBoxStrategy extends SpatialStrategy {
return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
}
/**
* Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility
* @deprecated LegacyNumerics will be removed
*/
@Deprecated
public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
}
/**
* Creates this strategy.
* {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
@ -179,23 +150,8 @@ public class BBoxStrategy extends SpatialStrategy {
if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
numQuads++;
}
if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
if (hasPointVals) {
throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
}
final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
}
numQuads++;
legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
legacyNumericFieldType.freeze();
} else {
legacyNumericFieldType = null;
}
if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
if (hasPointVals) { // if we have an index...
xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
xdlFieldType.setIndexOptions(IndexOptions.DOCS);
xdlFieldType.freeze();
@ -242,12 +198,6 @@ public class BBoxStrategy extends SpatialStrategy {
fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX());
fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY());
}
if (legacyNumericFieldType != null) {
fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
}
if (xdlFieldType != null) {
fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
}
@ -664,17 +614,12 @@ public class BBoxStrategy extends SpatialStrategy {
private Query makeNumberTermQuery(String field, double number) {
if (hasPointVals) {
return DoublePoint.newExactQuery(field, number);
} else if (legacyNumericFieldType != null) {
BytesRefBuilder bytes = new BytesRefBuilder();
LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes);
return new TermQuery(new Term(field, bytes.get()));
}
throw new UnsupportedOperationException("An index is required for this operation.");
}
/**
* Returns a numeric range query based on FieldType
* {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
* {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
*
* @param fieldname field name. must not be <code>null</code>.
@ -702,8 +647,6 @@ public class BBoxStrategy extends SpatialStrategy {
}
return DoublePoint.newRangeQuery(fieldname, min, max);
} else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
}
throw new UnsupportedOperationException("An index is required for this operation.");
}

View File

@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRefIterator;
/**
* A TokenStream used internally by {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}.
*
* This is modelled after {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
*
* @lucene.internal
*/
class BytesRefIteratorTokenStream extends TokenStream {

View File

@ -18,18 +18,17 @@ package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
@ -57,9 +56,22 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
}
@Override
protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
//levels doesn't actually matter; NumberRange based Shapes have their own "level".
return super.createCellIteratorToIndex(shape, grid.getMaxLevels(), reuse);
/**
 * A shape counts as a "point" here only when it is a
 * {@link NumberRangePrefixTree.UnitNRShape} whose level equals the grid's
 * maximum level; every other shape is reported as not a point.
 */
protected boolean isPointShape(Shape shape) {
  if (!(shape instanceof NumberRangePrefixTree.UnitNRShape)) {
    return false;
  }
  final NumberRangePrefixTree.UnitNRShape unit = (NumberRangePrefixTree.UnitNRShape) shape;
  return unit.getLevel() == grid.getMaxLevels();
}
@Override
protected boolean isGridAlignedShape(Shape shape) {
  // Only UnitNRShape instances can be grid aligned; anything else is not.
  if (!(shape instanceof NumberRangePrefixTree.UnitNRShape)) {
    return false;
  }
  // Every UnitNRShape except the world (level 0) is a single cell/term.
  final int level = ((NumberRangePrefixTree.UnitNRShape) shape).getLevel();
  return level > 0;
}
/** Unsupported. */

View File

@ -21,8 +21,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
@ -34,6 +32,10 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
import org.locationtech.spatial4j.shape.Circle;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
/**
* An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
@ -163,7 +165,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
}
protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
if (pointsOnly && !(shape instanceof Point)) {
if (pointsOnly && !isPointShape(shape)) {
throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
}
return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator
@ -205,4 +207,16 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
Shape inputShape, final int facetLevel, int maxCells) throws IOException {
return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
}
/**
 * Tells whether {@code shape} is effectively a single point: a {@link Point},
 * a {@link Circle} whose radius is exactly zero, or a {@link Rectangle} whose
 * width and height are both exactly zero. Any other shape yields {@code false}.
 */
protected boolean isPointShape(Shape shape) {
  if (shape instanceof Point) {
    return true;
  }
  if (shape instanceof Circle) {
    final Circle circle = (Circle) shape;
    return circle.getRadius() == 0.0;
  }
  if (!(shape instanceof Rectangle)) {
    return false;
  }
  final Rectangle box = (Rectangle) shape;
  return box.getWidth() == 0.0 && box.getHeight() == 0.0;
}
}

View File

@ -20,9 +20,9 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.LegacyCell;
@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.locationtech.spatial4j.shape.Shape;
/**
* A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeQuery}.
@ -121,7 +122,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
@Override
protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
if (shape instanceof Point || !pruneLeafyBranches)
if (!pruneLeafyBranches || isGridAlignedShape(shape))
return super.createCellIteratorToIndex(shape, detailLevel, reuse);
List<Cell> cells = new ArrayList<>(4096);
@ -177,6 +178,9 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
if (op == SpatialOperation.Intersects) {
if (isGridAlignedShape(args.getShape())) {
return makeGridShapeIntersectsQuery(args.getShape());
}
return new IntersectsPrefixTreeQuery(
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel);
} else if (op == SpatialOperation.IsWithin) {
@ -189,4 +193,35 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
}
throw new UnsupportedSpatialOperation(op);
}
/**
 * Optimization hint: a cheap test of whether {@code shape} lines up exactly
 * with the grid. This default treats only point shapes (see
 * {@link #isPointShape(Shape)}) as aligned, since a point is indivisible.
 * Returning {@code false} for a shape that actually is aligned is allowed.
 */
protected boolean isGridAlignedShape(Shape shape) {
  final boolean pointLike = isPointShape(shape);
  return pointLike;
}
/**
 * Variant of {@link #makeQuery(SpatialArgs)} for an Intersects query whose
 * shape is a grid square, i.e. {@link #isGridAlignedShape(Shape)} holds for it.
 *
 * @param gridShape the grid-aligned query shape
 * @return a {@link TermQuery} on the deepest cell when this strategy is
 *         points-only, otherwise an {@link IntersectsPrefixTreeQuery}
 */
protected Query makeGridShapeIntersectsQuery(Shape gridShape) {
  assert isGridAlignedShape(gridShape);
  if (!isPointsOnly()) {
    // Parent cells may exist, so a single term is not enough. The "scan level"
    // is set one past the max level.
    // TODO: AVPTQ will still scan the bottom nonetheless; file an issue to eliminate that
    final int maxLevels = getGrid().getMaxLevels();
    return new IntersectsPrefixTreeQuery(
        gridShape, getFieldName(), grid, maxLevels, maxLevels + 1);
  }
  // Points-only: the whole query collapses to one term, the last (deepest) cell.
  final Iterator<Cell> cells = grid.getTreeCellIterator(gridShape, grid.getMaxLevels());
  Cell deepest = cells.next();
  while (cells.hasNext()) {
    final int shallowerLevel = deepest.getLevel();
    deepest = cells.next();
    // each successive cell is expected to be strictly deeper than the previous one
    assert shallowerLevel < deepest.getLevel();
  }
  return new TermQuery(new Term(getFieldName(), deepest.getTokenBytesWithLeaf(null)));
}
}

View File

@ -22,11 +22,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
@ -86,8 +81,6 @@ public class PointVectorStrategy extends SpatialStrategy {
*/
public static FieldType DEFAULT_FIELDTYPE;
@Deprecated
public static LegacyFieldType LEGACY_FIELDTYPE;
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
@ -96,15 +89,6 @@ public class PointVectorStrategy extends SpatialStrategy {
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
// Legacy default: legacyNumerics
LegacyFieldType legacyType = new LegacyFieldType();
legacyType.setIndexOptions(IndexOptions.DOCS);
legacyType.setNumericType(LegacyNumericType.DOUBLE);
legacyType.setNumericPrecisionStep(8);// same as solr default
legacyType.setDocValuesType(DocValuesType.NONE);//no docValues!
legacyType.setStored(false);
legacyType.freeze();
LEGACY_FIELDTYPE = legacyType;
}
public static final String SUFFIX_X = "__x";
@ -117,8 +101,6 @@ public class PointVectorStrategy extends SpatialStrategy {
private final boolean hasStored;
private final boolean hasDocVals;
private final boolean hasPointVals;
// equiv to "hasLegacyNumerics":
private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
/**
* Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
@ -127,18 +109,6 @@ public class PointVectorStrategy extends SpatialStrategy {
return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
}
/**
* Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility.
* However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)}
* since that requires docValues (the legacy config didn't have that).
*
* @deprecated LegacyNumerics will be removed
*/
@Deprecated
public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
}
/**
* Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
* A field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics
@ -159,21 +129,6 @@ public class PointVectorStrategy extends SpatialStrategy {
if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
numPairs++;
}
if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
if (hasPointVals) {
throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
}
final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
}
numPairs++;
legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
legacyNumericFieldType.freeze();
} else {
legacyNumericFieldType = null;
}
this.fieldsLen = numPairs * 2;
}
@ -209,10 +164,6 @@ public class PointVectorStrategy extends SpatialStrategy {
fields[++idx] = new DoublePoint(fieldNameX, point.getX());
fields[++idx] = new DoublePoint(fieldNameY, point.getY());
}
if (legacyNumericFieldType != null) {
fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
}
assert idx == fields.length - 1;
return fields;
}
@ -268,7 +219,6 @@ public class PointVectorStrategy extends SpatialStrategy {
/**
* Returns a numeric range query based on FieldType
* {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
* {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
*/
private Query rangeQuery(String fieldName, Double min, Double max) {
@ -283,8 +233,6 @@ public class PointVectorStrategy extends SpatialStrategy {
return DoublePoint.newRangeQuery(fieldName, min, max);
} else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive
}
//TODO try doc-value range query?
throw new UnsupportedOperationException("An index is required for this operation.");

View File

@ -68,9 +68,6 @@ public class DistanceStrategyTest extends StrategyTestCase {
strategy = BBoxStrategy.newInstance(ctx, "bbox");
ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy");
ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
strategy = new SerializedDVStrategy(ctx, "serialized");
ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});

View File

@ -58,9 +58,7 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase {
strategies.add(recursive_geohash);
strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad"));
strategies.add(PointVectorStrategy.newInstance(ctx, "pointvector"));
strategies.add(PointVectorStrategy.newLegacyInstance(ctx, "pointvector_legacy"));
strategies.add(BBoxStrategy.newInstance(ctx, "bbox"));
strategies.add(BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy"));
final SerializedDVStrategy serialized = new SerializedDVStrategy(ctx, "serialized");
strategies.add(serialized);
strategies.add(new CompositeSpatialStrategy("composite", recursive_geohash, serialized));

View File

@ -21,8 +21,6 @@ import java.io.IOException;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.SpatialMatchConcern;
import org.apache.lucene.spatial.prefix.RandomSpatialOpStrategyTestCase;
@ -93,20 +91,10 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
factory.worldBounds = new RectangleImpl(-300, 300, -100, 100, null);
this.ctx = factory.newSpatialContext();
}
// randomly test legacy (numeric) and point based bbox strategy
if (random().nextBoolean()) {
this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
} else {
this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
}
this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
//test we can disable docValues for predicate tests
if (random().nextBoolean()) {
FieldType fieldType = ((BBoxStrategy)strategy).getFieldType();
if (fieldType instanceof LegacyFieldType) {
fieldType = new LegacyFieldType((LegacyFieldType)fieldType);
} else {
fieldType = new FieldType(fieldType);
}
FieldType fieldType = new FieldType(((BBoxStrategy)strategy).getFieldType());
fieldType.setDocValuesType(DocValuesType.NONE);
strategy = new BBoxStrategy(ctx, strategy.getFieldName(), fieldType);
}
@ -194,11 +182,7 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
private void setupGeo() {
this.ctx = SpatialContext.GEO;
if (random().nextBoolean()) {
this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
} else {
this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
}
this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
}
// OLD STATIC TESTS (worthless?)
@ -239,16 +223,9 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
FieldType fieldType;
// random legacy or not legacy
String FIELD_PREFIX = "bbox";
fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
if (random().nextBoolean()) {
fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
if (random().nextBoolean()) {
fieldType.setDimensions(0, 0);
}
} else {
fieldType = new FieldType(BBoxStrategy.LEGACY_FIELDTYPE);
if (random().nextBoolean()) {
fieldType.setIndexOptions(IndexOptions.NONE);
}
fieldType.setDimensions(0, 0);
}
strategy = new BBoxStrategy(ctx, FIELD_PREFIX, fieldType);

View File

@ -27,7 +27,7 @@ import org.junit.Before;
import org.junit.Test;
import org.locationtech.spatial4j.shape.Shape;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@ -42,17 +42,8 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
public void setUp() throws Exception {
super.setUp();
tree = DateRangePrefixTree.INSTANCE;
if (randomBoolean()) {
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
} else {
//Test the format that existed <= Lucene 5.0
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange") {
@Override
protected CellToBytesRefIterator newCellToBytesRefIterator() {
return new CellToBytesRefIterator50();
}
};
}
strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(randomInt() % 5 == 0);
Calendar tmpCal = tree.newCal();
int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND);
tmpCal.add(randomCalWindowField, 2_000);
@ -79,15 +70,16 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@Test
public void testWithinSame() throws IOException {
final Calendar cal = tree.newCal();
Shape shape = randomIndexedShape();
testOperation(
tree.toShape(cal),
shape,
SpatialOperation.IsWithin,
tree.toShape(cal), true);//is within itself
shape, true);//is within itself
}
@Test
public void testWorld() throws IOException {
((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
testOperation(
tree.toShape(tree.newCal()),//world matches everything
SpatialOperation.Contains,
@ -96,6 +88,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@Test
public void testBugInitIterOptimization() throws Exception {
((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
//bug due to fast path initIter() optimization
testOperation(
tree.parseShape("[2014-03-27T23 TO 2014-04-01T01]"),
@ -114,6 +107,21 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
/**
 * Produces a random shape to index. When the strategy is points-only, the shape is built from a
 * calendar set to a random millisecond timestamp; otherwise it comes from {@code randomShape()}.
 */
@Override
protected Shape randomIndexedShape() {
  final boolean pointsOnly = ((NumberRangePrefixTreeStrategy) strategy).isPointsOnly();
  if (!pointsOnly) {
    return randomShape();
  }
  final Calendar instant = tree.newCal();
  instant.setTimeInMillis(random().nextLong());
  return tree.toShape(instant);
}
/** Query shapes are always drawn from {@code randomShape()}; the points-only flag is not consulted here. */
@Override
protected Shape randomQueryShape() {
return randomShape();
}
private Shape randomShape() {
Calendar cal1 = randomCalendar();
UnitNRShape s1 = tree.toShape(cal1);
if (rarely()) {
@ -144,9 +152,4 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
}
return cal;
}
@Override
protected Shape randomQueryShape() {
return randomIndexedShape();
}
}

View File

@ -63,12 +63,7 @@ public class TestPointVectorStrategy extends StrategyTestCase {
@Test
public void testCitiesIntersectsBBox() throws IOException {
// note: does not require docValues
if (random().nextBoolean()) {
this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
} else {
// switch to legacy instance sometimes, which has no docValues
this.strategy = PointVectorStrategy.newLegacyInstance(ctx, getClass().getSimpleName());
}
this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
getAddAndVerifyIndexedDocuments(DATA_WORLD_CITIES_POINTS);
executeQueries(SpatialMatchConcern.FILTER, QTEST_Cities_Intersects_BBox);
}

View File

@ -60,7 +60,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* <ul>
* <li><code>wordset</code> - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
* begining with the "#" character. Blank lines are ignored. See
* beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
* </li>
* <li><code>snowball</code> - This format allows for multiple words specified on each

View File

@ -194,6 +194,13 @@ Other Changes
* SOLR-10700: Deprecated and converted the PostingsSolrHighlighter to extend UnifiedSolrHighlighter and thus no
longer use the PostingsHighlighter. It should behave mostly the same. (David Smiley)
* SOLR-10710: Fix LTR failing tests. (Diego Ceccarelli via Tomás Fernández Löbbe)
* SOLR-10755: delete/refactor many solrj deprecations (hossman)
* SOLR-10752: replicationFactor (nrtReplicas) default is 0 if tlogReplicas is specified when creating a collection
(Tomás Fernández Löbbe)
================== 6.7.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -217,6 +224,8 @@ Upgrade Notes
passwords via the env variables SOLR_SSL_KEY_STORE_PASSWORD and SOLR_SSL_TRUST_STORE_PASSWORD rather
than system properties.
* SOLR-10379: ManagedSynonymFilterFactory has been deprecated in favor of ManagedSynonymGraphFilterFactory.
New Features
----------------------
@ -225,12 +234,20 @@ New Features
* SOLR-10721: Provide a way to know when Core Discovery is finished and when all async cores are done loading
(Erick Erickson)
* SOLR-10379: Add ManagedSynonymGraphFilterFactory, deprecate ManagedSynonymFilterFactory. (Steve Rowe)
* SOLR-10479: Adds support for HttpShardHandlerFactory.loadBalancerRequests(MinimumAbsolute|MaximumFraction)
configuration. (Ramsey Haddad, Daniel Collins, Christine Poerschke)
Bug Fixes
----------------------
* SOLR-10723 JSON Facet API: resize() implemented incorrectly for CountSlotAcc, HllAgg.NumericAcc
resulting in exceptions when using a hashing faceting method and sorting by hll(numeric_field).
(yonik)
* SOLR-10719: Creating a core.properties fails if the parent of core.properties is a symlinked directory
(Erick Erickson)
Optimizations
----------------------
* SOLR-10634: JSON Facet API: When a field/terms facet will retrieve all buckets (i.e. limit:-1)
@ -238,7 +255,6 @@ Optimizations
so that the second phase which would normally involve calculating the domain for the bucket
can be skipped entirely, leading to large performance improvements. (yonik)
Other Changes
----------------------
@ -251,6 +267,15 @@ Other Changes
* SOLR-10438: Assign explicit useDocValuesAsStored values to all points field types in
schema-point.xml/TestPointFields. (hossman, Steve Rowe)
* LUCENE-7705: Allow CharTokenizer-derived tokenizers and KeywordTokenizer to configure the max token length.
(Amrit Sarkar via Erick Erickson)
* SOLR-10659: Remove ResponseBuilder.getSortSpec use in SearchGroupShardResponseProcessor.
(Judith Silverman via Christine Poerschke)
* SOLR-10741: Factor out createSliceShardsStr method from HttpShardHandler.prepDistributed.
(Domenico Fabio Marino via Christine Poerschke)
================== 6.6.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@ -458,6 +483,14 @@ Bug Fixes
"lucene"/standard query parser, should require " TO " in range queries,
and accept "TO" as endpoints in range queries. (hossman, Steve Rowe)
* SOLR-10735: Windows script (solr.cmd) didn't work properly with directory containing spaces. Adding quotations
to fix (Uwe Schindler, janhoy, Tomas Fernandez-Lobbe, Ishan Chattopadhyaya)
Ref Guide
----------------------
* SOLR-10758: Modernize the Solr ref guide's Chinese language analysis coverage. (Steve Rowe)
Other Changes
----------------------

View File

@ -20,7 +20,7 @@ import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.FieldType;

View File

@ -24,7 +24,7 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;

View File

@ -76,8 +76,7 @@ public class FieldLengthFeature extends Feature {
static {
NORM_TABLE[0] = 0;
for (int i = 1; i < 256; i++) {
float norm = SmallFloat.byte315ToFloat((byte) i);
NORM_TABLE[i] = 1.0f / (norm * norm);
NORM_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
}
}

View File

@ -88,15 +88,16 @@ public class TestLTRQParserPlugin extends TestRerankBase {
query.add("rows", "4");
query.add("fv", "true");
String nonRerankedScore = "0.09271725";
// FIXME: design better way to test this, we cannot check an absolute score
// String nonRerankedScore = "0.09271725";
// Normal solr order
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/id=='9'",
"/response/docs/[1]/id=='8'",
"/response/docs/[2]/id=='7'",
"/response/docs/[3]/id=='6'",
"/response/docs/[3]/score=="+nonRerankedScore
"/response/docs/[3]/id=='6'"
// "/response/docs/[3]/score=="+nonRerankedScore
);
query.add("rq", "{!ltr model=6029760550880411648 reRankDocs=3}");
@ -106,8 +107,8 @@ public class TestLTRQParserPlugin extends TestRerankBase {
"/response/docs/[0]/id=='7'",
"/response/docs/[1]/id=='8'",
"/response/docs/[2]/id=='9'",
"/response/docs/[3]/id=='6'",
"/response/docs/[3]/score=="+nonRerankedScore
"/response/docs/[3]/id=='6'"
// "/response/docs/[3]/score=="+nonRerankedScore
);
}

View File

@ -42,8 +42,9 @@ public class TestParallelWeightCreation extends TestRerankBase{
query.add("rows", "4");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
// SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
aftertest();
}

View File

@ -210,14 +210,14 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
@Test
public void testSelectiveWeightsRequestFeaturesFromDifferentStore() throws Exception {
final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
"matchedTitle","1.0", "titlePhraseMatch","0.6103343");
final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
"matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
"popularity","3.0", "originalScore","1.0");
final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);
// final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
// "matchedTitle","1.0", "titlePhraseMatch","0.6103343");
// final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
// "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
// final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
// "popularity","3.0", "originalScore","1.0");
//
// final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);
// extract all features in externalmodel's store (default store)
// rerank using externalmodel (default store)
@ -227,11 +227,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
query.add("rows", "5");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w2 efi.userTitlePhrase2=w1}");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
// SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
// FIXME design better way to test this, we can't rely on absolute scores
// assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");
// extract all features from fstore4
// rerank using externalmodel (default store)
@ -240,11 +241,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
query.add("fl", "*,score,fv:[fv store=fstore4 efi.myPop=3]");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
// SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
// FIXME design better way to test this, we can't rely on absolute scores
// assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
// extract all features from fstore4
// rerank using externalmodel2 (fstore2)
@ -256,8 +258,8 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='5'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==2.5");
// FIXME design better way to test this, we can't rely on absolute scores
// assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
}
}

View File

@ -95,9 +95,9 @@ public class CreateCollectionCmd implements Cmd {
// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores
int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, 1));
int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
int numTlogReplicas = message.getInt(TLOG_REPLICAS, 0);
int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1));
int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
final String async = message.getStr(ASYNC);

View File

@ -39,6 +39,7 @@ import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import org.apache.solr.common.SolrException;
import org.apache.solr.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -85,13 +86,15 @@ public class CorePropertiesLocator implements CoresLocator {
/**
 * Persists the properties built from {@code cd} to {@code propfile}, creating the parent
 * directory first if needed.
 *
 * @param cd the core descriptor whose properties are written
 * @param propfile destination path of the properties file
 * @throws SolrException with {@code BAD_REQUEST} if the directory or file cannot be written;
 *         the underlying {@link IOException} is attached as the cause
 */
private void writePropertiesFile(CoreDescriptor cd, Path propfile) {
  Properties p = buildCoreProperties(cd);
  try {
    // Only the symlink-aware helper is used: calling the plain Files.createDirectories first
    // would fail before this handling could take effect (SOLR-10719).
    FileUtils.createDirectories(propfile.getParent()); // Handling for symlinks.
    try (Writer os = new OutputStreamWriter(Files.newOutputStream(propfile), StandardCharsets.UTF_8)) {
      p.store(os, "Written by CorePropertiesLocator");
    }
  }
  catch (IOException e) {
    logger.error("Couldn't persist core properties to {}: {}", propfile, e.getMessage());
    // Keep the original exception as the cause so the stack trace is not lost to callers.
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Couldn't persist core properties to " + propfile.toAbsolutePath().toString() + " : " + e.getMessage(), e);
  }
}

View File

@ -45,6 +45,9 @@ import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.CountMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;
@ -185,6 +188,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("percentile", PercentileEvaluator.class)
.withFunctionName("empiricalDistribution", EmpiricalDistributionEvaluator.class)
.withFunctionName("describe", DescribeEvaluator.class)
.withFunctionName("finddelay", FindDelayEvaluator.class)
.withFunctionName("sequence", SequenceEvaluator.class)
.withFunctionName("array", ArrayEvaluator.class)
.withFunctionName("hist", HistogramEvaluator.class)
.withFunctionName("anova", AnovaEvaluator.class)
.withFunctionName("movingAvg", MovingAverageEvaluator.class)
// metrics
.withFunctionName("min", MinMetric.class)
@ -296,7 +305,14 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
TupleStream tupleStream;
try {
tupleStream = this.streamFactory.constructStream(params.get("expr"));
StreamExpression streamExpression = StreamExpressionParser.parse(params.get("expr"));
if(this.streamFactory.isEvaluator(streamExpression)) {
StreamExpression tupleExpression = new StreamExpression("tuple");
tupleExpression.addParameter(new StreamExpressionNamedParameter("return-value", streamExpression));
tupleStream = this.streamFactory.constructStream(tupleExpression);
} else {
tupleStream = this.streamFactory.constructStream(streamExpression);
}
} catch (Exception e) {
//Catch exceptions that occur while the stream is being created. This will include streaming expression parse rules.
SolrException.log(logger, e);

View File

@ -449,17 +449,7 @@ public class HttpShardHandler extends ShardHandler {
}
}
// And now recreate the | delimited list of equivalent servers
final StringBuilder sliceShardsStr = new StringBuilder();
boolean first = true;
for (String shardUrl : shardUrls) {
if (first) {
first = false;
} else {
sliceShardsStr.append('|');
}
sliceShardsStr.append(shardUrl);
}
rb.shards[i] = sliceShardsStr.toString();
rb.shards[i] = createSliceShardsStr(shardUrls);
}
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
@ -472,6 +462,20 @@ public class HttpShardHandler extends ShardHandler {
}
}
/**
 * Recreates the {@code |}-delimited list of equivalent servers for one slice.
 *
 * @param shardUrls the shard URLs to join, in order
 * @return the URLs separated by {@code '|'}; an empty string for an empty list
 */
private static String createSliceShardsStr(final List<String> shardUrls) {
  // String.join yields the same text as a manual first-element/delimiter loop,
  // including rendering null elements as "null".
  return String.join("|", shardUrls);
}
private void addSlices(Map<String,Slice> target, ClusterState state, SolrParams params, String collectionName, String shardKeys, boolean multiCollection) {
DocCollection coll = state.getCollection(collectionName);

View File

@ -97,6 +97,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
int maximumPoolSize = Integer.MAX_VALUE;
int keepAliveTime = 5;
int queueSize = -1;
int permittedLoadBalancerRequestsMinimumAbsolute = 0;
float permittedLoadBalancerRequestsMaximumFraction = 1.0f;
boolean accessPolicy = false;
private String scheme = null;
@ -122,6 +124,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
// If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff
static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue";
// The minimum number of replicas that may be used
static final String LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE = "loadBalancerRequestsMinimumAbsolute";
// The maximum proportion of replicas to be used
static final String LOAD_BALANCER_REQUESTS_MAX_FRACTION = "loadBalancerRequestsMaximumFraction";
// Configure if the threadpool favours fairness over throughput
static final String INIT_FAIRNESS_POLICY = "fairnessPolicy";
@ -164,6 +172,16 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, maximumPoolSize,sb);
this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, keepAliveTime,sb);
this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, queueSize,sb);
this.permittedLoadBalancerRequestsMinimumAbsolute = getParameter(
args,
LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE,
permittedLoadBalancerRequestsMinimumAbsolute,
sb);
this.permittedLoadBalancerRequestsMaximumFraction = getParameter(
args,
LOAD_BALANCER_REQUESTS_MAX_FRACTION,
permittedLoadBalancerRequestsMaximumFraction,
sb);
this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, accessPolicy,sb);
log.debug("created with {}",sb);
@ -252,7 +270,15 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
*/
public LBHttpSolrClient.Rsp makeLoadBalancedRequest(final QueryRequest req, List<String> urls)
throws SolrServerException, IOException {
return loadbalancer.request(new LBHttpSolrClient.Req(req, urls));
return loadbalancer.request(newLBHttpSolrClientReq(req, urls));
}
/**
 * Builds the load-balancer request for {@code urls} with a limit on how many servers may be tried.
 * The limit is the configured maximum fraction of {@code urls.size()}, rounded down, raised to the
 * configured minimum absolute count when the fraction yields less.
 */
protected LBHttpSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List<String> urls) {
  final int fractionLimit = (int) Math.floor(urls.size() * this.permittedLoadBalancerRequestsMaximumFraction);
  final int numServersToTry = Math.max(fractionLimit, this.permittedLoadBalancerRequestsMinimumAbsolute);
  return new LBHttpSolrClient.Req(req, urls, numServersToTry);
}
/**

View File

@ -84,6 +84,12 @@ import org.slf4j.LoggerFactory;
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{
/**
* This constant was formerly part of HighlightParams. After deprecation it was removed so clients
* would no longer use it, but we still support it server side.
*/
private static final String USE_FVH = HighlightParams.HIGHLIGHT + ".useFastVectorHighlighter";
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected final SolrCore solrCore;
@ -492,7 +498,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
boolean methodFvh =
HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
|| params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
|| params.getFieldBool(schemaField.getName(), USE_FVH, false);
if (!methodFvh) return false;
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
if (!termPosOff) {

View File

@ -0,0 +1,706 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.legacy;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
/**
* A SpatialStrategy for indexing and searching Rectangles by storing its
* coordinates in numeric fields. It supports all {@link SpatialOperation}s and
* has a custom overlap relevancy. It is based on GeoPortal's <a
* href="http://geoportal.svn.sourceforge.net/svnroot/geoportal/Geoportal/trunk/src/com/esri/gpt/catalog/lucene/SpatialClauseAdapter.java">SpatialClauseAdapter</a>.
* <p>
* <b>Characteristics:</b>
* <br>
* <ul>
* <li>Only indexes Rectangles; just one per field value. Other shapes can be provided
* and the bounding box will be used.</li>
* <li>Can query only by a Rectangle. Providing other shapes is an error.</li>
* <li>Supports most {@link SpatialOperation}s but not Overlaps.</li>
* <li>Uses the DocValues API for any sorting / relevancy.</li>
* </ul>
* <p>
* <b>Implementation:</b>
* <p>
* This uses 4 double fields for minX, maxX, minY, maxY
* and a boolean to mark a dateline cross. Depending on the particular {@link
* SpatialOperation}s, there are a variety of range queries on {@link DoublePoint}s to be
* done.
* The {@link #makeOverlapRatioValueSource(org.locationtech.spatial4j.shape.Rectangle, double)}
* works by calculating the query bbox overlap percentage against the indexed
* shape overlap percentage. The indexed shape's coordinates are retrieved from
* {@link org.apache.lucene.index.LeafReader#getNumericDocValues}.
*
* @lucene.experimental
*/
public class BBoxStrategy extends SpatialStrategy {
// note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we
// create more than one Field.
/**
* pointValues, docValues, and nothing else.
*/
public static FieldType DEFAULT_FIELDTYPE;
@Deprecated
public static LegacyFieldType LEGACY_FIELDTYPE;
// Builds the two shared field types once at class-initialization time; both are frozen before being published.
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
type.setDimensions(1, Double.BYTES);//pointValues (assume Double): 1 dimension, Double.BYTES bytes wide
type.setDocValuesType(DocValuesType.NUMERIC);//docValues
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
// Legacy default: legacyNumerics + docValues
LegacyFieldType legacyType = new LegacyFieldType();
legacyType.setIndexOptions(IndexOptions.DOCS);
legacyType.setNumericType(LegacyNumericType.DOUBLE);
legacyType.setNumericPrecisionStep(8);// same as solr default
legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
legacyType.setStored(false);
legacyType.freeze();
LEGACY_FIELDTYPE = legacyType;
}
public static final String SUFFIX_MINX = "__minX";
public static final String SUFFIX_MAXX = "__maxX";
public static final String SUFFIX_MINY = "__minY";
public static final String SUFFIX_MAXY = "__maxY";
public static final String SUFFIX_XDL = "__xdl";
/*
* The Bounding Box gets stored as four fields for x/y min/max and a flag
* that says if the box crosses the dateline (xdl).
*/
final String field_bbox;
final String field_minX;
final String field_minY;
final String field_maxX;
final String field_maxY;
final String field_xdl; // crosses dateline
private final FieldType optionsFieldType;//from constructor; aggregate field type used to express all options
private final int fieldsLen;
private final boolean hasStored;
private final boolean hasDocVals;
private final boolean hasPointVals;
// equiv to "hasLegacyNumerics":
private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
private final FieldType xdlFieldType;
/**
 * Creates a new {@link BBoxStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
 *
 * @param ctx the spatial context handed to the constructor
 * @param fieldNamePrefix prefix from which the per-coordinate field names are derived
 * @return a strategy configured with {@link #DEFAULT_FIELDTYPE}
 */
public static BBoxStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
}
/**
 * Factory for a {@link BBoxStrategy} configured with {@link #LEGACY_FIELDTYPE}, i.e. one that uses
 * {@link LegacyDoubleField} for backwards compatibility.
 * @deprecated LegacyNumerics will be removed
 */
@Deprecated
public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
  final LegacyFieldType options = LEGACY_FIELDTYPE;
  return new BBoxStrategy(ctx, fieldNamePrefix, options);
}
/**
 * Creates this strategy.
 * {@code fieldType} customizes the indexing options of the 4 number fields and, to a lesser degree, the XDL
 * field. Search requires pointValues (or legacy numerics), and relevancy requires docValues; disable whichever
 * of these features is not needed. Note that {@code fieldType} is frozen by this constructor.
 *
 * @throws IllegalArgumentException if both pointValues and a legacy numeric type are requested, or if the
 *         legacy numeric type is not DOUBLE
 */
public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
  super(ctx, fieldNamePrefix);
  this.field_bbox = fieldNamePrefix;
  this.field_minX = fieldNamePrefix + SUFFIX_MINX;
  this.field_maxX = fieldNamePrefix + SUFFIX_MAXX;
  this.field_minY = fieldNamePrefix + SUFFIX_MINY;
  this.field_maxY = fieldNamePrefix + SUFFIX_MAXY;
  this.field_xdl = fieldNamePrefix + SUFFIX_XDL;

  fieldType.freeze();
  this.optionsFieldType = fieldType;

  // Each enabled option contributes one "quad" of four fields (minX, minY, maxX, maxY).
  this.hasStored = fieldType.stored();
  this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE;
  this.hasPointVals = fieldType.pointDimensionCount() > 0;
  int quadCount = (hasStored ? 1 : 0) + (hasDocVals ? 1 : 0) + (hasPointVals ? 1 : 0);

  final boolean wantsLegacyNumerics = fieldType.indexOptions() != IndexOptions.NONE
      && fieldType instanceof LegacyFieldType
      && ((LegacyFieldType) fieldType).numericType() != null;
  if (!wantsLegacyNumerics) {
    this.legacyNumericFieldType = null;
  } else {
    if (hasPointVals) {
      throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
    }
    final LegacyFieldType legacyOptions = (LegacyFieldType) fieldType;
    if (legacyOptions.numericType() != LegacyNumericType.DOUBLE) {
      throw new IllegalArgumentException(getClass() + " does not support " + legacyOptions.numericType());
    }
    quadCount++;
    // Only the precision step is carried over from the caller's type.
    final LegacyFieldType numericType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
    numericType.setNumericPrecisionStep(legacyOptions.numericPrecisionStep());
    numericType.freeze();
    this.legacyNumericFieldType = numericType;
  }

  if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
    final FieldType flagType = new FieldType(StringField.TYPE_NOT_STORED);
    flagType.setIndexOptions(IndexOptions.DOCS);
    flagType.freeze();
    this.xdlFieldType = flagType;
  } else {
    this.xdlFieldType = null;
  }

  this.fieldsLen = quadCount * 4 + (xdlFieldType != null ? 1 : 0);
}
/**
 * Returns the field type that represents the set of field options. It is the very instance that was passed
 * into the constructor, and it is frozen.
 */
public FieldType getFieldType() {
  return this.optionsFieldType;
}
//---------------------------------
// Indexing
//---------------------------------
// Indexes the bounding box of the given shape.
@Override
public Field[] createIndexableFields(Shape shape) {
  final Rectangle boundingBox = shape.getBoundingBox();
  return createIndexableFields(boundingBox);
}
// Builds the fields for one rectangle: a group of four coordinate fields (minX, minY, maxX, maxY) for every
// enabled option, followed by the "T"/"F" dateline flag when an index is in use.
private Field[] createIndexableFields(Rectangle bbox) {
  final Field[] out = new Field[fieldsLen];
  int next = 0; // next free slot in out
  if (hasStored) {
    out[next++] = new StoredField(field_minX, bbox.getMinX());
    out[next++] = new StoredField(field_minY, bbox.getMinY());
    out[next++] = new StoredField(field_maxX, bbox.getMaxX());
    out[next++] = new StoredField(field_maxY, bbox.getMaxY());
  }
  if (hasDocVals) {
    out[next++] = new DoubleDocValuesField(field_minX, bbox.getMinX());
    out[next++] = new DoubleDocValuesField(field_minY, bbox.getMinY());
    out[next++] = new DoubleDocValuesField(field_maxX, bbox.getMaxX());
    out[next++] = new DoubleDocValuesField(field_maxY, bbox.getMaxY());
  }
  if (hasPointVals) {
    out[next++] = new DoublePoint(field_minX, bbox.getMinX());
    out[next++] = new DoublePoint(field_minY, bbox.getMinY());
    out[next++] = new DoublePoint(field_maxX, bbox.getMaxX());
    out[next++] = new DoublePoint(field_maxY, bbox.getMaxY());
  }
  if (legacyNumericFieldType != null) {
    out[next++] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
    out[next++] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
    out[next++] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
    out[next++] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
  }
  if (xdlFieldType != null) {
    out[next++] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
  }
  // every slot sized by the constructor must have been filled
  assert next == out.length;
  return out;
}
//---------------------------------
// Value Source / Relevancy
//---------------------------------
/**
 * Exposes the rectangle of each document as a ValueSource whose
 * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)} yields a {@link Shape}.
 */ //TODO raise to SpatialStrategy
public ValueSource makeShapeValueSource() {
  final ValueSource shapes = new BBoxValueSource(this);
  return shapes;
}
// Distance from queryPoint to each document's shape, built on top of makeShapeValueSource().
@Override
public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
  //TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl.
  final ValueSource shapes = makeShapeValueSource();
  return new DistanceToShapeValueSource(shapes, queryPoint, multiplier, ctx);
}
/**
 * Convenience method returning a similarity based on {@link BBoxOverlapRatioValueSource}, fed by
 * {@link #makeShapeValueSource()}.
 */
public ValueSource makeOverlapRatioValueSource(Rectangle queryBox, double queryTargetProportion) {
  final ValueSource shapes = makeShapeValueSource();
  final boolean geo = ctx.isGeo();
  return new BBoxOverlapRatioValueSource(shapes, geo, queryBox, queryTargetProportion, 0.0);
}
//---------------------------------
// Query Building
//---------------------------------
// Utility on SpatialStrategy?
// public Query makeQueryWithValueSource(SpatialArgs args, ValueSource valueSource) {
// return new CustomScoreQuery(makeQuery(args), new FunctionQuery(valueSource));
//or...
// return new BooleanQuery.Builder()
// .add(new FunctionQuery(valueSource), BooleanClause.Occur.MUST)//matches everything and provides score
// .add(filterQuery, BooleanClause.Occur.FILTER)//filters (score isn't used)
// .build();
// }
// Builds a constant-score query for the requested spatial operation. Only Rectangle query shapes are
// accepted; Overlaps is not supported yet.
@Override
public Query makeQuery(SpatialArgs args) {
  final Shape queryShape = args.getShape();
  if (!(queryShape instanceof Rectangle)) {
    throw new UnsupportedOperationException("Can only query by Rectangle, not " + queryShape);
  }
  final Rectangle queryBox = (Rectangle) queryShape;

  // Useful for understanding Relations:
  // http://edndoc.esri.com/arcsde/9.1/general_topics/understand_spatial_relations.htm
  final SpatialOperation op = args.getOperation();
  final Query filter;
  if (op == SpatialOperation.BBoxIntersects || op == SpatialOperation.Intersects) {
    filter = makeIntersects(queryBox);
  } else if (op == SpatialOperation.BBoxWithin || op == SpatialOperation.IsWithin) {
    filter = makeWithin(queryBox);
  } else if (op == SpatialOperation.Contains) {
    filter = makeContains(queryBox);
  } else if (op == SpatialOperation.IsEqualTo) {
    filter = makeEquals(queryBox);
  } else if (op == SpatialOperation.IsDisjointTo) {
    filter = makeDisjoint(queryBox);
  } else { //no Overlaps support yet
    throw new UnsupportedSpatialOperation(op);
  }
  return new ConstantScoreQuery(filter);
}
/**
* Constructs a query to retrieve documents that fully contain the input envelope.
* The result requires both a Y condition and an X condition; the X condition depends on whether the
* query crosses the date line and on whether the context is geographic.
*
* @param bbox the query envelope
* @return the spatial query
*/
Query makeContains(Rectangle bbox) {
// general case
// docMinX <= queryExtent.getMinX() AND docMinY <= queryExtent.getMinY() AND docMaxX >= queryExtent.getMaxX() AND docMaxY >= queryExtent.getMaxY()
// Y conditions
// docMinY <= queryExtent.getMinY() AND docMaxY >= queryExtent.getMaxY()
Query qMinY = this.makeNumericRangeQuery(field_minY, null, bbox.getMinY(), false, true);
Query qMaxY = this.makeNumericRangeQuery(field_maxY, bbox.getMaxY(), null, true, false);
Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);
// X conditions
Query xConditions;
// queries that do not cross the date line
if (!bbox.getCrossesDateLine()) {
// X Conditions for documents that do not cross the date line,
// documents that contain the min X and max X of the query envelope,
// docMinX <= queryExtent.getMinX() AND docMaxX >= queryExtent.getMaxX()
Query qMinX = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
Query qMaxX = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);
Query qNonXDL = this.makeXDL(false, qMinMax);
if (!ctx.isGeo()) {
// non-geo context: only the plain (non-XDL) condition is used
xConditions = qNonXDL;
} else {
// X Conditions for documents that cross the date line,
// the left portion of the document contains the min X of the query
// OR the right portion of the document contains the max X of the query,
// docMinXLeft <= queryExtent.getMinX() OR docMaxXRight >= queryExtent.getMaxX()
Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qXDLLeft, qXDLRight);
Query qXDL = this.makeXDL(true, qXDLLeftRight);
// Special case: the query has zero width and sits exactly at x = 180 or x = -180. Then also accept
// documents whose minX or maxX equals the opposite dateline edge. Stays null (and is skipped by
// makeQuery) otherwise.
Query qEdgeDL = null;
if (bbox.getMinX() == bbox.getMaxX() && Math.abs(bbox.getMinX()) == 180) {
double edge = bbox.getMinX() * -1;//opposite dateline edge
qEdgeDL = makeQuery(BooleanClause.Occur.SHOULD,
makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
}
// apply the non-XDL and XDL conditions
xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL, qEdgeDL);
}
} else {
// queries that cross the date line
// No need to search for documents that do not cross the date line
// X Conditions for documents that cross the date line,
// the left portion of the document contains the min X of the query
// AND the right portion of the document contains the max X of the query,
// docMinXLeft <= queryExtent.getMinX() AND docMaxXRight >= queryExtent.getMaxX()
Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
Query qXDLLeftRight = this.makeXDL(true, this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight));
// ...or documents whose X extent is exactly [-180, 180]
Query qWorld = makeQuery(BooleanClause.Occur.MUST,
makeNumberTermQuery(field_minX, -180), makeNumberTermQuery(field_maxX, 180));
xConditions = makeQuery(BooleanClause.Occur.SHOULD, qXDLLeftRight, qWorld);
}
// both X and Y conditions must occur
return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
}
/**
* Constructs a query to retrieve documents that are disjoint to the input envelope.
* A document matches when it is disjoint in X or disjoint in Y; the X condition depends on whether the
* query crosses the date line and on whether the context is geographic.
*
* @param bbox the query envelope
* @return the spatial query
*/
Query makeDisjoint(Rectangle bbox) {
// general case
// docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX() OR docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()
// Y conditions
// docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()
Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMaxY(), null, false, false);
Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMinY(), false, false);
Query yConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qMinY, qMaxY);
// X conditions
Query xConditions;
// queries that do not cross the date line
if (!bbox.getCrossesDateLine()) {
// X Conditions for documents that do not cross the date line,
// docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()
Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
if (bbox.getMinX() == -180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
// a document ending at x = 180 touches a query starting at x = -180, so exclude it
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(qMinX, BooleanClause.Occur.MUST);
bq.add(makeNumberTermQuery(field_maxX, 180.0), BooleanClause.Occur.MUST_NOT);
qMinX = bq.build();
}
Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
if (bbox.getMaxX() == 180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
// a document starting at x = -180 touches a query ending at x = 180, so exclude it
BooleanQuery.Builder bq = new BooleanQuery.Builder();
bq.add(qMaxX, BooleanClause.Occur.MUST);
bq.add(makeNumberTermQuery(field_minX, -180.0), BooleanClause.Occur.MUST_NOT);
qMaxX = bq.build();
}
Query qMinMax = this.makeQuery(BooleanClause.Occur.SHOULD, qMinX, qMaxX);
Query qNonXDL = this.makeXDL(false, qMinMax);
if (!ctx.isGeo()) {
// non-geo context: only the plain (non-XDL) condition is used
xConditions = qNonXDL;
} else {
// X Conditions for documents that cross the date line,
// both the left and right portions of the document must be disjoint to the query
// (docMinXLeft > queryExtent.getMaxX() OR docMaxXLeft < queryExtent.getMinX()) AND
// (docMinXRight > queryExtent.getMaxX() OR docMaxXRight < queryExtent.getMinX())
// where: docMaxXLeft = 180.0, docMinXRight = -180.0
// (docMaxXLeft < queryExtent.getMinX()) equates to (180.0 < queryExtent.getMinX()) and is ignored
// (docMinXRight > queryExtent.getMaxX()) equates to (-180.0 > queryExtent.getMaxX()) and is ignored
Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXRight);
Query qXDL = this.makeXDL(true, qLeftRight);
// apply the non-XDL and XDL conditions
xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
}
} else {
// queries that cross the date line
// X Conditions for documents that do not cross the date line,
// the document must be disjoint to both the left and right query portions
// (docMinX > queryExtent.getMaxX()Left OR docMaxX < queryExtent.getMinX()) AND (docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()Left)
// where: queryExtent.getMaxX()Left = 180.0, queryExtent.getMinX()Left = -180.0
Query qMinXLeft = this.makeNumericRangeQuery(field_minX, 180.0, null, false, false);
Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
Query qMinXRight = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, -180.0, false, false);
Query qLeft = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXLeft, qMaxXLeft);
Query qRight = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXRight, qMaxXRight);
Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qLeft, qRight);
// Only documents that do NOT cross the date line are searched here (makeXDL(false, ...)).
// NOTE(review): presumably because a document that crosses the date line always overlaps in X a query
// that also crosses it, so it can only be disjoint through the Y conditions -- confirm.
xConditions = this.makeXDL(false, qLeftRight);
}
// either X or Y conditions should occur
return this.makeQuery(BooleanClause.Occur.SHOULD, xConditions, yConditions);
}
/**
 * Constructs a query to retrieve documents whose envelope equals the input envelope, i.e. all four
 * coordinates match exactly.
 *
 * @return the spatial query
 */
Query makeEquals(Rectangle bbox) {
  // docMinX = queryExtent.getMinX() AND docMinY = queryExtent.getMinY() AND docMaxX = queryExtent.getMaxX() AND docMaxY = queryExtent.getMaxY()
  return makeQuery(BooleanClause.Occur.MUST,
      makeNumberTermQuery(field_minX, bbox.getMinX()),
      makeNumberTermQuery(field_minY, bbox.getMinY()),
      makeNumberTermQuery(field_maxX, bbox.getMaxX()),
      makeNumberTermQuery(field_maxY, bbox.getMaxY()));
}
/**
 * Constructs a query to retrieve documents that intersect the input envelope. It is expressed as
 * "has an envelope AND NOT disjoint".
 *
 * @return the spatial query
 */
Query makeIntersects(Rectangle bbox) {
  // The original intersects query does not work for envelopes that cross the date line, so a NOT-disjoint
  // query is used instead. MUST_NOT causes a problem when it is the only clause type within a BooleanQuery;
  // to get around that, it is paired with a MUST clause matching every document that has an envelope.
  final Query qHasEnv;
  if (ctx.isGeo()) {
    // an envelope is flagged either as not crossing or as crossing the date line; accept both
    qHasEnv = this.makeQuery(BooleanClause.Occur.SHOULD, this.makeXDL(false), this.makeXDL(true));
  } else {
    qHasEnv = this.makeXDL(false);
  }

  // there must be an envelope, it must not be disjoint
  final BooleanQuery.Builder notDisjoint = new BooleanQuery.Builder();
  notDisjoint.add(qHasEnv, BooleanClause.Occur.MUST);
  notDisjoint.add(makeDisjoint(bbox), BooleanClause.Occur.MUST_NOT);
  return notDisjoint.build();
}
/**
 * Combines the given queries into one boolean query, adding each with the same logical operator.
 * {@code null} entries are skipped.
 *
 * @param occur the logical operator
 * @param queries the query collection
 * @return the query
 */
BooleanQuery makeQuery(BooleanClause.Occur occur, Query... queries) {
  final BooleanQuery.Builder builder = new BooleanQuery.Builder();
  for (int i = 0; i < queries.length; i++) {
    final Query clause = queries[i];
    if (clause == null) {
      continue;
    }
    builder.add(clause, occur);
  }
  return builder.build();
}
/**
* Constructs a query to retrieve documents that are fully within the input envelope.
* The result requires both a Y condition and an X condition, except when a geographic query spans the
* whole world in X, in which case only the Y condition is returned.
*
* @param bbox the query envelope
* @return the spatial query
*/
Query makeWithin(Rectangle bbox) {
// general case
// docMinX >= queryExtent.getMinX() AND docMinY >= queryExtent.getMinY() AND docMaxX <= queryExtent.getMaxX() AND docMaxY <= queryExtent.getMaxY()
// Y conditions
// docMinY >= queryExtent.getMinY() AND docMaxY <= queryExtent.getMaxY()
Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMinY(), null, true, false);
Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMaxY(), false, true);
Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);
// X conditions
Query xConditions;
if (ctx.isGeo() && bbox.getMinX() == -180.0 && bbox.getMaxX() == 180.0) {
//if query world-wraps, only the y condition matters
return yConditions;
} else if (!bbox.getCrossesDateLine()) {
// queries that do not cross the date line
// docMinX >= queryExtent.getMinX() AND docMaxX <= queryExtent.getMaxX()
Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);
double edge = 0;//none, otherwise opposite dateline of query
if (bbox.getMinX() == -180.0)
edge = 180;
else if (bbox.getMaxX() == 180.0)
edge = -180;
if (edge != 0 && ctx.isGeo()) {
// the query touches one dateline edge: also accept zero-width documents sitting on the opposite edge
Query edgeQ = makeQuery(BooleanClause.Occur.MUST,
makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
qMinMax = makeQuery(BooleanClause.Occur.SHOULD, qMinMax, edgeQ);
}
xConditions = this.makeXDL(false, qMinMax);
} else {
// queries that cross the date line
// X Conditions for documents that do not cross the date line
// the document should be within the left portion of the query
// docMinX >= queryExtent.getMinX() AND docMaxX <= 180.0
Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, 180.0, false, true);
Query qLeft = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXLeft);
// the document should be within the right portion of the query
// docMinX >= -180.0 AND docMaxX <= queryExtent.getMaxX()
Query qMinXRight = this.makeNumericRangeQuery(field_minX, -180.0, null, true, false);
Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
Query qRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXRight, qMaxXRight);
// either left or right conditions should occur,
// apply the left and right conditions to documents that do not cross the date line
Query qLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qLeft, qRight);
Query qNonXDL = this.makeXDL(false, qLeftRight);
// X Conditions for documents that cross the date line,
// the left portion of the document must be within the left portion of the query,
// AND the right portion of the document must be within the right portion of the query
// docMinXLeft >= queryExtent.getMinX() AND docMaxXLeft <= 180.0
// AND docMinXRight >= -180.0 AND docMaxXRight <= queryExtent.getMaxX()
Query qXDLLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
Query qXDLRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight);
Query qXDL = this.makeXDL(true, qXDLLeftRight);
// apply the non-XDL and XDL conditions
xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
}
// both X and Y conditions must occur
return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
}
/**
 * Constructs a term query on the dateline flag field, selecting documents that do or do not cross the
 * date line.
 *
 * @param crossedDateLine <code>true</code> for documents that cross the date line
 * @return the query
 */
private Query makeXDL(boolean crossedDateLine) {
  // The 'T' and 'F' values match solr fields
  final String flag;
  if (crossedDateLine) {
    flag = "T";
  } else {
    flag = "F";
  }
  return new TermQuery(new Term(field_xdl, flag));
}
/**
 * Constructs a query to retrieve documents that do or do not cross the date line and that also match the
 * supplied spatial query. In a non-geographic context the supplied query is returned unchanged.
 *
 * @param crossedDateLine <code>true</code> for documents that cross the date line
 * @param query the spatial query
 * @return the query
 */
private Query makeXDL(boolean crossedDateLine, Query query) {
  if (ctx.isGeo()) {
    final BooleanQuery.Builder both = new BooleanQuery.Builder();
    both.add(this.makeXDL(crossedDateLine), BooleanClause.Occur.MUST);
    both.add(query, BooleanClause.Occur.MUST);
    return both.build();
  }
  // no dateline outside a geographic context
  assert !crossedDateLine;
  return query;
}
// Exact-match query for a double value, built for whichever index is in use (pointValues or legacy
// numerics). Throws UnsupportedOperationException when neither is enabled.
private Query makeNumberTermQuery(String field, double number) {
  if (hasPointVals) {
    return DoublePoint.newExactQuery(field, number);
  }
  if (legacyNumericFieldType == null) {
    throw new UnsupportedOperationException("An index is required for this operation.");
  }
  // legacy numerics: match the full-precision (shift 0) prefix-coded term
  final BytesRefBuilder encoded = new BytesRefBuilder();
  final long sortableBits = NumericUtils.doubleToSortableLong(number);
  LegacyNumericUtils.longToPrefixCoded(sortableBits, 0, encoded);
  return new TermQuery(new Term(field, encoded.get()));
}
/**
 * Returns a numeric range query suited to the index in use:
 * {@link DoublePoint#newRangeQuery} for indexes created using {@link DoublePoint} fields, or
 * {@link LegacyNumericRangeQuery} for indexes created using {@code FieldType.LegacyNumericType}.
 * For point values, a {@code null} bound is open-ended and an exclusive bound is turned into the adjacent
 * double value.
 *
 * @param fieldname field name. must not be <code>null</code>.
 * @param min minimum value of the range.
 * @param max maximum value of the range.
 * @param minInclusive include the minimum value if <code>true</code>.
 * @param maxInclusive include the maximum value if <code>true</code>
 * @throws UnsupportedOperationException if neither pointValues nor legacy numerics are enabled
 */
private Query makeNumericRangeQuery(String fieldname, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
  if (hasPointVals) {
    double lower;
    if (min == null) {
      lower = Double.NEGATIVE_INFINITY;
    } else {
      lower = min;
    }
    double upper;
    if (max == null) {
      upper = Double.POSITIVE_INFINITY;
    } else {
      upper = max;
    }
    if (!minInclusive) {
      lower = Math.nextUp(lower);
    }
    if (!maxInclusive) {
      upper = Math.nextDown(upper);
    }
    return DoublePoint.newRangeQuery(fieldname, lower, upper);
  }
  if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
    final int precisionStep = legacyNumericFieldType.numericPrecisionStep();
    return LegacyNumericRangeQuery.newDoubleRange(fieldname, precisionStep, min, max, minInclusive, maxInclusive);
  }
  throw new UnsupportedOperationException("An index is required for this operation.");
}
}

View File

@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.legacy;
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Explanation;
import org.locationtech.spatial4j.shape.Rectangle;
/**
 * A ValueSource whose {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)} returns the
 * Rectangle indexed for the document, read from the four coordinate doc-values fields of a
 * {@link BBoxStrategy}.
 *
 * @lucene.internal
 */
class BBoxValueSource extends ValueSource {

  private final BBoxStrategy strategy;

  public BBoxValueSource(BBoxStrategy strategy) {
    this.strategy = strategy;
  }

  @Override
  public String description() {
    return "bboxShape(" + strategy.getFieldName() + ")";
  }

  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    final LeafReader leaf = readerContext.reader();
    final NumericDocValues minXValues = DocValues.getNumeric(leaf, strategy.field_minX);
    final NumericDocValues minYValues = DocValues.getNumeric(leaf, strategy.field_minY);
    final NumericDocValues maxXValues = DocValues.getNumeric(leaf, strategy.field_maxX);
    final NumericDocValues maxYValues = DocValues.getNumeric(leaf, strategy.field_maxY);

    // One Rectangle instance is reused: objectVal() resets it and returns it for every document.
    final Rectangle sharedRect = strategy.getSpatialContext().makeRectangle(0,0,0,0);

    return new FunctionValues() {

      private int lastDocID = -1;

      // Advances the iterator to doc when it is still behind, and returns the decoded double if it lands
      // exactly on doc; otherwise 0.0.
      private double getDocValue(NumericDocValues values, int doc) throws IOException {
        int position = values.docID();
        if (position < doc) {
          position = values.advance(doc);
        }
        return position == doc ? Double.longBitsToDouble(values.longValue()) : 0.0;
      }

      @Override
      public Object objectVal(int doc) throws IOException {
        if (doc < lastDocID) {
          throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc);
        }
        lastDocID = doc;

        final double minXValue = getDocValue(minXValues, doc);
        if (minXValues.docID() != doc) {
          // no minX for this document: it has no rectangle
          return null;
        }
        final double minYValue = getDocValue(minYValues, doc);
        final double maxXValue = getDocValue(maxXValues, doc);
        final double maxYValue = getDocValue(maxYValues, doc);
        sharedRect.reset(minXValue, maxXValue, minYValue, maxYValue);
        return sharedRect;
      }

      @Override
      public String strVal(int doc) throws IOException {//TODO support WKT output once Spatial4j does
        final Object shape = objectVal(doc);
        if (shape == null) {
          return null;
        }
        return shape.toString();
      }

      @Override
      public boolean exists(int doc) throws IOException {
        // a document has a value exactly when the minX iterator can be positioned on it
        getDocValue(minXValues, doc);
        return minXValues.docID() == doc;
      }

      @Override
      public Explanation explain(int doc) throws IOException {
        return Explanation.match(Float.NaN, toString(doc));
      }

      @Override
      public String toString(int doc) throws IOException {
        return description() + '=' + strVal(doc);
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    return strategy.equals(((BBoxValueSource) o).strategy);
  }

  @Override
  public int hashCode() {
    return strategy.hashCode();
  }
}

View File

@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.legacy;
import org.apache.lucene.index.NumericDocValues;
import org.locationtech.spatial4j.distance.DistanceCalculator;
import org.locationtech.spatial4j.shape.Point;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import java.io.IOException;
import java.util.Map;
/**
 * An implementation of the Lucene ValueSource model that returns the distance
 * for a {@link PointVectorStrategy}: the distance between a fixed point and the point read from the
 * strategy's X and Y doc-values fields, scaled by a multiplier.
 *
 * @lucene.internal
 */
public class DistanceValueSource extends ValueSource {

  private final PointVectorStrategy strategy;
  private final Point from;
  private final double multiplier;

  /**
   * Constructor.
   *
   * @param strategy supplies the X/Y field names and the spatial context
   * @param from the point distances are measured from
   * @param multiplier factor applied to every computed distance
   */
  public DistanceValueSource(PointVectorStrategy strategy, Point from, double multiplier) {
    this.strategy = strategy;
    this.from = from;
    this.multiplier = multiplier;
  }

  /**
   * Returns the ValueSource description.
   */
  @Override
  public String description() {
    return "DistanceValueSource("+strategy+", "+from+")";
  }

  /**
   * Returns the FunctionValues used by the function query.
   */
  @Override
  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
    LeafReader reader = readerContext.reader();

    final NumericDocValues ptX = DocValues.getNumeric(reader, strategy.getFieldNameX());
    final NumericDocValues ptY = DocValues.getNumeric(reader, strategy.getFieldNameY());

    return new FunctionValues() {

      private final Point from = DistanceValueSource.this.from;
      private final DistanceCalculator calculator = strategy.getSpatialContext().getDistCalc();
      // value reported for documents that have no point
      private final double nullValue =
          (strategy.getSpatialContext().isGeo() ? 180 * multiplier : Double.MAX_VALUE);

      // Advances the iterator to doc when it is still behind, and returns the decoded double if it lands
      // exactly on doc; otherwise 0.0.
      private double getDocValue(NumericDocValues values, int doc) throws IOException {
        int curDocID = values.docID();
        if (doc > curDocID) {
          curDocID = values.advance(doc);
        }
        if (doc == curDocID) {
          return Double.longBitsToDouble(values.longValue());
        } else {
          return 0.0;
        }
      }

      @Override
      public float floatVal(int doc) throws IOException {
        return (float) doubleVal(doc);
      }

      @Override
      public double doubleVal(int doc) throws IOException {
        // a document only has a point when the X iterator can be positioned on it
        double x = getDocValue(ptX, doc);
        if (ptX.docID() == doc) {
          double y = getDocValue(ptY, doc);
          assert ptY.docID() == doc;
          return calculator.distance(from, x, y) * multiplier;
        }
        return nullValue;
      }

      @Override
      public String toString(int doc) throws IOException {
        return description() + "=" + floatVal(doc);
      }
    };
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    DistanceValueSource that = (DistanceValueSource) o;

    if (!from.equals(that.from)) return false;
    if (!strategy.equals(that.strategy)) return false;
    // Double.compare (rather than !=) treats NaN as equal to itself and keeps equals consistent with the
    // Double.hashCode used in hashCode().
    return Double.compare(multiplier, that.multiplier) == 0;
  }

  @Override
  public int hashCode() {
    // Covers every field that equals() compares, so sources differing only in strategy or multiplier
    // do not all collide on the hash of the origin point.
    int result = from.hashCode();
    result = 31 * result + strategy.hashCode();
    result = 31 * result + Double.hashCode(multiplier);
    return result;
  }
}

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFloatField}.
*
* <p>To perform range querying or filtering against a
* <code>LegacyDoubleField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <code>LegacyDoubleField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* <code>LegacyDoubleField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#DOUBLE}. <code>LegacyDoubleField</code>
@ -85,7 +85,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is &lt; 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@ -94,8 +94,8 @@ import org.apache.lucene.index.IndexOptions;
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@ -103,7 +103,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed. </p>
*
* <p>More advanced users can instead use {@link
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
@ -144,7 +144,7 @@ public final class LegacyDoubleField extends LegacyField {
/** Creates a stored or un-stored LegacyDoubleField with the provided value
* and default <code>precisionStep</code> {@link
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* @param name field name
* @param value 64-bit double value
* @param stored Store.YES if the content should also be stored

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
@ -60,7 +60,7 @@ public final class LegacyFieldType extends FieldType {
/**
* LegacyNumericType: if non-null then the field's value will be indexed
* numerically so that {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} can be used at
* numerically so that {@link org.apache.solr.legacy.LegacyNumericRangeQuery} can be used at
* search time.
* <p>
* The default is <code>null</code> (no numeric type)
@ -97,7 +97,7 @@ public final class LegacyFieldType extends FieldType {
* <p>
* This has no effect if {@link #numericType()} returns null.
* <p>
* The default is {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
* The default is {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
* @see #setNumericPrecisionStep(int)
*
* @deprecated Please switch to {@link org.apache.lucene.index.PointValues} instead

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FloatPoint;
@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyDoubleField}.
*
* <p>To perform range querying or filtering against a
* <code>LegacyFloatField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <code>LegacyFloatField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* <code>LegacyFloatField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#FLOAT}. <code>LegacyFloatField</code>
@ -83,7 +83,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is &lt; 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@ -92,8 +92,8 @@ import org.apache.lucene.index.IndexOptions;
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@ -101,7 +101,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed. </p>
*
* <p>More advanced users can instead use {@link
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
@ -144,7 +144,7 @@ public final class LegacyFloatField extends LegacyField {
/** Creates a stored or un-stored LegacyFloatField with the provided value
* and default <code>precisionStep</code> {@link
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* @param name field name
* @param value 32-bit float value
* @param stored Store.YES if the content should also be stored

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@ -50,7 +50,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyDoubleField}.
*
* <p>To perform range querying or filtering against a
* <code>LegacyIntField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <code>LegacyIntField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* <code>LegacyIntField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#INT}. <code>LegacyIntField</code>
@ -84,7 +84,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is &lt; 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@ -93,8 +93,8 @@ import org.apache.lucene.index.IndexOptions;
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@ -102,7 +102,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed. </p>
*
* <p>More advanced users can instead use {@link
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
@ -145,7 +145,7 @@ public final class LegacyIntField extends LegacyField {
/** Creates a stored or un-stored LegacyIntField with the provided value
* and default <code>precisionStep</code> {@link
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* @param name field name
* @param value 32-bit integer value
* @param stored Store.YES if the content should also be stored

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@ -61,7 +61,7 @@ import org.apache.lucene.index.IndexOptions;
* <code>long</code> value.
*
* <p>To perform range querying or filtering against a
* <code>LegacyLongField</code>, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <code>LegacyLongField</code>, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* <code>LegacyLongField</code>, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#LONG}. <code>LegacyLongField</code>
@ -95,7 +95,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is &lt; 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@ -104,8 +104,8 @@ import org.apache.lucene.index.IndexOptions;
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
* configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
* indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@ -113,7 +113,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed.
*
* <p>More advanced users can instead use {@link
* org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
@ -154,7 +154,7 @@ public final class LegacyLongField extends LegacyField {
/** Creates a stored or un-stored LegacyLongField with the provided value
* and default <code>precisionStep</code> {@link
* org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* @param name field name
* @param value 64-bit long value
* @param stored Store.YES if the content should also be stored

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import java.io.IOException;
@ -41,9 +41,9 @@ import org.apache.lucene.index.Term; // for javadocs
/**
* <p>A {@link Query} that matches numeric values within a
* specified range. To use this, you must first index the
* numeric values using {@link org.apache.lucene.legacy.LegacyIntField}, {@link
* org.apache.lucene.legacy.LegacyFloatField}, {@link org.apache.lucene.legacy.LegacyLongField} or {@link org.apache.lucene.legacy.LegacyDoubleField} (expert: {@link
* org.apache.lucene.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
* numeric values using {@link org.apache.solr.legacy.LegacyIntField}, {@link
* org.apache.solr.legacy.LegacyFloatField}, {@link org.apache.solr.legacy.LegacyLongField} or {@link org.apache.solr.legacy.LegacyDoubleField} (expert: {@link
* org.apache.solr.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
* you should use {@link TermRangeQuery}.</p>
*
* <p>You create a new LegacyNumericRangeQuery with the static
@ -97,7 +97,7 @@ import org.apache.lucene.index.Term; // for javadocs
* (all numerical values like doubles, longs, floats, and ints are converted to
* lexicographic sortable string representations and stored with different precisions
* (for a more detailed description of how the values are stored,
* see {@link org.apache.lucene.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
* see {@link org.apache.solr.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
* The center of the range is searched only with the lowest possible precision in the <em>trie</em>,
* while the boundaries are matched more exactly. This reduces the number of terms dramatically.</p>
*
@ -113,7 +113,7 @@ import org.apache.lucene.index.Term; // for javadocs
* <h3><a name="precisionStepDesc">Precision Step</a></h3>
* <p>You can choose any <code>precisionStep</code> when encoding values.
* Lower step values mean more precisions and so more terms in index (and index gets larger). The number
* of indexed terms per value is (those are generated by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}):
* of indexed terms per value is (those are generated by {@link org.apache.solr.legacy.LegacyNumericTokenStream}):
* <p style="font-family:serif">
* &nbsp;&nbsp;indexedTermsPerValue = <b>ceil</b><big>(</big>bitsPerValue / precisionStep<big>)</big>
* </p>
@ -149,8 +149,8 @@ import org.apache.lucene.index.Term; // for javadocs
* <li>Steps <b>&ge;64</b> for <em>long/double</em> and <b>&ge;32</b> for <em>int/float</em> produces one token
* per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used
* to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as
* <code>precisionStep</code>). Using {@link org.apache.lucene.legacy.LegacyIntField},
* {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} for sorting
* <code>precisionStep</code>). Using {@link org.apache.solr.legacy.LegacyIntField},
* {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} for sorting
* is ideal, because building the field cache is much faster than with text-only numbers.
* These fields have one term per value and therefore also work with term enumeration for building distinct lists
* (e.g. facets / preselected values to search for).
@ -205,7 +205,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
/**
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>long</code>
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
@ -231,7 +231,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
/**
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>int</code>
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
* by setting the min or max value to <code>null</code>. By setting inclusive to false, it will
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
@ -259,7 +259,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
/**
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>double</code>
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
* by setting the min or max value to <code>null</code>.
* {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query
@ -289,7 +289,7 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
/**
* Factory that creates a <code>LegacyNumericRangeQuery</code>, that queries a <code>float</code>
* range using the default <code>precisionStep</code> {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* range using the default <code>precisionStep</code> {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* You can have half-open ranges (which are in fact &lt;/&le; or &gt;/&ge; queries)
* by setting the min or max value to <code>null</code>.
* {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query
@ -390,8 +390,8 @@ public final class LegacyNumericRangeQuery<T extends Number> extends MultiTermQu
* <p>
* WARNING: This term enumeration is not guaranteed to be always ordered by
* {@link Term#compareTo}.
* The ordering depends on how {@link org.apache.lucene.legacy.LegacyNumericUtils#splitLongRange} and
* {@link org.apache.lucene.legacy.LegacyNumericUtils#splitIntRange} generates the sub-ranges. For
* The ordering depends on how {@link org.apache.solr.legacy.LegacyNumericUtils#splitLongRange} and
* {@link org.apache.solr.legacy.LegacyNumericUtils#splitIntRange} generates the sub-ranges. For
* {@link MultiTermQuery} ordering is not relevant.
*/
private final class NumericRangeTermsEnum extends FilteredTermsEnum {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import java.util.Objects;
@ -35,10 +35,10 @@ import org.apache.lucene.util.NumericUtils;
/**
* <b>Expert:</b> This class provides a {@link TokenStream}
* for indexing numeric values that can be used by {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
*
* <p>Note that for simple usage, {@link org.apache.lucene.legacy.LegacyIntField}, {@link
* org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} is
* <p>Note that for simple usage, {@link org.apache.solr.legacy.LegacyIntField}, {@link
* org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} is
* recommended. These fields disable norms and
* term freqs, as they are not usually needed during
* searching. If you need to change these settings, you
@ -81,7 +81,7 @@ import org.apache.lucene.util.NumericUtils;
* than one numeric field, use a separate <code>LegacyNumericTokenStream</code>
* instance for each.</p>
*
* <p>See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details on the
* <p>See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details on the
* <a
* href="LegacyNumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* parameter as well as how numeric fields work under the hood.</p>
@ -140,7 +140,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
}
}
/** Implementation of {@link org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
/** Implementation of {@link org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
* @lucene.internal
* @since 4.0
*/
@ -240,7 +240,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
/**
* Creates a token stream for numeric values using the default <code>precisionStep</code>
* {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
* {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
* before using set a value using the various set<em>???</em>Value() methods.
*/
public LegacyNumericTokenStream() {

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
/** Data type of the numeric value
* @since 3.2

View File

@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;
import java.io.IOException;
@ -43,9 +43,9 @@ import org.apache.lucene.util.BytesRefBuilder;
* during encoding.
*
* <p>For easy usage, the trie algorithm is implemented for indexing inside
* {@link org.apache.lucene.legacy.LegacyNumericTokenStream} that can index <code>int</code>, <code>long</code>,
* {@link org.apache.solr.legacy.LegacyNumericTokenStream} that can index <code>int</code>, <code>long</code>,
* <code>float</code>, and <code>double</code>. For querying,
* {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} implements the query part
* {@link org.apache.solr.legacy.LegacyNumericRangeQuery} implements the query part
* for the same data types.
*
* @lucene.internal
@ -61,15 +61,15 @@ public final class LegacyNumericUtils {
private LegacyNumericUtils() {} // no instance!
/**
* The default precision step used by {@link org.apache.lucene.legacy.LegacyLongField},
* {@link org.apache.lucene.legacy.LegacyDoubleField}, {@link org.apache.lucene.legacy.LegacyNumericTokenStream}, {@link
* org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* The default precision step used by {@link org.apache.solr.legacy.LegacyLongField},
* {@link org.apache.solr.legacy.LegacyDoubleField}, {@link org.apache.solr.legacy.LegacyNumericTokenStream}, {@link
* org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static final int PRECISION_STEP_DEFAULT = 16;
/**
* The default precision step used by {@link org.apache.lucene.legacy.LegacyIntField} and
* {@link org.apache.lucene.legacy.LegacyFloatField}.
* The default precision step used by {@link org.apache.solr.legacy.LegacyIntField} and
* {@link org.apache.solr.legacy.LegacyFloatField}.
*/
public static final int PRECISION_STEP_DEFAULT_32 = 8;
@ -101,7 +101,7 @@ public final class LegacyNumericUtils {
/**
* Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
* This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* After encoding, {@code bytes.offset} will always be 0.
* @param val the numeric value
* @param shift how many bits to strip from the right
@ -128,7 +128,7 @@ public final class LegacyNumericUtils {
/**
* Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
* This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* After encoding, {@code bytes.offset} will always be 0.
* @param val the numeric value
* @param shift how many bits to strip from the right
@ -232,7 +232,7 @@ public final class LegacyNumericUtils {
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
* {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
* method.
* <p>This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <p>This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static void splitLongRange(final LongRangeBuilder builder,
final int precisionStep, final long minBound, final long maxBound
@ -246,7 +246,7 @@ public final class LegacyNumericUtils {
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
* {@link IntRangeBuilder#addRange(BytesRef,BytesRef)}
* method.
* <p>This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
* <p>This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static void splitIntRange(final IntRangeBuilder builder,
final int precisionStep, final int minBound, final int maxBound

View File

@ -0,0 +1,292 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.legacy;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.solr.legacy.LegacyDoubleField;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.SpatialStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Circle;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
import org.locationtech.spatial4j.shape.Shape;
/**
* Simple {@link SpatialStrategy} which represents Points in two numeric fields.
* The Strategy's best feature is decent distance sort.
*
* <p>
* <b>Characteristics:</b>
* <br>
* <ul>
* <li>Only indexes points; just one per field value.</li>
* <li>Can query by a rectangle or circle.</li>
* <li>{@link
* org.apache.lucene.spatial.query.SpatialOperation#Intersects} and {@link
* SpatialOperation#IsWithin} are supported.</li>
* <li>Requires DocValues for
* {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} and for
* searching with a Circle.</li>
* </ul>
*
* <p>
* <b>Implementation:</b>
* <p>
* This is a simple Strategy. Search works with a pair of range queries on two {@link DoublePoint}s representing
* x &amp; y fields. A Circle query does the same bbox query but adds a
* ValueSource filter on
* {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}.
* <p>
* One performance shortcoming with this strategy is that a scenario involving
* both a search using a Circle and sort will result in calculations for the
* spatial distance being done twice -- once for the filter and second for the
* sort.
*
* @lucene.experimental
*/
public class PointVectorStrategy extends SpatialStrategy {
// note: a FieldType is used here only to describe which options are wanted on the field.
// It is not used as-is with a Field, because more than one Field is created.

/**
 * Default options: pointValues and numeric docValues, not stored, and nothing else.
 */
public static FieldType DEFAULT_FIELDTYPE;

/**
 * Legacy options: legacy numerics (double, precision step 8), no docValues, not stored.
 *
 * @deprecated LegacyNumerics will be removed
 */
@Deprecated
public static LegacyFieldType LEGACY_FIELDTYPE;

static {
  // Default configuration: pointValues + docValues.
  final FieldType pointsAndDocValues = new FieldType();
  pointsAndDocValues.setDimensions(1, Double.BYTES); // pointValues (assume Double)
  pointsAndDocValues.setDocValuesType(DocValuesType.NUMERIC); // docValues
  pointsAndDocValues.setStored(false);
  pointsAndDocValues.freeze();
  DEFAULT_FIELDTYPE = pointsAndDocValues;

  // Legacy configuration: legacy numerics only.
  final LegacyFieldType legacyNumerics = new LegacyFieldType();
  legacyNumerics.setIndexOptions(IndexOptions.DOCS);
  legacyNumerics.setNumericType(LegacyNumericType.DOUBLE);
  legacyNumerics.setNumericPrecisionStep(8); // same as solr default
  legacyNumerics.setDocValuesType(DocValuesType.NONE); // no docValues!
  legacyNumerics.setStored(false);
  legacyNumerics.freeze();
  LEGACY_FIELDTYPE = legacyNumerics;
}
/** Suffix appended to the field name prefix to form the name of the x field. */
public static final String SUFFIX_X = "__x";
/** Suffix appended to the field name prefix to form the name of the y field. */
public static final String SUFFIX_Y = "__y";
// Field name prefix + SUFFIX_X; set in the constructor.
private final String fieldNameX;
// Field name prefix + SUFFIX_Y; set in the constructor.
private final String fieldNameY;
// Size of the Field[] created per indexed point: two entries (x and y) for every enabled option below.
private final int fieldsLen;
// True when the FieldType given to the constructor is stored.
private final boolean hasStored;
// True when the FieldType given to the constructor has a docValuesType other than NONE.
private final boolean hasDocVals;
// True when the FieldType given to the constructor has a point dimension count greater than zero.
private final boolean hasPointVals;
// equiv to "hasLegacyNumerics": non-null only when legacy numerics were requested, null otherwise.
private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
/**
 * Creates a {@link PointVectorStrategy} configured with {@link #DEFAULT_FIELDTYPE}, i.e. one that uses
 * {@link DoublePoint} and {@link DoublePoint#newRangeQuery}.
 */
public static PointVectorStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
  final FieldType defaultOptions = DEFAULT_FIELDTYPE;
  return new PointVectorStrategy(ctx, fieldNamePrefix, defaultOptions);
}
/**
 * Creates a {@link PointVectorStrategy} configured with {@link #LEGACY_FIELDTYPE}, i.e. one that uses
 * {@link LegacyDoubleField} for backwards compatibility.
 * Back-compat is limited, however: circle queries and {@link #makeDistanceValueSource(Point, double)}
 * are not supported, since they require docValues and the legacy config didn't have that.
 *
 * @deprecated LegacyNumerics will be removed
 */
@Deprecated
public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
  final FieldType legacyOptions = LEGACY_FIELDTYPE;
  return new PointVectorStrategy(ctx, fieldNamePrefix, legacyOptions);
}
/**
 * Creates a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
 * The field type is only used to articulate the desired options (namely pointValues, docValues, stored).
 * Legacy numerics are configurable this way too, by passing a {@link LegacyFieldType} with a numeric type.
 *
 * @throws IllegalArgumentException if both pointValues and legacy numerics are requested, or if the
 *         legacy numeric type is not {@link LegacyNumericType#DOUBLE}
 */
public PointVectorStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
  super(ctx, fieldNamePrefix);
  this.fieldNameX = fieldNamePrefix + SUFFIX_X;
  this.fieldNameY = fieldNamePrefix + SUFFIX_Y;

  // Work out which representations were requested.
  this.hasStored = fieldType.stored();
  this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE;
  this.hasPointVals = fieldType.pointDimensionCount() > 0;

  final boolean wantsLegacyNumerics = fieldType.indexOptions() != IndexOptions.NONE
      && fieldType instanceof LegacyFieldType
      && ((LegacyFieldType) fieldType).numericType() != null;

  if (!wantsLegacyNumerics) {
    this.legacyNumericFieldType = null;
  } else {
    if (hasPointVals) {
      throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
    }
    final LegacyFieldType requested = (LegacyFieldType) fieldType;
    if (requested.numericType() != LegacyNumericType.DOUBLE) {
      throw new IllegalArgumentException(getClass() + " does not support " + requested.numericType());
    }
    // Only the precision step is taken from the caller's type; everything else comes from the
    // not-stored LegacyDoubleField type.
    final LegacyFieldType xyType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
    xyType.setNumericPrecisionStep(requested.numericPrecisionStep());
    xyType.freeze();
    this.legacyNumericFieldType = xyType;
  }

  // Each enabled representation contributes one x/y pair of Fields per indexed point.
  int pairCount = 0;
  if (hasStored) {
    pairCount++;
  }
  if (hasDocVals) {
    pairCount++;
  }
  if (hasPointVals) {
    pairCount++;
  }
  if (legacyNumericFieldType != null) {
    pairCount++;
  }
  this.fieldsLen = pairCount * 2;
}
/** Returns the name of the field holding the X coordinate (field name prefix plus {@link #SUFFIX_X}). */
String getFieldNameX() {
  return this.fieldNameX;
}
/** Returns the name of the field holding the Y coordinate (field name prefix plus {@link #SUFFIX_Y}). */
String getFieldNameY() {
  return this.fieldNameY;
}
/**
 * Creates the index fields for {@code shape}, which must be a {@link Point}.
 *
 * @throws UnsupportedOperationException if {@code shape} is not a {@link Point}
 */
@Override
public Field[] createIndexableFields(Shape shape) {
  if (!(shape instanceof Point)) {
    throw new UnsupportedOperationException("Can only index Point, not " + shape);
  }
  return createIndexableFields((Point) shape);
}
/**
 * Creates one X field and one Y field for each representation enabled on this strategy, in the order
 * stored, docValues, point values, legacy numerics.
 *
 * @param point the point to index
 * @return an array of exactly {@code fieldsLen} fields
 * @see #createIndexableFields(org.locationtech.spatial4j.shape.Shape)
 */
public Field[] createIndexableFields(Point point) {
  final Field[] result = new Field[fieldsLen];
  int next = 0; // index of the next free slot
  if (hasStored) {
    result[next++] = new StoredField(fieldNameX, point.getX());
    result[next++] = new StoredField(fieldNameY, point.getY());
  }
  if (hasDocVals) {
    result[next++] = new DoubleDocValuesField(fieldNameX, point.getX());
    result[next++] = new DoubleDocValuesField(fieldNameY, point.getY());
  }
  if (hasPointVals) {
    result[next++] = new DoublePoint(fieldNameX, point.getX());
    result[next++] = new DoublePoint(fieldNameY, point.getY());
  }
  if (legacyNumericFieldType != null) {
    result[next++] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
    result[next++] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
  }
  // every slot sized by the constructor must have been filled
  assert next == result.length;
  return result;
}
/**
 * Returns a {@link DistanceValueSource} bound to this strategy, the given query point and multiplier.
 */
@Override
public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
  final ValueSource distance = new DistanceValueSource(this, queryPoint, multiplier);
  return distance;
}
/**
 * Builds a constant-score query for {@code args}. Only the {@code Intersects} and {@code IsWithin}
 * operations are accepted, and only {@link Rectangle} and {@link Circle} query shapes.
 * A rectangle becomes a pair of coordinate range queries; a circle becomes its bounding-box query
 * filtered by a distance range of {@code [0, radius]} (both ends inclusive) from the circle's center.
 *
 * @throws UnsupportedSpatialOperation if the operation is neither Intersects nor IsWithin
 * @throws UnsupportedOperationException if the shape is neither a Rectangle nor a Circle
 */
@Override
public ConstantScoreQuery makeQuery(SpatialArgs args) {
  final boolean supportedOp = SpatialOperation.is(args.getOperation(),
      SpatialOperation.Intersects,
      SpatialOperation.IsWithin);
  if (!supportedOp) {
    throw new UnsupportedSpatialOperation(args.getOperation());
  }

  final Shape queryShape = args.getShape();
  if (queryShape instanceof Rectangle) {
    return new ConstantScoreQuery(makeWithin((Rectangle) queryShape));
  }
  if (!(queryShape instanceof Circle)) {
    throw new UnsupportedOperationException("Only Rectangles and Circles are currently supported, " +
        "found [" + queryShape.getClass() + "]");//TODO
  }

  final Circle circle = (Circle) queryShape;
  final Query boundingBoxQuery = makeWithin(circle.getBoundingBox());
  final BooleanQuery.Builder filters = new BooleanQuery.Builder();
  final FunctionRangeQuery distanceQuery =
      new FunctionRangeQuery(makeDistanceValueSource(circle.getCenter()), 0.0, circle.getRadius(), true, true);
  // the bounding box should have the lowest "cost" value and so drive iteration
  filters.add(boundingBoxQuery, BooleanClause.Occur.FILTER);
  filters.add(distanceQuery, BooleanClause.Occur.FILTER);
  return new ConstantScoreQuery(filters.build());
}
/**
 * Constructs a query matching documents whose X and Y field values both fall inside {@code bbox}.
 * When the box crosses the dateline, the X constraint is split into two open-ended ranges of which
 * at least one must match.
 */
private Query makeWithin(Rectangle bbox) {
  final BooleanQuery.Builder builder = new BooleanQuery.Builder();
  if (!bbox.getCrossesDateLine()) {
    builder.add(rangeQuery(fieldNameX, bbox.getMinX(), bbox.getMaxX()), BooleanClause.Occur.MUST);
  } else {
    // use null as performance trick since no data will be beyond the world bounds
    final Query westOfMax = rangeQuery(fieldNameX, null/*-180*/, bbox.getMaxX());
    builder.add(westOfMax, BooleanClause.Occur.SHOULD);
    final Query eastOfMin = rangeQuery(fieldNameX, bbox.getMinX(), null/*+180*/);
    builder.add(eastOfMin, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(1);//must match at least one of the SHOULD
  }
  builder.add(rangeQuery(fieldNameY, bbox.getMinY(), bbox.getMaxY()), BooleanClause.Occur.MUST);
  return builder.build();
}
/**
 * Returns an inclusive numeric range query chosen by how this strategy indexes its values:
 * {@link DoublePoint#newRangeQuery} when point values are enabled, otherwise
 * {@link LegacyNumericRangeQuery} when legacy numerics are enabled.
 *
 * @param fieldName the field to query
 * @param min lower bound, or {@code null} for an open lower end
 * @param max upper bound, or {@code null} for an open upper end
 * @throws UnsupportedOperationException if neither point values nor legacy numerics are indexed
 */
private Query rangeQuery(String fieldName, Double min, Double max) {
  if (hasPointVals) {
    // points have no "open" bound, so a missing end is expressed as an infinity
    final double lower = (min == null) ? Double.NEGATIVE_INFINITY : min;
    final double upper = (max == null) ? Double.POSITIVE_INFINITY : max;
    return DoublePoint.newRangeQuery(fieldName, lower, upper);
  }
  if (legacyNumericFieldType == null) {
    //TODO try doc-value range query?
    throw new UnsupportedOperationException("An index is required for this operation.");
  }
  // todo remove legacy numeric support in 7.0
  final int precisionStep = legacyNumericFieldType.numericPrecisionStep();
  return LegacyNumericRangeQuery.newDoubleRange(fieldName, precisionStep, min, max, true, true);//inclusive
}
}

View File

@ -18,4 +18,4 @@
/**
* Deprecated stuff!
*/
package org.apache.lucene.legacy;
package org.apache.solr.legacy;

View File

@ -28,6 +28,7 @@ import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.FlattenGraphFilterFactory; // javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
@ -50,7 +51,11 @@ import org.slf4j.LoggerFactory;
/**
* TokenFilterFactory and ManagedResource implementation for
* doing CRUD on synonyms using the REST API.
*
* @deprecated Use {@link ManagedSynonymGraphFilterFactory} instead, but be sure to also
* use {@link FlattenGraphFilterFactory} at index time (not at search time) as well.
*/
@Deprecated
public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

View File

@ -0,0 +1,437 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.rest.schema.analysis;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.text.ParseException;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.rest.BaseSolrResource;
import org.apache.solr.rest.ManagedResource;
import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
import org.restlet.data.Status;
import org.restlet.resource.ResourceException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * TokenFilterFactory and ManagedResource implementation for
 * doing CRUD on synonyms using the REST API. Token filtering itself is delegated to a
 * {@link SynonymGraphFilterFactory} whose synonym map is built from the managed mappings.
 */
public class ManagedSynonymGraphFilterFactory extends BaseManagedTokenFilterFactory {

  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  public static final String SYNONYM_MAPPINGS = "synonymMappings";
  public static final String IGNORE_CASE_INIT_ARG = "ignoreCase";

  /**
   * Used internally to preserve the case of synonym mappings regardless
   * of the ignoreCase setting.
   */
  private static class CasePreservedSynonymMappings {
    /** Synonyms keyed by the term exactly as it was supplied (original case). */
    Map<String,Set<String>> mappings = new TreeMap<>();

    /**
     * Provides a view of the mappings for a given term; specifically, if
     * ignoreCase is true, then the returned "view" contains the mappings
     * for all known cases of the term, if it is false, then only the
     * mappings for the specific case is returned.
     *
     * @param ignoreCase whether all case variants should be merged into the result
     * @param key the term in its original case; only consulted when {@code ignoreCase} is false
     * @return the synonyms, or {@code null} when {@code ignoreCase} is false and the key is unknown
     */
    Set<String> getMappings(boolean ignoreCase, String key) {
      Set<String> synMappings = null;
      if (ignoreCase) {
        // TODO: should we return the mapped values in all lower-case here?
        if (mappings.size() == 1) {
          // if only one in the map (which is common) just return it directly
          return mappings.values().iterator().next();
        }

        synMappings = new TreeSet<>();
        for (Set<String> next : mappings.values())
          synMappings.addAll(next);
      } else {
        synMappings = mappings.get(key);
      }
      return synMappings;
    }

    @Override
    public String toString() {
      return mappings.toString();
    }
  }

  /**
   * ManagedResource implementation for synonyms, which are so specialized that
   * it makes sense to implement this class as an inner class as it has little
   * application outside the synonym filter factory use cases.
   */
  public static class SynonymManager extends ManagedResource
      implements ManagedResource.ChildResourceSupport
  {
    /** Runtime view: case-adjusted term mapped to the case-preserving mappings for that term. */
    protected Map<String,CasePreservedSynonymMappings> synonymMappings;

    public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO)
        throws SolrException {
      super(resourceId, loader, storageIO);
    }

    /**
     * Rebuilds {@link #synonymMappings} from the stored data and installs the default
     * {@code ignoreCase=false} init arg when none was supplied.
     *
     * @throws SolrException if a format other than "solr" is configured, or if a stored
     *         value is not a list of synonyms
     */
    @SuppressWarnings("unchecked")
    @Override
    protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData)
        throws SolrException
    {
      NamedList<Object> initArgs = (NamedList<Object>)managedInitArgs;

      String format = (String)initArgs.get("format");
      if (format != null && !"solr".equals(format)) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid format "+
            format+"! Only 'solr' is supported.");
      }

      // the default behavior is to not ignore case,
      // so if not supplied, then install the default
      if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) {
        initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE);
      }

      boolean ignoreCase = getIgnoreCase(managedInitArgs);
      synonymMappings = new TreeMap<>();
      if (managedData != null) {
        Map<String,Object> storedSyns = (Map<String,Object>)managedData;
        for (Map.Entry<String,Object> stored : storedSyns.entrySet()) {
          String key = stored.getKey();

          String caseKey = applyCaseSetting(ignoreCase, key);
          CasePreservedSynonymMappings cpsm = synonymMappings.get(caseKey);
          if (cpsm == null) {
            cpsm = new CasePreservedSynonymMappings();
            synonymMappings.put(caseKey, cpsm);
          }

          // given the nature of our JSON parsing solution, we really have
          // no guarantees on what is in the file
          Object mapping = stored.getValue();
          if (!(mapping instanceof List)) {
            // a null value also lands here, so it must not be dereferenced for the message
            String actualType = (mapping == null) ? "null" : mapping.getClass().getName();
            throw new SolrException(ErrorCode.SERVER_ERROR,
                "Invalid synonym file format! Expected a list of synonyms for "+key+
                " but got "+actualType);
          }

          Set<String> sortedVals = new TreeSet<>((List<String>)mapping);
          cpsm.mappings.put(key, sortedVals);
        }
      }
      log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId());
    }

    /**
     * Applies a JSON update, given either as a List (a set of equivalent terms) or as a Map
     * (term to synonym or list of synonyms).
     *
     * @return the stored view when something changed, otherwise {@code null}
     * @throws ResourceException (400) if {@code updates} is neither a List nor a Map
     */
    @SuppressWarnings("unchecked")
    @Override
    protected Object applyUpdatesToManagedData(Object updates) {
      boolean ignoreCase = getIgnoreCase();
      boolean madeChanges = false;
      if (updates instanceof List) {
        madeChanges = applyListUpdates((List<String>)updates, ignoreCase);
      } else if (updates instanceof Map) {
        madeChanges = applyMapUpdates((Map<String,Object>)updates, ignoreCase);
      } else {
        // a null payload also lands here, so it must not be dereferenced for the message
        String actualType = (updates == null) ? "null" : updates.getClass().getName();
        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST,
            "Unsupported data format (" + actualType + "); expected a JSON object (Map or List)!");
      }
      return madeChanges ? getStoredView() : null;
    }

    /**
     * Maps every term in {@code jsonList} to the full list, replacing any mapping already
     * stored under that exact term.
     *
     * @return {@code true} if the list contained at least one term
     */
    protected boolean applyListUpdates(List<String> jsonList, boolean ignoreCase) {
      boolean madeChanges = false;
      for (String term : jsonList) {
        // find the mappings using the case aware key
        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null)
          cpsm = new CasePreservedSynonymMappings();

        // each term gets its own copy because the sets are modified independently later on
        Set<String> treeTerms = new TreeSet<>(jsonList);
        cpsm.mappings.put(origTerm, treeTerms);
        madeChanges = true;
        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }
      return madeChanges;
    }

    /**
     * Adds the synonyms given in {@code jsonMap} to the existing mappings; each value must be
     * a single String or a List of Strings.
     *
     * @return {@code true} if at least one new synonym was added
     * @throws ResourceException (400) if a value is neither a String nor a List
     */
    @SuppressWarnings("unchecked")
    protected boolean applyMapUpdates(Map<String,Object> jsonMap, boolean ignoreCase) {
      boolean madeChanges = false;

      for (String term : jsonMap.keySet()) {

        String origTerm = term;
        term = applyCaseSetting(ignoreCase, term);

        // find the mappings using the case aware key
        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
        if (cpsm == null)
          cpsm = new CasePreservedSynonymMappings();

        Set<String> output = cpsm.mappings.get(origTerm);

        Object val = jsonMap.get(origTerm); // IMPORTANT: use the original
        if (val instanceof String) {
          String strVal = (String)val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          if (output.add(strVal)) {
            madeChanges = true;
          }
        } else if (val instanceof List) {
          List<String> vals = (List<String>)val;

          if (output == null) {
            output = new TreeSet<>();
            cpsm.mappings.put(origTerm, output);
          }

          for (String nextVal : vals) {
            if (output.add(nextVal)) {
              madeChanges = true;
            }
          }

        } else {
          throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+
              " for "+term+"; expected single value or a JSON array!");
        }

        // only add the cpsm to the synonymMappings if it has valid data
        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
          synonymMappings.put(term, cpsm);
        }
      }

      return madeChanges;
    }

    /**
     * Returns a Map of how we store and load data managed by this resource,
     * which is different than how it is managed at runtime in order to support
     * the ignoreCase setting.
     */
    protected Map<String,Set<String>> getStoredView() {
      Map<String,Set<String>> storedView = new TreeMap<>();
      for (CasePreservedSynonymMappings cpsm : synonymMappings.values()) {
        storedView.putAll(cpsm.mappings);
      }
      return storedView;
    }

    /** Lower-cases {@code str} (using {@link Locale#ROOT}) when ignoreCase is set; null is passed through. */
    protected String applyCaseSetting(boolean ignoreCase, String str) {
      return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str;
    }

    /** Returns the ignoreCase setting taken from this resource's managed init args. */
    public boolean getIgnoreCase() {
      return getIgnoreCase(managedInitArgs);
    }

    /** Returns the ignoreCase setting from {@code initArgs}, defaulting to {@code false} when absent. */
    public boolean getIgnoreCase(NamedList<?> initArgs) {
      Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG);
      // ignoreCase = false by default
      return null == ignoreCase ? false : ignoreCase;
    }

    /**
     * Adds to the response either the mappings of a single term ({@code childId} given) or the
     * complete stored view ({@code childId} is null).
     *
     * @throws SolrException (404) if {@code childId} is given but has no mappings
     */
    @Override
    public void doGet(BaseSolrResource endpoint, String childId) {
      SolrQueryResponse response = endpoint.getSolrResponse();
      if (childId != null) {
        boolean ignoreCase = getIgnoreCase();
        String key = applyCaseSetting(ignoreCase, childId);

        // if ignoreCase==true, then we get the mappings using the lower-cased key
        // and then return a union of all case-sensitive keys, if false, then
        // we only return the mappings for the exact case requested
        CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
        Set<String> mappings = (cpsm != null) ? cpsm.getMappings(ignoreCase, childId) : null;
        if (mappings == null)
          throw new SolrException(ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

        response.add(childId, mappings);
      } else {
        response.add(SYNONYM_MAPPINGS, buildMapToStore(getStoredView()));
      }
    }

    /**
     * Removes the mappings of {@code childId} (all case variants when ignoreCase is set) and
     * stores the updated data.
     *
     * @throws SolrException (404) if no mappings exist for {@code childId}
     */
    @Override
    public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) {
      boolean ignoreCase = getIgnoreCase();
      String key = applyCaseSetting(ignoreCase, childId);

      CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
      if (cpsm == null)
        throw new SolrException(ErrorCode.NOT_FOUND,
            String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));

      if (ignoreCase) {
        // delete all mappings regardless of case
        synonymMappings.remove(key);
      } else {
        // just delete the mappings for the specific case-sensitive key
        if (cpsm.mappings.containsKey(childId)) {
          cpsm.mappings.remove(childId);

          if (cpsm.mappings.isEmpty())
            synonymMappings.remove(key);
        } else {
          throw new SolrException(ErrorCode.NOT_FOUND,
              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
        }
      }

      // store the updated data (using the stored view)
      storeManagedData(getStoredView());

      log.info("Removed synonym mappings for: {}", childId);
    }
  }

  /**
   * Custom SynonymMap.Parser implementation that provides synonym
   * mappings from the managed JSON in this class during SynonymMap
   * building.
   */
  private static class ManagedSynonymParser extends SynonymMap.Parser {

    private final SynonymManager synonymManager;

    public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) {
      super(dedup, analyzer);
      this.synonymManager = synonymManager;
    }

    /**
     * Add the managed synonyms and their mappings into the SynonymMap builder.
     *
     * @param in not read; the mappings come from the {@link SynonymManager}
     */
    @Override
    public void parse(Reader in) throws IOException, ParseException {
      boolean ignoreCase = synonymManager.getIgnoreCase();
      for (CasePreservedSynonymMappings cpsm : synonymManager.synonymMappings.values()) {
        for (Map.Entry<String,Set<String>> termMappings : cpsm.mappings.entrySet()) {
          String term = termMappings.getKey();
          for (String mapping : termMappings.getValue()) {
            // apply the case setting to match the behavior of the SynonymMap builder
            CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
            CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
            add(casedTerm, casedMapping, false);
          }
        }
      }
    }
  }

  /** The factory that actually creates the filters; set by {@link #onManagedResourceInitialized}. */
  protected SynonymGraphFilterFactory delegate;

  public ManagedSynonymGraphFilterFactory(Map<String,String> args) {
    super(args);
  }

  @Override
  public String getResourceId() {
    return "/schema/analysis/synonyms/"+handle;
  }

  /** Returns {@link SynonymManager} as the ManagedResource implementation backing this factory. */
  protected Class<? extends ManagedResource> getManagedResourceImplClass() {
    return SynonymManager.class;
  }

  /**
   * Called once, during core initialization, to initialize any analysis components
   * that depend on the data managed by this resource. It is important that the
   * analysis component is only initialized once during core initialization so that
   * text analysis is consistent, especially in a distributed environment, as we
   * don't want one server applying a different set of synonyms than other servers.
   */
  @SuppressWarnings("unchecked")
  @Override
  public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
      throws SolrException
  {
    NamedList<Object> args = (NamedList<Object>)initArgs;
    args.add("synonyms", getResourceId());
    args.add("expand", "false");
    args.add("format", "solr");

    Map<String,String> filtArgs = new HashMap<>();
    for (Map.Entry<String,?> entry : args) {
      filtArgs.put(entry.getKey(), entry.getValue().toString());
    }
    // create the actual filter factory that pulls the synonym mappings
    // from synonymMappings using a custom parser implementation
    delegate = new SynonymGraphFilterFactory(filtArgs) {
      @Override
      protected SynonymMap loadSynonyms
          (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
          throws IOException, ParseException {

        ManagedSynonymParser parser =
            new ManagedSynonymParser((SynonymManager)res, dedup, analyzer);
        // null is safe here because there's no actual parsing done against a input Reader
        parser.parse(null);
        return parser.build();
      }
    };
    try {
      delegate.inform(res.getResourceLoader());
    } catch (IOException e) {
      throw new SolrException(ErrorCode.SERVER_ERROR, e);
    }
  }

  /**
   * Creates the token stream through the delegate factory.
   *
   * @throws IllegalStateException if the delegate has not been initialized yet
   */
  @Override
  public TokenStream create(TokenStream input) {
    if (delegate == null)
      throw new IllegalStateException(this.getClass().getName()+
          " not initialized correctly! The SynonymGraphFilterFactory delegate was not initialized.");

    return delegate.create(input);
  }
}

View File

@ -23,10 +23,10 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource;
import org.apache.lucene.spatial.bbox.BBoxStrategy;
import org.apache.solr.legacy.BBoxStrategy;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeAreaValueSource;
import org.apache.solr.common.SolrException;

View File

@ -35,11 +35,11 @@ import javax.xml.xpath.XPathFactory;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyIntField;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
import org.apache.lucene.search.ConstantScoreQuery;

View File

@ -373,7 +373,7 @@ public class IndexSchema {
void persist(Writer writer) throws IOException {
final SolrQueryResponse response = new SolrQueryResponse();
response.add(IndexSchema.SCHEMA, getNamedPropertyValues());
final NamedList args = new NamedList(Arrays.<Object>asList("indent", "on"));
final SolrParams args = (new ModifiableSolrParams()).set("indent", "on");
final LocalSolrQueryRequest req = new LocalSolrQueryRequest(null, args);
final SchemaXmlWriter schemaXmlWriter = new SchemaXmlWriter(writer, req, response);
schemaXmlWriter.setEmitManagedSchemaDoNotEditWarning(true);

View File

@ -20,8 +20,8 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.spatial.vector.PointVectorStrategy;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.PointVectorStrategy;
/**
* @see PointVectorStrategy

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.DoubleDocValues;

View File

@ -30,14 +30,14 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.legacy.LegacyDoubleField;
import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.legacy.LegacyFloatField;
import org.apache.lucene.legacy.LegacyIntField;
import org.apache.lucene.legacy.LegacyLongField;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyDoubleField;
import org.apache.solr.legacy.LegacyFieldType;
import org.apache.solr.legacy.LegacyFloatField;
import org.apache.solr.legacy.LegacyIntField;
import org.apache.solr.legacy.LegacyLongField;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericType;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
@ -63,9 +63,9 @@ import org.slf4j.LoggerFactory;
/**
* Provides field types to support for Lucene's {@link
* org.apache.lucene.legacy.LegacyIntField}, {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} and
* {@link org.apache.lucene.legacy.LegacyDoubleField}.
* See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details.
* org.apache.solr.legacy.LegacyIntField}, {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} and
* {@link org.apache.solr.legacy.LegacyDoubleField}.
* See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details.
* It supports integer, float, long, double and date types.
* <p>
* For each number being added to this field, multiple terms are generated as per the algorithm described in the above
@ -78,7 +78,7 @@ import org.slf4j.LoggerFactory;
* generated, range search will be no faster than any other number field, but sorting will still be possible.
*
*
* @see org.apache.lucene.legacy.LegacyNumericRangeQuery
* @see org.apache.solr.legacy.LegacyNumericRangeQuery
* @since solr 1.4
*/
public class TrieField extends NumericFieldType {

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.IntDocValues;

View File

@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.docvalues.LongDocValues;

View File

@ -17,8 +17,8 @@
package org.apache.solr.search;
import org.apache.lucene.search.Query;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
@ -26,10 +26,10 @@ import org.apache.lucene.queryparser.xml.builders.PointRangeQueryBuilder;
import org.w3c.dom.Element;
/**
* Creates a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The table below specifies the required
* Creates a {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The table below specifies the required
* attributes and the defaults if optional attributes are omitted. For more
* detail on what each of the attributes actually do, consult the documentation
* for {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}:
* for {@link org.apache.solr.legacy.LegacyNumericRangeQuery}:
* <table summary="supported attributes">
* <tr>
* <th>Attribute name</th>

View File

@ -17,7 +17,7 @@
package org.apache.solr.search;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericRangeQuery;
import org.apache.solr.legacy.LegacyNumericRangeQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;

View File

@ -34,7 +34,7 @@ import org.apache.lucene.util.Bits;
* Constrains search results to only match those which also match a provided
* query.
*
* <p> This could be used, for example, with a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} on a suitably
* <p> This could be used, for example, with a {@link org.apache.solr.legacy.LegacyNumericRangeQuery} on a suitably
* formatted date field to implement date filtering. One could re-use a single
* CachingWrapperFilter(QueryWrapperFilter) that matches, e.g., only documents modified
* within the last week. This would only need to be reconstructed once per day.

View File

@ -52,7 +52,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor
*/
@Override
public void process(ResponseBuilder rb, ShardRequest shardRequest) {
SortSpec ss = rb.getSortSpec();
SortSpec groupSortSpec = rb.getGroupingSpec().getGroupSortSpec();
Sort groupSort = rb.getGroupingSpec().getGroupSort();
final String[] fields = rb.getGroupingSpec().getFields();
Sort withinGroupSort = rb.getGroupingSpec().getSortWithinGroup();
@ -144,7 +144,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor
rb.firstPhaseElapsedTime = maxElapsedTime;
for (String groupField : commandSearchGroups.keySet()) {
List<Collection<SearchGroup<BytesRef>>> topGroups = commandSearchGroups.get(groupField);
Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(topGroups, ss.getOffset(), ss.getCount(), groupSort);
Collection<SearchGroup<BytesRef>> mergedTopGroups = SearchGroup.merge(topGroups, groupSortSpec.getOffset(), groupSortSpec.getCount(), groupSort);
if (mergedTopGroups == null) {
continue;
}

View File

@ -25,7 +25,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;

View File

@ -16,7 +16,7 @@
*/
package org.apache.solr.search.mlt;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;

View File

@ -27,7 +27,7 @@ import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -159,8 +159,8 @@ public interface FieldCache {
};
/**
* A parser instance for int values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.legacy.LegacyIntField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* A parser instance for int values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.solr.legacy.LegacyIntField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #INT_POINT_PARSER} instead.
*/
@Deprecated
@ -182,8 +182,8 @@ public interface FieldCache {
};
/**
* A parser instance for float values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.legacy.LegacyFloatField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* A parser instance for float values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.solr.legacy.LegacyFloatField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead.
*/
@Deprecated
@ -207,8 +207,8 @@ public interface FieldCache {
};
/**
* A parser instance for long values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.legacy.LegacyLongField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* A parser instance for long values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.solr.legacy.LegacyLongField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead.
*/
@Deprecated
@ -229,8 +229,8 @@ public interface FieldCache {
};
/**
* A parser instance for double values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.lucene.legacy.LegacyDoubleField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
* A parser instance for double values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed
* via {@link org.apache.solr.legacy.LegacyDoubleField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead.
*/
@Deprecated
@ -277,7 +277,7 @@ public interface FieldCache {
* @param parser
* Computes long for string values. May be {@code null} if the
* requested field was indexed as {@link NumericDocValuesField} or
* {@link org.apache.lucene.legacy.LegacyLongField}.
* {@link org.apache.solr.legacy.LegacyLongField}.
* @return The values in the given field for each document.
* @throws IOException
* If any error occurs.

View File

@ -87,7 +87,7 @@ public class UninvertingReader extends FilterLeafReader {
*/
DOUBLE_POINT,
/**
* Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
* Single-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
@ -96,7 +96,7 @@ public class UninvertingReader extends FilterLeafReader {
@Deprecated
LEGACY_INTEGER,
/**
* Single-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
* Single-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
@ -105,7 +105,7 @@ public class UninvertingReader extends FilterLeafReader {
@Deprecated
LEGACY_LONG,
/**
* Single-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
* Single-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
@ -114,7 +114,7 @@ public class UninvertingReader extends FilterLeafReader {
@Deprecated
LEGACY_FLOAT,
/**
* Single-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
* Single-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}.
@ -144,28 +144,28 @@ public class UninvertingReader extends FilterLeafReader {
*/
SORTED_SET_BINARY,
/**
* Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField})
* Multi-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_INTEGER,
/**
* Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField})
* Multi-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_FLOAT,
/**
* Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField})
* Multi-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.
*/
SORTED_SET_LONG,
/**
* Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField})
* Multi-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}.

Some files were not shown because too many files have changed in this diff Show More