diff --git a/dev-tools/scripts/checkJavaDocs.py b/dev-tools/scripts/checkJavaDocs.py index ae2b440da82..355bbdd2b4f 100644 --- a/dev-tools/scripts/checkJavaDocs.py +++ b/dev-tools/scripts/checkJavaDocs.py @@ -296,7 +296,7 @@ def checkSummary(fullPath): print() print(fullPath) printed = True - print(' missing: %s' % unescapeHTML(lastHREF)) + print(' missing description: %s' % unescapeHTML(lastHREF)) anyMissing = True elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1: if not printed: diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 879d754c038..0c1d3519a9f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -57,6 +57,8 @@ API Changes instead, which derived from the UH. WholeBreakIterator and CustomSeparatorBreakIterator were moved to UH's package. (David Smiley) +* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand) + Bug Fixes * LUCENE-7626: IndexWriter will no longer accept broken token offsets @@ -88,6 +90,10 @@ Optimizations values using different numbers of bits per value if this proves to save storage. (Adrien Grand) +* LUCENE-7845: Enhance spatial-extras RecursivePrefixTreeStrategy queries when the + query is a point (for 2D) or is a simple date interval (e.g. 1 month). When + the strategy is marked as pointsOnly, the result is a TermQuery. (David Smiley) + Other * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize) @@ -99,6 +105,8 @@ Other * LUCENE-7753: Make fields static when possible. (Daniel Jelinski via Adrien Grand) +* LUCENE-7540: Upgrade ICU to 59.1. (Mike McCandless, Jim Ferenczi) + ======================= Lucene 6.7.0 ======================= Other @@ -107,6 +115,10 @@ Other from methods that don't declare them ("sneaky throw" hack). (Robert Muir, Uwe Schindler, Dawid Weiss) +Improvements + +* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss) + ======================= Lucene 6.6.0 ======================= New Features diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index c7936a4bd7a..89b2d7623a4 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -74,3 +74,9 @@ collecting TopDocs for each group, but instead takes a GroupReducer that will perform any type of reduction on the top groups collected on a first-pass. To reproduce the old behaviour of SecondPassGroupingCollector, you should instead use TopGroupsCollector. + +## Removed legacy numerics (LUCENE-7850) + +Support for legacy numerics has been removed, as it had been +deprecated since Lucene 6.0. Points should be used instead; see +org.apache.lucene.index.PointValues for an introduction. diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java index 209ecee4961..eb08eeaa8c6 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java @@ -24,6 +24,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeFactory; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** * Emits the entire input as a single token.
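A minimal editor's sketch of the MIGRATE.txt advice above, assuming a hypothetical "price" field and wrapper class; the commented-out calls are the legacy APIs this patch deletes (compare the removed TestBackwardsCompatibility#testNumericFields further down):

```java
import org.apache.lucene.document.Document;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.Query;

public class PointsMigrationSketch {
  // Before LUCENE-7850 (now removed):
  //   doc.add(new LegacyLongField("price", 42L, Field.Store.NO));
  //   Query q = LegacyNumericRangeQuery.newLongRange("price",
  //       LegacyNumericUtils.PRECISION_STEP_DEFAULT, 10L, 100L, true, true);

  public static Query indexAndQuery(Document doc) {
    doc.add(new LongPoint("price", 42L));               // index the value as a 1D point
    return LongPoint.newRangeQuery("price", 10L, 100L); // inclusive range over the point
  }
}
```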
*/ @@ -41,16 +43,16 @@ public final class KeywordTokenizer extends Tokenizer { } public KeywordTokenizer(int bufferSize) { - if (bufferSize <= 0) { - throw new IllegalArgumentException("bufferSize must be > 0"); + if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize); } termAtt.resizeBuffer(bufferSize); } public KeywordTokenizer(AttributeFactory factory, int bufferSize) { super(factory); - if (bufferSize <= 0) { - throw new IllegalArgumentException("bufferSize must be > 0"); + if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize); } termAtt.resizeBuffer(bufferSize); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java index 3654f67beab..86f65d60246 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java @@ -16,26 +16,39 @@ */ package org.apache.lucene.analysis.core; - import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; import java.util.Map; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** * Factory for {@link KeywordTokenizer}. *
  * <fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.KeywordTokenizerFactory"/>
+ *     <tokenizer class="solr.KeywordTokenizerFactory" maxTokenLen="256"/>
  *   </analyzer>
  * </fieldType>
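As an illustrative aside (not part of the patch; the class name is hypothetical): the new TestKeywordTokenizer later in this diff verifies that maxTokenLen only pre-sizes KeywordTokenizer's term buffer, so the whole input is still emitted as a single token no matter how small the setting. A sketch of that behavior through the factory:

```java
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordMaxTokenLenSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> config = new HashMap<>();
    config.put("maxTokenLen", "5"); // well below the input length

    Tokenizer tokenizer = new KeywordTokenizerFactory(config).create();
    tokenizer.setReader(new StringReader("Tokenizertest"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term); // prints "Tokenizertest", still one token
    }
    tokenizer.end();
    tokenizer.close();
  }
}
```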
+ * + * Options: + * • maxTokenLen: max token length the tokenizer will emit. Must be greater than 0 and less than + * MAX_TOKEN_LENGTH_LIMIT (1024*1024); defaults to KeywordTokenizer.DEFAULT_BUFFER_SIZE (256). It is rare to need to change this. + */ public class KeywordTokenizerFactory extends TokenizerFactory { + private final int maxTokenLen; /** Creates a new KeywordTokenizerFactory */ public KeywordTokenizerFactory(Map<String,String> args) { super(args); + maxTokenLen = getInt(args, "maxTokenLen", KeywordTokenizer.DEFAULT_BUFFER_SIZE); + if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen); + } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -43,6 +56,6 @@ public class KeywordTokenizerFactory extends TokenizerFactory { @Override public KeywordTokenizer create(AttributeFactory factory) { - return new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE); + return new KeywordTokenizer(factory, maxTokenLen); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java index df41b3777cb..8fb7d0e3f14 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java @@ -50,6 +50,20 @@ public class LetterTokenizer extends CharTokenizer { super(factory); } + /** + * Construct a new LetterTokenizer using a given + * {@link org.apache.lucene.util.AttributeFactory}. + * + * @param factory the attribute factory to use for this {@link Tokenizer} + * @param maxTokenLen maximum token length the tokenizer will emit. + * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024) + * @throws IllegalArgumentException if maxTokenLen is invalid. + */ + public LetterTokenizer(AttributeFactory factory, int maxTokenLen) { + super(factory, maxTokenLen); + } + /** Collects only characters which satisfy * {@link Character#isLetter(int)}.*/ @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java index 828d6cf3fed..41ada68ba52 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java @@ -17,25 +17,40 @@ package org.apache.lucene.analysis.core; +import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; import java.util.Map; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** * Factory for {@link LetterTokenizer}. *
  * <fieldType name="text_letter" class="solr.TextField" positionIncrementGap="100">
  *   <analyzer>
- *     <tokenizer class="solr.LetterTokenizerFactory"/>
+ *     <tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="256"/>
  *   </analyzer>
  * </fieldType>
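Another hedged sketch (class name hypothetical): like the keyword factory, this factory validates maxTokenLen eagerly in its constructor (shown below), so a bad value fails at configuration time rather than during tokenization:

```java
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.core.LetterTokenizerFactory;

public class LetterFactoryValidationSketch {
  public static void main(String[] args) {
    Map<String, String> config = new HashMap<>();
    config.put("maxTokenLen", "-1"); // out of range: must be greater than 0

    try {
      new LetterTokenizerFactory(config); // validation happens in the constructor
    } catch (IllegalArgumentException expected) {
      // "maxTokenLen must be greater than 0 and less than 1048576 passed: -1"
      System.out.println(expected.getMessage());
    }
  }
}
```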
+ * + * Options: + * • maxTokenLen: max token length the tokenizer will emit. Must be greater than 0 and less than + * MAX_TOKEN_LENGTH_LIMIT (1024*1024); defaults to {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN (255). It is rare to need to change this. + */ public class LetterTokenizerFactory extends TokenizerFactory { + private final int maxTokenLen; /** Creates a new LetterTokenizerFactory */ public LetterTokenizerFactory(Map<String,String> args) { super(args); + maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN); + if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen); + } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -43,6 +58,6 @@ public class LetterTokenizerFactory extends TokenizerFactory { @Override public LetterTokenizer create(AttributeFactory factory) { - return new LetterTokenizer(factory); + return new LetterTokenizer(factory, maxTokenLen); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java index 982d356533e..26b8747962b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java @@ -50,6 +50,19 @@ public final class LowerCaseTokenizer extends LetterTokenizer { super(factory); } + /** + * Construct a new LowerCaseTokenizer using a given + * {@link org.apache.lucene.util.AttributeFactory}. + * + * @param factory the attribute factory to use for this {@link Tokenizer} + * @param maxTokenLen maximum token length the tokenizer will emit. + * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024) + * @throws IllegalArgumentException if maxTokenLen is invalid. + */ + public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) { + super(factory, maxTokenLen); + } + /** Converts char to lower case * {@link Character#toLowerCase(int)}.*/ @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java index 3e29161a923..a3e06c7a608 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java @@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core; import org.apache.lucene.analysis.util.AbstractAnalysisFactory; +import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.analysis.util.MultiTermAwareComponent; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; @@ -25,20 +26,36 @@ import org.apache.lucene.util.AttributeFactory; import java.util.HashMap; import java.util.Map; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** - * Factory for {@link LowerCaseTokenizer}. + * Factory for {@link LowerCaseTokenizer}. *
  * <fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100">
- *   <analyzer>
- *     <tokenizer class="solr.LowerCaseTokenizerFactory"/>
- *   </analyzer>
+ *   <analyzer>
+ *     <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/>
+ *   </analyzer>
  * </fieldType>
+ *
+ * Options:
+ * • maxTokenLen: max token length the tokenizer will emit. Must be greater than 0
+ *   and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024); defaults to
+ *   {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN (255). It is rare to need to change this.
*/ public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent { - - /** Creates a new LowerCaseTokenizerFactory */ - public LowerCaseTokenizerFactory(Map<String,String> args) { + private final int maxTokenLen; + + /** + * Creates a new LowerCaseTokenizerFactory + */ + public LowerCaseTokenizerFactory(Map<String,String> args) { super(args); + maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN); + if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen); + } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -46,11 +63,13 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi @Override public LowerCaseTokenizer create(AttributeFactory factory) { - return new LowerCaseTokenizer(factory); + return new LowerCaseTokenizer(factory, maxTokenLen); } @Override public AbstractAnalysisFactory getMultiTermComponent() { - return new LowerCaseFilterFactory(new HashMap<>(getOriginalArgs())); + Map<String,String> map = new HashMap<>(getOriginalArgs()); + map.remove("maxTokenLen"); // remove the "maxTokenLen" argument, which LowerCaseFilterFactory does not accept + return new LowerCaseFilterFactory(map); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java index 37e9d2b8a22..b6b8b609863 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java @@ -58,7 +58,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; *
    *
  • wordset - This is the default format, which supports one word per * line (including any intra-word whitespace) and allows whole line comments - * begining with the "#" character. Blank lines are ignored. See + * beginning with the "#" character. Blank lines are ignored. See * {@link WordlistLoader#getLines WordlistLoader.getLines} for details. *
  • *
  • snowball - This format allows for multiple words specified on each diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java index 5e4313f6c51..00c181f1262 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java @@ -47,6 +47,19 @@ public final class UnicodeWhitespaceTokenizer extends CharTokenizer { public UnicodeWhitespaceTokenizer(AttributeFactory factory) { super(factory); } + + /** + * Construct a new UnicodeWhitespaceTokenizer using a given + * {@link org.apache.lucene.util.AttributeFactory}. + * + * @param factory the attribute factory to use for this {@link Tokenizer} + * @param maxTokenLen maximum token length the tokenizer will emit. + * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024) + * @throws IllegalArgumentException if maxTokenLen is invalid. + */ + public UnicodeWhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) { + super(factory, maxTokenLen); + } /** Collects only characters which do not satisfy Unicode's WHITESPACE property. */ @Override diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java index 70f2d620bbd..065522761d0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java @@ -46,6 +46,19 @@ public final class WhitespaceTokenizer extends CharTokenizer { public WhitespaceTokenizer(AttributeFactory factory) { super(factory); } + + /** + * Construct a new WhitespaceTokenizer using a given + * {@link org.apache.lucene.util.AttributeFactory}. + * + * @param factory the attribute factory to use for this {@link Tokenizer} + * @param maxTokenLen maximum token length the tokenizer will emit. + * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024) + * @throws IllegalArgumentException if maxTokenLen is invalid. + */ + public WhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) { + super(factory, maxTokenLen); + } /** Collects only characters which do not satisfy * {@link Character#isWhitespace(int)}.*/ diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java index fd38b632adc..29e9ed519fa 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java @@ -22,15 +22,18 @@ import java.util.Collection; import java.util.Map; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** * Factory for {@link WhitespaceTokenizer}. *
      * <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
      *   <analyzer>
    - *     <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode"/>
+ *     <tokenizer class="solr.WhitespaceTokenizerFactory" rule="unicode" maxTokenLen="256"/>
      *   </analyzer>
      * </fieldType>
    * @@ -38,6 +41,9 @@ import org.apache.lucene.util.AttributeFactory; *
      *
    • rule: either "java" for {@link WhitespaceTokenizer} * or "unicode" for {@link UnicodeWhitespaceTokenizer}
    • + *
• maxTokenLen: max token length the tokenizer will emit; must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024). + Defaults to {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN (255). + It is rare to need to change this (see the sketch after this list).
    • *
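The sketch referenced from the maxTokenLen option above (an editor's aside; class name hypothetical, and the expected tokens mirror TestCharTokenizers and TestUnicodeWhitespaceTokenizer later in this diff). When a token reaches maxTokenLen characters, CharTokenizer flushes it and keeps going, so an oversized run is split rather than truncated:

```java
import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;

public class WhitespaceMaxTokenLenSketch {
  public static void main(String[] args) throws Exception {
    // maxTokenLen of 5: any run of non-whitespace longer than 5 chars is split
    WhitespaceTokenizer tokenizer =
        new WhitespaceTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, 5);
    tokenizer.setReader(new StringReader("Tokenizer test"));
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);

    tokenizer.reset();
    while (tokenizer.incrementToken()) {
      System.out.println(term); // "Token", "izer", "test"
    }
    tokenizer.end();
    tokenizer.close();
  }
}
```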
*/ public class WhitespaceTokenizerFactory extends TokenizerFactory { @@ -46,13 +52,17 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { private static final Collection<String> RULE_NAMES = Arrays.asList(RULE_JAVA, RULE_UNICODE); private final String rule; + private final int maxTokenLen; /** Creates a new WhitespaceTokenizerFactory */ public WhitespaceTokenizerFactory(Map<String,String> args) { super(args); rule = get(args, "rule", RULE_NAMES, RULE_JAVA); - + maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN); + if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen); + } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } @@ -62,9 +72,9 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory { public Tokenizer create(AttributeFactory factory) { switch (rule) { case RULE_JAVA: - return new WhitespaceTokenizer(factory); + return new WhitespaceTokenizer(factory, maxTokenLen); case RULE_UNICODE: - return new UnicodeWhitespaceTokenizer(factory); + return new UnicodeWhitespaceTokenizer(factory, maxTokenLen); default: throw new AssertionError(); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java index 13289bee1bd..ff9d6ff93c1 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java @@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.util.AttributeFactory; +import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT; + /** * An abstract base class for simple, character-oriented tokenizers. *
    @@ -50,6 +52,7 @@ public abstract class CharTokenizer extends Tokenizer { * Creates a new {@link CharTokenizer} instance */ public CharTokenizer() { + this.maxTokenLen = DEFAULT_MAX_WORD_LEN; } /** @@ -60,6 +63,23 @@ public abstract class CharTokenizer extends Tokenizer { */ public CharTokenizer(AttributeFactory factory) { super(factory); + this.maxTokenLen = DEFAULT_MAX_WORD_LEN; + } + + /** + * Creates a new {@link CharTokenizer} instance + * + * @param factory the attribute factory to use for this {@link Tokenizer} + * @param maxTokenLen maximum token length the tokenizer will emit. + * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024) + * @throws IllegalArgumentException if maxTokenLen is invalid. + */ + public CharTokenizer(AttributeFactory factory, int maxTokenLen) { + super(factory); + if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) { + throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen); + } + this.maxTokenLen = maxTokenLen; } /** @@ -193,9 +213,10 @@ public abstract class CharTokenizer extends Tokenizer { } private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0; - private static final int MAX_WORD_LEN = 255; + public static final int DEFAULT_MAX_WORD_LEN = 255; private static final int IO_BUFFER_SIZE = 4096; - + private final int maxTokenLen; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @@ -256,7 +277,7 @@ public abstract class CharTokenizer extends Tokenizer { } end += charCount; length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized - if (length >= MAX_WORD_LEN) { // buffer overflow! make sure to check for >= surrogate pair could break == test + if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test break; } } else if (length > 0) { // at non-Letter w/ chars diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java index 75070d10700..00ee311a4b0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java @@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet; /** * This file contains unicode properties used by various {@link CharTokenizer}s. - * The data was created using ICU4J v56.1.0.0 + * The data was created using ICU4J v59.1.0.0 *
- * Unicode version: 8.0.0.0 + * Unicode version: 9.0.0.0 */ public final class UnicodeProps { private UnicodeProps() {} /** Unicode version that was used to generate this file: {@value} */ - public static final String UNICODE_VERSION = "8.0.0.0"; + public static final String UNICODE_VERSION = "9.0.0.0"; /** Bitset with Unicode WHITESPACE code points. */ public static final Bits WHITESPACE = createBits( diff --git a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd index daca530737f..f413afc2f9a 100644 --- a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd +++ b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd @@ -53,7 +53,7 @@ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd index 15bb8ca60ed..fb3db16cf67 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd @@ -54,7 +54,7 @@ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java new file mode 100644 index 00000000000..3f03a008c01 --- /dev/null +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.core; + +import java.io.IOException; +import java.io.StringReader; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.util.AttributeFactory; + +public class TestKeywordTokenizer extends BaseTokenStreamTestCase { + + public void testSimple() throws IOException { + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + KeywordTokenizer tokenizer = new KeywordTokenizer(); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"Tokenizer \ud801\udc1ctest"}); + } + + public void testFactory() { + Map<String, String> args = new HashMap<>(); + KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args); + AttributeFactory attributeFactory = newAttributeFactory(); + Tokenizer tokenizer = factory.create(attributeFactory); + assertEquals(KeywordTokenizer.class, tokenizer.getClass()); + } + + private Map<String, String> makeArgs(String... 
args) { + Map<String, String> ret = new HashMap<>(); + for (int idx = 0; idx < args.length; idx += 2) { + ret.put(args[idx], args[idx + 1]); + } + return ret; + } + + public void testParamsFactory() throws IOException { + // negative maxTokenLen + IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> + new KeywordTokenizerFactory(makeArgs("maxTokenLen", "-1"))); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage()); + + // zero maxTokenLen + iae = expectThrows(IllegalArgumentException.class, () -> + new KeywordTokenizerFactory(makeArgs("maxTokenLen", "0"))); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage()); + + // Added random param, should throw illegal error + iae = expectThrows(IllegalArgumentException.class, () -> + new KeywordTokenizerFactory(makeArgs("maxTokenLen", "255", "randomParam", "rValue"))); + assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage()); + + // tokenizer will never split, no matter what is passed, + // but the buffer will not be larger than the length of the token + + KeywordTokenizerFactory factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "5")); + AttributeFactory attributeFactory = newAttributeFactory(); + Tokenizer tokenizer = factory.create(attributeFactory); + StringReader reader = new StringReader("Tokenizertest"); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"Tokenizertest"}); + + // tokenizer will never split, no matter what is passed, + // but the buffer will not be larger than the length of the token + factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "2")); + attributeFactory = newAttributeFactory(); + tokenizer = factory.create(attributeFactory); + reader = new StringReader("Tokenizer\u00A0test"); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"Tokenizer\u00A0test"}); + } +} diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java index acdb670f7ea..16089e9eda9 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java @@ -54,4 +54,55 @@ public class TestUnicodeWhitespaceTokenizer extends BaseTokenStreamTestCase { assertEquals(UnicodeWhitespaceTokenizer.class, tokenizer.getClass()); } + private Map<String, String> makeArgs(String... 
args) { + Map<String, String> ret = new HashMap<>(); + for (int idx = 0; idx < args.length; idx += 2) { + ret.put(args[idx], args[idx + 1]); + } + return ret; + } + + public void testParamsFactory() throws IOException { + + // negative maxTokenLen + IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> + new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "-1"))); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage()); + + // zero maxTokenLen + iae = expectThrows(IllegalArgumentException.class, () -> + new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "0"))); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage()); + + // Added random param, should throw illegal error + iae = expectThrows(IllegalArgumentException.class, () -> + new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "255", "randomParam", "rValue"))); + assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage()); + + // tokenizer will split at 5, Token | izer, no matter what happens + WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "5")); + AttributeFactory attributeFactory = newAttributeFactory(); + Tokenizer tokenizer = factory.create(attributeFactory); + StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest"); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"Token", "izer", "\ud801\udc1ctes", "t"}); + + // tokenizer will split at 2, To | ke | ni | ze | r, no matter what happens + factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "2")); + attributeFactory = newAttributeFactory(); + tokenizer = factory.create(attributeFactory); + reader = new StringReader("Tokenizer\u00A0test"); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"To", "ke", "ni", "ze", "r", "te", "st"}); + + // tokenizer will split at 10, no matter what happens, + // but these tokens' lengths are less than that + factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "10")); + attributeFactory = newAttributeFactory(); + tokenizer = factory.create(attributeFactory); + reader = new StringReader("Tokenizer\u00A0test"); + tokenizer.setReader(reader); + assertTokenStreamContents(tokenizer, new String[]{"Tokenizer", "test"}); + } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java index 783fc3e4b51..4596608b747 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java @@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.LetterTokenizer; import org.apache.lucene.analysis.core.LowerCaseTokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.util.TestUtil; @@ -89,6 +91,99 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)}); } + + /* + * tests the max word length passed as parameter - tokenizer will split at the passed position char no matter what happens + */ + public void testCustomMaxTokenLength() throws IOException { + + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < 100; i++) { + builder.append("A"); + } + Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100); + // Tricky, passing two copies of the string to the reader.... + tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT), + builder.toString().toLowerCase(Locale.ROOT) }); + + Exception e = expectThrows(IllegalArgumentException.class, () -> + new LowerCaseTokenizer(newAttributeFactory(), -1)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage()); + + tokenizer = new LetterTokenizer(newAttributeFactory(), 100); + tokenizer.setReader(new StringReader(builder.toString() + builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString(), builder.toString()}); + + + // Let's test that we can get a token longer than 255 through. + builder.setLength(0); + for (int i = 0; i < 500; i++) { + builder.append("Z"); + } + tokenizer = new LetterTokenizer(newAttributeFactory(), 500); + tokenizer.setReader(new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString()}); + + + // Just to be sure what is happening here, token lengths of zero make no sense, + // Let's try the edge cases, token > I/O buffer (4096) + builder.setLength(0); + for (int i = 0; i < 600; i++) { + builder.append("aUrOkIjq"); // 600 * 8 = 4800 chars. 
+ } + + e = expectThrows(IllegalArgumentException.class, () -> + new LowerCaseTokenizer(newAttributeFactory(), 0)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> + new LowerCaseTokenizer(newAttributeFactory(), 10_000_000)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage()); + + tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800); + tokenizer.setReader(new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)}); + + + e = expectThrows(IllegalArgumentException.class, () -> + new KeywordTokenizer(newAttributeFactory(), 0)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> + new KeywordTokenizer(newAttributeFactory(), 10_000_000)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage()); + + + tokenizer = new KeywordTokenizer(newAttributeFactory(), 4800); + tokenizer.setReader(new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString()}); + + e = expectThrows(IllegalArgumentException.class, () -> + new LetterTokenizer(newAttributeFactory(), 0)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> + new LetterTokenizer(newAttributeFactory(), 2_000_000)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 2000000", e.getMessage()); + + tokenizer = new LetterTokenizer(newAttributeFactory(), 4800); + tokenizer.setReader(new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString()}); + + e = expectThrows(IllegalArgumentException.class, () -> + new WhitespaceTokenizer(newAttributeFactory(), 0)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> + new WhitespaceTokenizer(newAttributeFactory(), 3_000_000)); + assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 3000000", e.getMessage()); + + tokenizer = new WhitespaceTokenizer(newAttributeFactory(), 4800); + tokenizer.setReader(new StringReader(builder.toString())); + assertTokenStreamContents(tokenizer, new String[]{builder.toString()}); + + } /* * tests the max word length of 255 with a surrogate pair at position 255 diff --git a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt index 3772daf1aeb..eb5b78e0ea2 100644 --- a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt @@ -168,11 +168,14 @@ FFE3> 1134D> 11366..1136C> 11370..11374> +11442> +11446> 114C2..114C3> 115BF..115C0> 1163F> 116B6..116B7> 1172B> +11C3F> 16AF0..16AF4> 16F8F..16F9F> 1D167..1D169> @@ -181,6 +184,8 @@ FFE3> 1D185..1D18B> 1D1AA..1D1AD> 1E8D0..1E8D6> +1E944..1E946> +1E948..1E94A> # Latin script "composed" that do not further decompose, so decompose here # These are from AsciiFoldingFilter diff --git a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt index 62e6aefdf1c..fb8cf1ac66b 100644 --- 
a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt +++ b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt @@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 112F7>0037 # KHUDAWADI DIGIT SEVEN 112F8>0038 # KHUDAWADI DIGIT EIGHT 112F9>0039 # KHUDAWADI DIGIT NINE +11450>0030 # NEWA DIGIT ZERO +11451>0031 # NEWA DIGIT ONE +11452>0032 # NEWA DIGIT TWO +11453>0033 # NEWA DIGIT THREE +11454>0034 # NEWA DIGIT FOUR +11455>0035 # NEWA DIGIT FIVE +11456>0036 # NEWA DIGIT SIX +11457>0037 # NEWA DIGIT SEVEN +11458>0038 # NEWA DIGIT EIGHT +11459>0039 # NEWA DIGIT NINE 114D0>0030 # TIRHUTA DIGIT ZERO 114D1>0031 # TIRHUTA DIGIT ONE 114D2>0032 # TIRHUTA DIGIT TWO @@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 118E7>0037 # WARANG CITI DIGIT SEVEN 118E8>0038 # WARANG CITI DIGIT EIGHT 118E9>0039 # WARANG CITI DIGIT NINE +11C50>0030 # BHAIKSUKI DIGIT ZERO +11C51>0031 # BHAIKSUKI DIGIT ONE +11C52>0032 # BHAIKSUKI DIGIT TWO +11C53>0033 # BHAIKSUKI DIGIT THREE +11C54>0034 # BHAIKSUKI DIGIT FOUR +11C55>0035 # BHAIKSUKI DIGIT FIVE +11C56>0036 # BHAIKSUKI DIGIT SIX +11C57>0037 # BHAIKSUKI DIGIT SEVEN +11C58>0038 # BHAIKSUKI DIGIT EIGHT +11C59>0039 # BHAIKSUKI DIGIT NINE 16A60>0030 # MRO DIGIT ZERO 16A61>0031 # MRO DIGIT ONE 16A62>0032 # MRO DIGIT TWO @@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE 16B57>0037 # PAHAWH HMONG DIGIT SEVEN 16B58>0038 # PAHAWH HMONG DIGIT EIGHT 16B59>0039 # PAHAWH HMONG DIGIT NINE +1E950>0030 # ADLAM DIGIT ZERO +1E951>0031 # ADLAM DIGIT ONE +1E952>0032 # ADLAM DIGIT TWO +1E953>0033 # ADLAM DIGIT THREE +1E954>0034 # ADLAM DIGIT FOUR +1E955>0035 # ADLAM DIGIT FIVE +1E956>0036 # ADLAM DIGIT SIX +1E957>0037 # ADLAM DIGIT SEVEN +1E958>0038 # ADLAM DIGIT EIGHT +1E959>0039 # ADLAM DIGIT NINE diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt index 5b7374f2cd5..5f9b1821760 100644 --- a/lucene/analysis/icu/src/data/utr30/nfc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2014, International Business Machines +# Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfc.txt @@ -7,7 +7,7 @@ # # Complete data for Unicode NFC normalization. 
-* Unicode 7.0.0 +* Unicode 9.0.0 # Canonical_Combining_Class (ccc) values 0300..0314:230 @@ -129,6 +129,8 @@ 0825..0827:230 0829..082D:230 0859..085B:220 +08D4..08E1:230 +08E3:220 08E4..08E5:230 08E6:220 08E7..08E8:230 @@ -232,6 +234,7 @@ 1DCF:220 1DD0:202 1DD1..1DF5:230 +1DFB:230 1DFC:233 1DFD:220 1DFE:230 @@ -260,7 +263,7 @@ 3099..309A:8 A66F:230 A674..A67D:230 -A69F:230 +A69E..A69F:230 A6F0..A6F1:230 A806:9 A8C4:9 @@ -280,6 +283,7 @@ ABED:9 FB1E:26 FE20..FE26:230 FE27..FE2D:220 +FE2E..FE2F:230 101FD:220 102E0:220 10376..1037A:230 @@ -299,6 +303,7 @@ FE27..FE2D:220 11133..11134:9 11173:7 111C0:9 +111CA:7 11235:9 11236:7 112E9:7 @@ -307,6 +312,8 @@ FE27..FE2D:220 1134D:9 11366..1136C:230 11370..11374:230 +11442:9 +11446:7 114C2:9 114C3:7 115BF:9 @@ -314,6 +321,8 @@ FE27..FE2D:220 1163F:9 116B6:9 116B7:7 +1172B:9 +11C3F:9 16AF0..16AF4:1 16B30..16B36:230 1BC9E:1 @@ -326,7 +335,14 @@ FE27..FE2D:220 1D18A..1D18B:220 1D1AA..1D1AD:230 1D242..1D244:230 +1E000..1E006:230 +1E008..1E018:230 +1E01B..1E021:230 +1E023..1E024:230 +1E026..1E02A:230 1E8D0..1E8D6:220 +1E944..1E949:230 +1E94A:7 # Canonical decomposition mappings 00C0>0041 0300 # one-way: diacritic 0300 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt index fea41298bc0..f51fa5db4b7 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt @@ -1,4 +1,4 @@ -# Copyright (C) 1999-2014, International Business Machines +# Copyright (C) 1999-2016, International Business Machines # Corporation and others. All Rights Reserved. # # file name: nfkc.txt @@ -11,7 +11,7 @@ # to NFKC one-way mappings. # Use this file as the second gennorm2 input file after nfc.txt. -* Unicode 7.0.0 +* Unicode 9.0.0 00A0>0020 00A8>0020 0308 @@ -3675,6 +3675,7 @@ FFEE>25CB 1F238>7533 1F239>5272 1F23A>55B6 +1F23B>914D 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt index b24b4b277fa..7f33df58c84 100644 --- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt +++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt @@ -1,5 +1,5 @@ # Unicode Character Database -# Copyright (c) 1991-2014 Unicode, Inc. +# Copyright (c) 1991-2016 Unicode, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # For documentation, see http://www.unicode.org/reports/tr44/ # @@ -12,7 +12,7 @@ # and reformatted into syntax for the gennorm2 Normalizer2 data generator tool. # Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt. 
-* Unicode 7.0.0 +* Unicode 9.0.0 0041>0061 0042>0062 @@ -632,8 +632,22 @@ 10CD>2D2D 10FC>10DC 115F..1160> +13F8>13F0 +13F9>13F1 +13FA>13F2 +13FB>13F3 +13FC>13F4 +13FD>13F5 17B4..17B5> 180B..180E> +1C80>0432 +1C81>0434 +1C82>043E +1C83>0441 +1C84..1C85>0442 +1C86>044A +1C87>0463 +1C88>A64B 1D2C>0061 1D2D>00E6 1D2E>0062 @@ -2382,14 +2396,99 @@ A7AA>0266 A7AB>025C A7AC>0261 A7AD>026C +A7AE>026A A7B0>029E A7B1>0287 +A7B2>029D +A7B3>AB53 +A7B4>A7B5 +A7B6>A7B7 A7F8>0127 A7F9>0153 AB5C>A727 AB5D>AB37 AB5E>026B AB5F>AB52 +AB70>13A0 +AB71>13A1 +AB72>13A2 +AB73>13A3 +AB74>13A4 +AB75>13A5 +AB76>13A6 +AB77>13A7 +AB78>13A8 +AB79>13A9 +AB7A>13AA +AB7B>13AB +AB7C>13AC +AB7D>13AD +AB7E>13AE +AB7F>13AF +AB80>13B0 +AB81>13B1 +AB82>13B2 +AB83>13B3 +AB84>13B4 +AB85>13B5 +AB86>13B6 +AB87>13B7 +AB88>13B8 +AB89>13B9 +AB8A>13BA +AB8B>13BB +AB8C>13BC +AB8D>13BD +AB8E>13BE +AB8F>13BF +AB90>13C0 +AB91>13C1 +AB92>13C2 +AB93>13C3 +AB94>13C4 +AB95>13C5 +AB96>13C6 +AB97>13C7 +AB98>13C8 +AB99>13C9 +AB9A>13CA +AB9B>13CB +AB9C>13CC +AB9D>13CD +AB9E>13CE +AB9F>13CF +ABA0>13D0 +ABA1>13D1 +ABA2>13D2 +ABA3>13D3 +ABA4>13D4 +ABA5>13D5 +ABA6>13D6 +ABA7>13D7 +ABA8>13D8 +ABA9>13D9 +ABAA>13DA +ABAB>13DB +ABAC>13DC +ABAD>13DD +ABAE>13DE +ABAF>13DF +ABB0>13E0 +ABB1>13E1 +ABB2>13E2 +ABB3>13E3 +ABB4>13E4 +ABB5>13E5 +ABB6>13E6 +ABB7>13E7 +ABB8>13E8 +ABB9>13E9 +ABBA>13EA +ABBB>13EB +ABBC>13EC +ABBD>13ED +ABBE>13EE +ABBF>13EF F900>8C48 F901>66F4 F902>8ECA @@ -3766,6 +3865,93 @@ FFF0..FFF8> 10425>1044D 10426>1044E 10427>1044F +104B0>104D8 +104B1>104D9 +104B2>104DA +104B3>104DB +104B4>104DC +104B5>104DD +104B6>104DE +104B7>104DF +104B8>104E0 +104B9>104E1 +104BA>104E2 +104BB>104E3 +104BC>104E4 +104BD>104E5 +104BE>104E6 +104BF>104E7 +104C0>104E8 +104C1>104E9 +104C2>104EA +104C3>104EB +104C4>104EC +104C5>104ED +104C6>104EE +104C7>104EF +104C8>104F0 +104C9>104F1 +104CA>104F2 +104CB>104F3 +104CC>104F4 +104CD>104F5 +104CE>104F6 +104CF>104F7 +104D0>104F8 +104D1>104F9 +104D2>104FA +104D3>104FB +10C80>10CC0 +10C81>10CC1 +10C82>10CC2 +10C83>10CC3 +10C84>10CC4 +10C85>10CC5 +10C86>10CC6 +10C87>10CC7 +10C88>10CC8 +10C89>10CC9 +10C8A>10CCA +10C8B>10CCB +10C8C>10CCC +10C8D>10CCD +10C8E>10CCE +10C8F>10CCF +10C90>10CD0 +10C91>10CD1 +10C92>10CD2 +10C93>10CD3 +10C94>10CD4 +10C95>10CD5 +10C96>10CD6 +10C97>10CD7 +10C98>10CD8 +10C99>10CD9 +10C9A>10CDA +10C9B>10CDB +10C9C>10CDC +10C9D>10CDD +10C9E>10CDE +10C9F>10CDF +10CA0>10CE0 +10CA1>10CE1 +10CA2>10CE2 +10CA3>10CE3 +10CA4>10CE4 +10CA5>10CE5 +10CA6>10CE6 +10CA7>10CE7 +10CA8>10CE8 +10CA9>10CE9 +10CAA>10CEA +10CAB>10CEB +10CAC>10CEC +10CAD>10CED +10CAE>10CEE +10CAF>10CEF +10CB0>10CF0 +10CB1>10CF1 +10CB2>10CF2 118A0>118C0 118A1>118C1 118A2>118C2 @@ -4803,6 +4989,40 @@ FFF0..FFF8> 1D7FD>0037 1D7FE>0038 1D7FF>0039 +1E900>1E922 +1E901>1E923 +1E902>1E924 +1E903>1E925 +1E904>1E926 +1E905>1E927 +1E906>1E928 +1E907>1E929 +1E908>1E92A +1E909>1E92B +1E90A>1E92C +1E90B>1E92D +1E90C>1E92E +1E90D>1E92F +1E90E>1E930 +1E90F>1E931 +1E910>1E932 +1E911>1E933 +1E912>1E934 +1E913>1E935 +1E914>1E936 +1E915>1E937 +1E916>1E938 +1E917>1E939 +1E918>1E93A +1E919>1E93B +1E91A>1E93C +1E91B>1E93D +1E91C>1E93E +1E91D>1E93F +1E91E>1E940 +1E91F>1E941 +1E920>1E942 +1E921>1E943 1EE00>0627 1EE01>0628 1EE02>062C @@ -5067,6 +5287,7 @@ FFF0..FFF8> 1F238>7533 1F239>5272 1F23A>55B6 +1F23B>914D 1F240>3014 672C 3015 1F241>3014 4E09 3015 1F242>3014 4E8C 3015 diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk index 
5b8479751d5..c94a023c2ce 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk index 41b977b259a..c3357efa7ce 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk differ diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm index 2680264f181..1a16f3eb182 100644 Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java index 96f44d686b0..75481f1924c 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java @@ -53,7 +53,14 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase { new String[] { "我", "购买", "了", "道具", "和", "服装" } ); } - + + public void testTraditionalChinese() throws Exception { + assertAnalyzesTo(a, "我購買了道具和服裝。", + new String[] { "我", "購買", "了", "道具", "和", "服裝"}); + assertAnalyzesTo(a, "定義切分字串的基本單位是訂定分詞標準的首要工作", // From http://godel.iis.sinica.edu.tw/CKIP/paper/wordsegment_standard.pdf + new String[] { "定義", "切", "分", "字串", "的", "基本", "單位", "是", "訂定", "分詞", "標準", "的", "首要", "工作" }); + } + public void testChineseNumerics() throws Exception { assertAnalyzesTo(a, "9483", new String[] { "9483" }); assertAnalyzesTo(a, "院內分機9483。", diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java index 035a3a086b2..0f2bffecfb0 100644 --- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java +++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java @@ -63,7 +63,7 @@ import java.util.regex.Pattern; public class GenerateUTR30DataFiles { private static final String ICU_SVN_TAG_URL = "http://source.icu-project.org/repos/icu/icu/tags"; - private static final String ICU_RELEASE_TAG = "release-54-1"; + private static final String ICU_RELEASE_TAG = "release-58-1"; private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2"; private static final String NFC_TXT = "nfc.txt"; private static final String NFKC_TXT = "nfkc.txt"; diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java index 6955fe334fc..cd502fd8291 100644 --- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java +++ 
b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java @@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase { // ignored characters builder.add("\u0301", ""); builder.add("\u00AD", ""); + builder.add("ґ", "г"); + builder.add("Ґ", "Г"); NormalizeCharMap normMap = builder.build(); reader = new MappingCharFilter(normMap, reader); diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java index 15b247d5af7..e9a010212e6 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java @@ -52,10 +52,17 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase { public void testCapsTokenStream() throws Exception { Analyzer a = new UkrainianMorfologikAnalyzer(); assertAnalyzesTo(a, "Цих Чайковського і Ґете.", - new String[] { "Чайковське", "Чайковський", "Ґете" }); + new String[] { "Чайковське", "Чайковський", "Гете" }); a.close(); } + public void testCharNormalization() throws Exception { + Analyzer a = new UkrainianMorfologikAnalyzer(); + assertAnalyzesTo(a, "Ґюмрі та Гюмрі.", + new String[] { "Гюмрі", "Гюмрі" }); + a.close(); + } + public void testSampleSentence() throws Exception { Analyzer a = new UkrainianMorfologikAnalyzer(); assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.", diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index b4f50474f0a..a876b7de81f 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -60,10 +60,6 @@ import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericUtils; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; @@ -1114,9 +1110,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase { doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2)); doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2)); doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2)); - // add numeric fields, to test if flex preserves encoding - doc.add(new LegacyIntField("trieInt", id, Field.Store.NO)); - doc.add(new LegacyLongField("trieLong", (long) id, Field.Store.NO)); // add docvalues fields doc.add(new NumericDocValuesField("dvByte", (byte) id)); @@ -1294,51 +1287,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase { } } - public void testNumericFields() throws Exception { - for (String name : oldNames) { - - Directory dir = oldIndexDirs.get(name); - IndexReader reader = DirectoryReader.open(dir); - IndexSearcher searcher = 
newSearcher(reader); - - for (int id=10; id<15; id++) { - ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs; - assertEquals("wrong number of hits", 1, hits.length); - Document d = searcher.doc(hits[0].doc); - assertEquals(String.valueOf(id), d.get("id")); - - hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs; - assertEquals("wrong number of hits", 1, hits.length); - d = searcher.doc(hits[0].doc); - assertEquals(String.valueOf(id), d.get("id")); - } - - // check that also lower-precision fields are ok - ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs; - assertEquals("wrong number of hits", 34, hits.length); - - hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs; - assertEquals("wrong number of hits", 34, hits.length); - - // check decoding of terms - Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt"); - TermsEnum termsEnum = LegacyNumericUtils.filterPrefixCodedInts(terms.iterator()); - while (termsEnum.next() != null) { - int val = LegacyNumericUtils.prefixCodedToInt(termsEnum.term()); - assertTrue("value in id bounds", val >= 0 && val < 35); - } - - terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong"); - termsEnum = LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator()); - while (termsEnum.next() != null) { - long val = LegacyNumericUtils.prefixCodedToLong(termsEnum.term()); - assertTrue("value in id bounds", val >= 0L && val < 35L); - } - - reader.close(); - } - } - private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException { final SegmentInfos infos = SegmentInfos.readLatestCommit(dir); if (VERBOSE) { diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index 7e0e7c72f58..3318c214e86 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4 /com.googlecode.juniversalchardet/juniversalchardet = 1.0.3 /com.googlecode.mp4parser/isoparser = 1.1.18 /com.healthmarketscience.jackcess/jackcess = 2.1.3 -/com.ibm.icu/icu4j = 56.1 +/com.ibm.icu/icu4j = 59.1 /com.pff/java-libpst = 0.8.1 com.sun.jersey.version = 1.9 @@ -276,7 +276,7 @@ org.slf4j.version = 1.7.7 /org.tukaani/xz = 1.5 /rome/rome = 1.0 -ua.net.nlp.morfologik-ukrainian-search.version = 3.7.5 +ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6 /ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version} /xerces/xercesImpl = 2.9.1 diff --git a/lucene/licenses/icu4j-56.1.jar.sha1 b/lucene/licenses/icu4j-56.1.jar.sha1 deleted file mode 100644 index 5f8e0466fde..00000000000 --- a/lucene/licenses/icu4j-56.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8dd6671f52165a0419e6de5e1016400875a90fa9 diff --git a/lucene/licenses/icu4j-59.1.jar.sha1 b/lucene/licenses/icu4j-59.1.jar.sha1 new file mode 100644 index 00000000000..f3f0018f053 --- /dev/null +++ b/lucene/licenses/icu4j-59.1.jar.sha1 @@ -0,0 +1 @@ +6f06e820cf4c8968bbbaae66ae0b33f6a256b57f diff --git 
a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1 deleted file mode 100644 index 8794e71fbe9..00000000000 --- a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -2b8c8fbd740164d220ca7d18605b8b2092e163e9 diff --git a/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1 new file mode 100644 index 00000000000..6f0b86c8290 --- /dev/null +++ b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1 @@ -0,0 +1 @@ +8d2c4bf006f59227bcba8885b4602b3a8b5bd799 diff --git a/lucene/spatial-extras/build.xml b/lucene/spatial-extras/build.xml index 2e425fda202..e9cc29cc224 100644 --- a/lucene/spatial-extras/build.xml +++ b/lucene/spatial-extras/build.xml @@ -31,9 +31,7 @@ - - diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java index 90e36d835db..7536b60bf6e 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java @@ -25,11 +25,6 @@ import org.apache.lucene.document.StringField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericType; -import org.apache.lucene.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -41,8 +36,6 @@ import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialOperation; import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; import org.apache.lucene.spatial.util.DistanceToShapeValueSource; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.NumericUtils; import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.shape.Point; import org.locationtech.spatial4j.shape.Rectangle; @@ -88,8 +81,6 @@ public class BBoxStrategy extends SpatialStrategy { */ public static FieldType DEFAULT_FIELDTYPE; - @Deprecated - public static LegacyFieldType LEGACY_FIELDTYPE; static { // Default: pointValues + docValues FieldType type = new FieldType(); @@ -98,15 +89,6 @@ public class BBoxStrategy extends SpatialStrategy { type.setStored(false); type.freeze(); DEFAULT_FIELDTYPE = type; - // Legacy default: legacyNumerics + docValues - LegacyFieldType legacyType = new LegacyFieldType(); - legacyType.setIndexOptions(IndexOptions.DOCS); - legacyType.setNumericType(LegacyNumericType.DOUBLE); - legacyType.setNumericPrecisionStep(8);// same as solr default - legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues - legacyType.setStored(false); - legacyType.freeze(); - LEGACY_FIELDTYPE = legacyType; } public static final String SUFFIX_MINX = "__minX"; @@ -131,8 +113,6 @@ public class BBoxStrategy extends SpatialStrategy { private final boolean hasStored; private final boolean hasDocVals; private final boolean hasPointVals; - // equiv to "hasLegacyNumerics": - private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step. 
private final FieldType xdlFieldType; /** @@ -142,15 +122,6 @@ public class BBoxStrategy extends SpatialStrategy { return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE); } - /** - * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility - * @deprecated LegacyNumerics will be removed - */ - @Deprecated - public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) { - return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE); - } - /** * Creates this strategy. * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL @@ -179,23 +150,8 @@ public class BBoxStrategy extends SpatialStrategy { if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) { numQuads++; } - if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) { - if (hasPointVals) { - throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive"); - } - final LegacyFieldType legacyType = (LegacyFieldType) fieldType; - if (legacyType.numericType() != LegacyNumericType.DOUBLE) { - throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType()); - } - numQuads++; - legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED); - legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep()); - legacyNumericFieldType.freeze(); - } else { - legacyNumericFieldType = null; - } - if (hasPointVals || legacyNumericFieldType != null) { // if we have an index... + if (hasPointVals) { // if we have an index... xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED); xdlFieldType.setIndexOptions(IndexOptions.DOCS); xdlFieldType.freeze(); @@ -242,12 +198,6 @@ public class BBoxStrategy extends SpatialStrategy { fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX()); fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY()); } - if (legacyNumericFieldType != null) { - fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType); - fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType); - fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType); - fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType); - } if (xdlFieldType != null) { fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType); } @@ -664,17 +614,12 @@ public class BBoxStrategy extends SpatialStrategy { private Query makeNumberTermQuery(String field, double number) { if (hasPointVals) { return DoublePoint.newExactQuery(field, number); - } else if (legacyNumericFieldType != null) { - BytesRefBuilder bytes = new BytesRefBuilder(); - LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes); - return new TermQuery(new Term(field, bytes.get())); } throw new UnsupportedOperationException("An index is required for this operation."); } /** * Returns a numeric range query based on FieldType - * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType} * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields * * @param fieldname field name. must not be null. 
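The exact/range replacement pattern used in makeNumberTermQuery above (and in the rangeQuery hunk that follows) reduces to two DoublePoint factory calls. A minimal sketch, illustrative only; the "bbox__minX" field name is assembled from the "bbox" field prefix and SUFFIX_MINX and is an assumption, not code from this patch:

```java
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.search.Query;

class PointQuerySketch {
  // Replaces the removed prefix-coded TermQuery path
  // (LegacyNumericUtils.longToPrefixCoded + NumericUtils.doubleToSortableLong).
  static Query exactMinX(double value) {
    return DoublePoint.newExactQuery("bbox__minX", value);
  }

  // Replaces LegacyNumericRangeQuery.newDoubleRange. DoublePoint range bounds are
  // inclusive; exclusive endpoints are expressed by nudging with Math.nextUp / Math.nextDown.
  static Query minXRange(double min, double max) {
    return DoublePoint.newRangeQuery("bbox__minX", min, max);
  }
}
```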
@@ -702,8 +647,6 @@ public class BBoxStrategy extends SpatialStrategy {
       }

       return DoublePoint.newRangeQuery(fieldname, min, max);
-    } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
-      return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
     }
     throw new UnsupportedOperationException("An index is required for this operation.");
   }
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
index 757e2bd38f7..ca38abf2400 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
@@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRefIterator;
 /**
  * A TokenStream used internally by {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}.
  *
- * This is modelled after {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
- *
  * @lucene.internal
  */
 class BytesRefIteratorTokenStream extends TokenStream {
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
index c727c0da075..8367644e889 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
@@ -18,18 +18,17 @@ package org.apache.lucene.spatial.prefix;

 import java.io.IOException;
 import java.util.Arrays;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;

-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.spatial.prefix.tree.Cell;
 import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
 import org.apache.lucene.util.Bits;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Shape;

 import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;

@@ -57,9 +56,22 @@
   }

   @Override
-  protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
-    //levels doesn't actually matter; NumberRange based Shapes have their own "level".
-    return super.createCellIteratorToIndex(shape, grid.getMaxLevels(), reuse);
+  protected boolean isPointShape(Shape shape) {
+    if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
+      return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() == grid.getMaxLevels();
+    } else {
+      return false;
+    }
+  }
+
+  @Override
+  protected boolean isGridAlignedShape(Shape shape) {
+    // any UnitNRShape other than the world is a single cell/term
+    if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
+      return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() > 0;
+    } else {
+      return false;
+    }
   }

   /** Unsupported.
*/ diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java index e9f43fd43bd..43851c747b7 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java @@ -21,8 +21,6 @@ import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import org.locationtech.spatial4j.shape.Point; -import org.locationtech.spatial4j.shape.Shape; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; @@ -34,6 +32,10 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource; import org.apache.lucene.util.Bits; +import org.locationtech.spatial4j.shape.Circle; +import org.locationtech.spatial4j.shape.Point; +import org.locationtech.spatial4j.shape.Rectangle; +import org.locationtech.spatial4j.shape.Shape; /** * An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two @@ -163,7 +165,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { } protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) { - if (pointsOnly && !(shape instanceof Point)) { + if (pointsOnly && !isPointShape(shape)) { throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing"); } return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator @@ -205,4 +207,16 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy { Shape inputShape, final int facetLevel, int maxCells) throws IOException { return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells); } + + protected boolean isPointShape(Shape shape) { + if (shape instanceof Point) { + return true; + } else if (shape instanceof Circle) { + return ((Circle) shape).getRadius() == 0.0; + } else if (shape instanceof Rectangle) { + Rectangle rect = (Rectangle) shape; + return rect.getWidth() == 0.0 && rect.getHeight() == 0.0; + } + return false; + } } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java index d3d16263b9c..7c792006b0c 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java @@ -20,9 +20,9 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.locationtech.spatial4j.shape.Point; -import org.locationtech.spatial4j.shape.Shape; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.CellIterator; import org.apache.lucene.spatial.prefix.tree.LegacyCell; @@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.query.SpatialOperation; import 
org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
+import org.locationtech.spatial4j.shape.Shape;

 /**
  * A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeQuery}.
@@ -121,7 +122,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {

   @Override
   protected Iterator<Cell> createCellIteratorToIndex(Shape shape, int detailLevel, Iterator<Cell> reuse) {
-    if (shape instanceof Point || !pruneLeafyBranches)
+    if (!pruneLeafyBranches || isGridAlignedShape(shape))
       return super.createCellIteratorToIndex(shape, detailLevel, reuse);

     List<Cell> cells = new ArrayList<>(4096);
@@ -177,6 +178,9 @@
     int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));

     if (op == SpatialOperation.Intersects) {
+      if (isGridAlignedShape(args.getShape())) {
+        return makeGridShapeIntersectsQuery(args.getShape());
+      }
       return new IntersectsPrefixTreeQuery(
           shape, getFieldName(), grid, detailLevel, prefixGridScanLevel);
     } else if (op == SpatialOperation.IsWithin) {
@@ -189,4 +193,35 @@
     }
     throw new UnsupportedSpatialOperation(op);
   }
+
+  /**
+   * A quick check of the shape to see if it is perfectly aligned to a grid.
+   * Points always are as they are indivisible.  It's okay to return false
+   * if the shape actually is aligned; this is an optimization hint.
+   */
+  protected boolean isGridAlignedShape(Shape shape) {
+    return isPointShape(shape);
+  }
+
+  /** {@link #makeQuery(SpatialArgs)} specialized for the query being a grid square. */
+  protected Query makeGridShapeIntersectsQuery(Shape gridShape) {
+    assert isGridAlignedShape(gridShape);
+    if (isPointsOnly()) {
+      // Awesome; this will be equivalent to a TermQuery.
+      Iterator<Cell> cellIterator = grid.getTreeCellIterator(gridShape, grid.getMaxLevels());
+      // get last cell
+      Cell cell = cellIterator.next();
+      while (cellIterator.hasNext()) {
+        int prevLevel = cell.getLevel();
+        cell = cellIterator.next();
+        assert prevLevel < cell.getLevel();
+      }
+      return new TermQuery(new Term(getFieldName(), cell.getTokenBytesWithLeaf(null)));
+    } else {
+      // Well there could be parent cells. But we can reduce the "scan level" which will be slower for a point query.
+ // TODO: AVPTQ will still scan the bottom nonetheless; file an issue to eliminate that + return new IntersectsPrefixTreeQuery( + gridShape, getFieldName(), grid, getGrid().getMaxLevels(), getGrid().getMaxLevels() + 1); + } + } } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java index 59aff490916..ef3eaa4b24e 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java @@ -22,11 +22,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericType; import org.apache.lucene.queries.function.FunctionRangeQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.BooleanClause; @@ -86,8 +81,6 @@ public class PointVectorStrategy extends SpatialStrategy { */ public static FieldType DEFAULT_FIELDTYPE; - @Deprecated - public static LegacyFieldType LEGACY_FIELDTYPE; static { // Default: pointValues + docValues FieldType type = new FieldType(); @@ -96,15 +89,6 @@ public class PointVectorStrategy extends SpatialStrategy { type.setStored(false); type.freeze(); DEFAULT_FIELDTYPE = type; - // Legacy default: legacyNumerics - LegacyFieldType legacyType = new LegacyFieldType(); - legacyType.setIndexOptions(IndexOptions.DOCS); - legacyType.setNumericType(LegacyNumericType.DOUBLE); - legacyType.setNumericPrecisionStep(8);// same as solr default - legacyType.setDocValuesType(DocValuesType.NONE);//no docValues! - legacyType.setStored(false); - legacyType.freeze(); - LEGACY_FIELDTYPE = legacyType; } public static final String SUFFIX_X = "__x"; @@ -117,8 +101,6 @@ public class PointVectorStrategy extends SpatialStrategy { private final boolean hasStored; private final boolean hasDocVals; private final boolean hasPointVals; - // equiv to "hasLegacyNumerics": - private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step. /** * Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery} @@ -127,18 +109,6 @@ public class PointVectorStrategy extends SpatialStrategy { return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE); } - /** - * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility. - * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)} - * since that requires docValues (the legacy config didn't have that). - * - * @deprecated LegacyNumerics will be removed - */ - @Deprecated - public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) { - return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE); - } - /** * Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}. * a field type is used to articulate the desired options (namely pointValues, docValues, stored). 
Legacy numerics @@ -159,21 +129,6 @@ public class PointVectorStrategy extends SpatialStrategy { if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) { numPairs++; } - if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) { - if (hasPointVals) { - throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive"); - } - final LegacyFieldType legacyType = (LegacyFieldType) fieldType; - if (legacyType.numericType() != LegacyNumericType.DOUBLE) { - throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType()); - } - numPairs++; - legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED); - legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep()); - legacyNumericFieldType.freeze(); - } else { - legacyNumericFieldType = null; - } this.fieldsLen = numPairs * 2; } @@ -209,10 +164,6 @@ public class PointVectorStrategy extends SpatialStrategy { fields[++idx] = new DoublePoint(fieldNameX, point.getX()); fields[++idx] = new DoublePoint(fieldNameY, point.getY()); } - if (legacyNumericFieldType != null) { - fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType); - fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType); - } assert idx == fields.length - 1; return fields; } @@ -268,7 +219,6 @@ public class PointVectorStrategy extends SpatialStrategy { /** * Returns a numeric range query based on FieldType - * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType} * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields */ private Query rangeQuery(String fieldName, Double min, Double max) { @@ -283,8 +233,6 @@ public class PointVectorStrategy extends SpatialStrategy { return DoublePoint.newRangeQuery(fieldName, min, max); - } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0 - return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive } //TODO try doc-value range query? 
throw new UnsupportedOperationException("An index is required for this operation."); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java index d54e1c970a0..536436b9897 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java @@ -68,9 +68,6 @@ public class DistanceStrategyTest extends StrategyTestCase { strategy = BBoxStrategy.newInstance(ctx, "bbox"); ctorArgs.add(new Object[]{strategy.getFieldName(), strategy}); - strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy"); - ctorArgs.add(new Object[]{strategy.getFieldName(), strategy}); - strategy = new SerializedDVStrategy(ctx, "serialized"); ctorArgs.add(new Object[]{strategy.getFieldName(), strategy}); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java index c14fe546f97..f52ef2b444d 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java @@ -58,9 +58,7 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase { strategies.add(recursive_geohash); strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad")); strategies.add(PointVectorStrategy.newInstance(ctx, "pointvector")); - strategies.add(PointVectorStrategy.newLegacyInstance(ctx, "pointvector_legacy")); strategies.add(BBoxStrategy.newInstance(ctx, "bbox")); - strategies.add(BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy")); final SerializedDVStrategy serialized = new SerializedDVStrategy(ctx, "serialized"); strategies.add(serialized); strategies.add(new CompositeSpatialStrategy("composite", recursive_geohash, serialized)); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java index 20df7305cbe..210ab386fab 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java @@ -21,8 +21,6 @@ import java.io.IOException; import com.carrotsearch.randomizedtesting.annotations.Repeat; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.legacy.LegacyFieldType; import org.apache.lucene.search.Query; import org.apache.lucene.spatial.SpatialMatchConcern; import org.apache.lucene.spatial.prefix.RandomSpatialOpStrategyTestCase; @@ -93,20 +91,10 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase { factory.worldBounds = new RectangleImpl(-300, 300, -100, 100, null); this.ctx = factory.newSpatialContext(); } - // randomly test legacy (numeric) and point based bbox strategy - if (random().nextBoolean()) { - this.strategy = BBoxStrategy.newInstance(ctx, "bbox"); - } else { - this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox"); - } + this.strategy = BBoxStrategy.newInstance(ctx, "bbox"); //test we can disable docValues for predicate tests if (random().nextBoolean()) { - FieldType fieldType = ((BBoxStrategy)strategy).getFieldType(); - if (fieldType instanceof LegacyFieldType) { 
- fieldType = new LegacyFieldType((LegacyFieldType)fieldType); - } else { - fieldType = new FieldType(fieldType); - } + FieldType fieldType = new FieldType(((BBoxStrategy)strategy).getFieldType()); fieldType.setDocValuesType(DocValuesType.NONE); strategy = new BBoxStrategy(ctx, strategy.getFieldName(), fieldType); } @@ -194,11 +182,7 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase { private void setupGeo() { this.ctx = SpatialContext.GEO; - if (random().nextBoolean()) { - this.strategy = BBoxStrategy.newInstance(ctx, "bbox"); - } else { - this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox"); - } + this.strategy = BBoxStrategy.newInstance(ctx, "bbox"); } // OLD STATIC TESTS (worthless?) @@ -239,16 +223,9 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase { FieldType fieldType; // random legacy or not legacy String FIELD_PREFIX = "bbox"; + fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE); if (random().nextBoolean()) { - fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE); - if (random().nextBoolean()) { - fieldType.setDimensions(0, 0); - } - } else { - fieldType = new FieldType(BBoxStrategy.LEGACY_FIELDTYPE); - if (random().nextBoolean()) { - fieldType.setIndexOptions(IndexOptions.NONE); - } + fieldType.setDimensions(0, 0); } strategy = new BBoxStrategy(ctx, FIELD_PREFIX, fieldType); diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java index 77c25298b67..54296da040c 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java @@ -27,7 +27,7 @@ import org.junit.Before; import org.junit.Test; import org.locationtech.spatial4j.shape.Shape; -import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean; +import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween; public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { @@ -42,17 +42,8 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { public void setUp() throws Exception { super.setUp(); tree = DateRangePrefixTree.INSTANCE; - if (randomBoolean()) { - strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange"); - } else { - //Test the format that existed <= Lucene 5.0 - strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange") { - @Override - protected CellToBytesRefIterator newCellToBytesRefIterator() { - return new CellToBytesRefIterator50(); - } - }; - } + strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange"); + ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(randomInt() % 5 == 0); Calendar tmpCal = tree.newCal(); int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND); tmpCal.add(randomCalWindowField, 2_000); @@ -79,15 +70,16 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { @Test public void testWithinSame() throws IOException { - final Calendar cal = tree.newCal(); + Shape shape = randomIndexedShape(); testOperation( - tree.toShape(cal), + shape, SpatialOperation.IsWithin, - tree.toShape(cal), true);//is within itself + shape, true);//is within itself } @Test public void testWorld() throws IOException { + ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false); 
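    // why setPointsOnly(false): setUp() randomly enables pointsOnly, but the world shape
    // indexed below is not a single leaf cell, and pointsOnly indexing rejects non-point
    // shapes (see PrefixTreeStrategy.createCellIteratorToIndex earlier in this patch).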
testOperation( tree.toShape(tree.newCal()),//world matches everything SpatialOperation.Contains, @@ -96,6 +88,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { @Test public void testBugInitIterOptimization() throws Exception { + ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false); //bug due to fast path initIter() optimization testOperation( tree.parseShape("[2014-03-27T23 TO 2014-04-01T01]"), @@ -114,6 +107,21 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { @Override protected Shape randomIndexedShape() { + if (((NumberRangePrefixTreeStrategy)strategy).isPointsOnly()) { + Calendar cal = tree.newCal(); + cal.setTimeInMillis(random().nextLong()); + return tree.toShape(cal); + } else { + return randomShape(); + } + } + + @Override + protected Shape randomQueryShape() { + return randomShape(); + } + + private Shape randomShape() { Calendar cal1 = randomCalendar(); UnitNRShape s1 = tree.toShape(cal1); if (rarely()) { @@ -144,9 +152,4 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase { } return cal; } - - @Override - protected Shape randomQueryShape() { - return randomIndexedShape(); - } } diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java index ac5ab953455..901594ef749 100644 --- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java +++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java @@ -63,12 +63,7 @@ public class TestPointVectorStrategy extends StrategyTestCase { @Test public void testCitiesIntersectsBBox() throws IOException { // note: does not require docValues - if (random().nextBoolean()) { - this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName()); - } else { - // switch to legacy instance sometimes, which has no docValues - this.strategy = PointVectorStrategy.newLegacyInstance(ctx, getClass().getSimpleName()); - } + this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName()); getAddAndVerifyIndexedDocuments(DATA_WORLD_CITIES_POINTS); executeQueries(SpatialMatchConcern.FILTER, QTEST_Cities_Intersects_BBox); } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java index 3e222bc2d52..166d80dec12 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java @@ -60,7 +60,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory; *

 * <ul>
 *  <li><code>wordset</code> - This is the default format, which supports one word per
 *      line (including any intra-word whitespace) and allows whole line comments
- *      begining with the "#" character. Blank lines are ignored.  See
+ *      beginning with the "#" character. Blank lines are ignored.  See
 *      {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
 *  </li>
 *  <li><code>snowball</code> - This format allows for multiple words specified on each
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5b92e3c4996..f57cac583d7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -194,6 +194,13 @@ Other Changes
 * SOLR-10700: Deprecated and converted the PostingsSolrHighlighter to extend UnifiedSolrHighlighter and
   thus no longer use the PostingsHighlighter. It should behave mostly the same. (David Smiley)

+* SOLR-10710: Fix LTR failing tests. (Diego Ceccarelli via Tomás Fernández Löbbe)
+
+* SOLR-10755: delete/refactor many solrj deprecations (hossman)
+
+* SOLR-10752: replicationFactor (nrtReplicas) default is 0 if tlogReplicas is specified when creating a collection
+  (Tomás Fernández Löbbe)
+
 ==================  6.7.0 ==================

 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

@@ -217,6 +224,8 @@ Upgrade Notes
   passwords via the env variables SOLR_SSL_KEY_STORE_PASSWORD and SOLR_SSL_TRUST_STORE_PASSWORD rather
   than system properties.

+* SOLR-10379: ManagedSynonymFilterFactory has been deprecated in favor of ManagedSynonymGraphFilterFactory.
+
 New Features
 ----------------------

@@ -224,12 +233,20 @@ New Features

 * SOLR-10721: Provide a way to know when Core Discovery is finished and when all async cores are done loading
   (Erick Erickson)
+
+* SOLR-10379: Add ManagedSynonymGraphFilterFactory, deprecate ManagedSynonymFilterFactory. (Steve Rowe)
+
+* SOLR-10479: Adds support for HttpShardHandlerFactory.loadBalancerRequests(MinimumAbsolute|MaximumFraction)
+  configuration. (Ramsey Haddad, Daniel Collins, Christine Poerschke)

 Bug Fixes
 ----------------------

 * SOLR-10723 JSON Facet API: resize() implemented incorrectly for CountSlotAcc, HllAgg.NumericAcc
   resulting in exceptions when using a hashing faceting method and sorting by hll(numeric_field). (yonik)
+
+* SOLR-10719: Creating a core.properties fails if the parent of core.properties is a symlinked directory
+  (Erick Erickson)

 Optimizations
 ----------------------

@@ -238,7 +255,6 @@ Optimizations
   so that the second phase which would normally involve calculating the domain for the bucket
   can be skipped entirely, leading to large performance improvements. (yonik)

-
 Other Changes
 ----------------------

@@ -250,6 +266,15 @@ Other Changes

 * SOLR-10438: Assign explicit useDocValuesAsStored values to all points field types in
   schema-point.xml/TestPointFields. (hossman, Steve Rowe)
+
+* LUCENE-7705: Allow CharTokenizer-derived tokenizers and KeywordTokenizer to configure the max token length.
+  (Amrit Sarkar via Erick Erickson)
+
+* SOLR-10659: Remove ResponseBuilder.getSortSpec use in SearchGroupShardResponseProcessor.
+  (Judith Silverman via Christine Poerschke)
+
+* SOLR-10741: Factor out createSliceShardsStr method from HttpShardHandler.prepDistributed.
+  (Domenico Fabio Marino via Christine Poerschke)

 ==================  6.6.0 ==================

@@ -458,6 +483,14 @@ Bug Fixes
   "lucene"/standard query parser, should require " TO " in range queries, and accept "TO" as endpoints in
   range queries. (hossman, Steve Rowe)

+* SOLR-10735: Windows script (solr.cmd) didn't work properly with directory containing spaces. Adding quotations
+  to fix (Uwe Schindler, janhoy, Tomas Fernandez-Lobbe, Ishan Chattopadhyaya)
+
+Ref Guide
+----------------------
+
+* SOLR-10758: Modernize the Solr ref guide's Chinese language analysis coverage.
(Steve Rowe) + Other Changes ---------------------- diff --git a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java index aadb9e2d4ce..dd64c3ff96a 100644 --- a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java +++ b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.time.Instant; import java.util.Arrays; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.apache.solr.schema.FieldType; diff --git a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java index d13795d8e29..803d8e0eacf 100644 --- a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java +++ b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java @@ -24,7 +24,7 @@ import java.util.Map; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.docvalues.LongDocValues; import org.apache.lucene.queries.function.valuesource.LongFieldSource; diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java index 4c17affe5bc..00159b927a4 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java @@ -76,8 +76,7 @@ public class FieldLengthFeature extends Feature { static { NORM_TABLE[0] = 0; for (int i = 1; i < 256; i++) { - float norm = SmallFloat.byte315ToFloat((byte) i); - NORM_TABLE[i] = 1.0f / (norm * norm); + NORM_TABLE[i] = SmallFloat.byte4ToInt((byte) i); } } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java index d4457a0a7a2..decb1c0888b 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java @@ -88,15 +88,16 @@ public class TestLTRQParserPlugin extends TestRerankBase { query.add("rows", "4"); query.add("fv", "true"); - String nonRerankedScore = "0.09271725"; + // FIXME: design better way to test this, we cannot check an absolute score + // String nonRerankedScore = "0.09271725"; // Normal solr order assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='9'", "/response/docs/[1]/id=='8'", "/response/docs/[2]/id=='7'", - "/response/docs/[3]/id=='6'", - "/response/docs/[3]/score=="+nonRerankedScore + "/response/docs/[3]/id=='6'" + // "/response/docs/[3]/score=="+nonRerankedScore ); query.add("rq", "{!ltr model=6029760550880411648 reRankDocs=3}"); @@ -106,8 +107,8 @@ public class TestLTRQParserPlugin extends TestRerankBase { "/response/docs/[0]/id=='7'", "/response/docs/[1]/id=='8'", 
"/response/docs/[2]/id=='9'", - "/response/docs/[3]/id=='6'", - "/response/docs/[3]/score=="+nonRerankedScore + "/response/docs/[3]/id=='6'" + // "/response/docs/[3]/score=="+nonRerankedScore ); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java index 630a68cf87f..46330c9f26d 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java @@ -42,8 +42,9 @@ public class TestParallelWeightCreation extends TestRerankBase{ query.add("rows", "4"); query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'"); + // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'"); aftertest(); } diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java index 7bf8373a56a..cbd0e2389da 100644 --- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java +++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java @@ -210,14 +210,14 @@ public class TestSelectiveWeightCreation extends TestRerankBase { @Test public void testSelectiveWeightsRequestFeaturesFromDifferentStore() throws Exception { - final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector( - "matchedTitle","1.0", "titlePhraseMatch","0.6103343"); - final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector( - "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0"); - final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector( - "popularity","3.0", "originalScore","1.0"); - - final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse); +// final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector( +// "matchedTitle","1.0", "titlePhraseMatch","0.6103343"); +// final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector( +// "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0"); +// final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector( +// "popularity","3.0", "originalScore","1.0"); +// +// final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse); // extract all features in externalmodel's store (default store) // rerank using externalmodel (default store) @@ -227,11 +227,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase { query.add("rows", "5"); query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w2 efi.userTitlePhrase2=w1}"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552"); + // 
SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'"); + // FIXME design better way to test this, we can't rely on absolute scores + // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'"); // extract all features from fstore4 // rerank using externalmodel (default store) @@ -240,11 +241,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase { query.add("fl", "*,score,fv:[fv store=fstore4 efi.myPop=3]"); query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552"); + // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'"); + // FIXME design better way to test this, we can't rely on absolute scores + // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'"); // extract all features from fstore4 // rerank using externalmodel2 (fstore2) @@ -255,9 +257,9 @@ public class TestSelectiveWeightCreation extends TestRerankBase { assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='5'"); assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==2.5"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'"); + // FIXME design better way to test this, we can't rely on absolute scores + // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'"); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java index 3d1a54e45e3..02bb0180a28 100644 --- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java @@ -95,9 +95,9 @@ public class CreateCollectionCmd implements Cmd { // look at the replication factor and see if it matches reality // if it does not, find best nodes to create more cores - int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, 1)); - int numPullReplicas = message.getInt(PULL_REPLICAS, 0); int numTlogReplicas = message.getInt(TLOG_REPLICAS, 0); + int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1)); + int numPullReplicas = message.getInt(PULL_REPLICAS, 0); ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler(); final String async = message.getStr(ASYNC); diff --git 
a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java index e942c9b90ee..99c101bfac0 100644 --- a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java +++ b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java @@ -39,6 +39,7 @@ import java.util.stream.Collectors; import com.google.common.collect.Lists; import org.apache.solr.common.SolrException; +import org.apache.solr.util.FileUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,13 +86,15 @@ public class CorePropertiesLocator implements CoresLocator { private void writePropertiesFile(CoreDescriptor cd, Path propfile) { Properties p = buildCoreProperties(cd); try { - Files.createDirectories(propfile.getParent()); + FileUtils.createDirectories(propfile.getParent()); // Handling for symlinks. try (Writer os = new OutputStreamWriter(Files.newOutputStream(propfile), StandardCharsets.UTF_8)) { p.store(os, "Written by CorePropertiesLocator"); } } catch (IOException e) { logger.error("Couldn't persist core properties to {}: {}", propfile, e.getMessage()); + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Couldn't persist core properties to " + propfile.toAbsolutePath().toString() + " : " + e.getMessage()); } } diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index c045f206508..f79f86ee05e 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -45,6 +45,9 @@ import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.metrics.CountMetric; import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric; @@ -185,6 +188,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("percentile", PercentileEvaluator.class) .withFunctionName("empiricalDistribution", EmpiricalDistributionEvaluator.class) .withFunctionName("describe", DescribeEvaluator.class) + .withFunctionName("finddelay", FindDelayEvaluator.class) + .withFunctionName("sequence", SequenceEvaluator.class) + .withFunctionName("array", ArrayEvaluator.class) + .withFunctionName("hist", HistogramEvaluator.class) + .withFunctionName("anova", AnovaEvaluator.class) + .withFunctionName("movingAvg", MovingAverageEvaluator.class) // metrics .withFunctionName("min", MinMetric.class) @@ -296,7 +305,14 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, TupleStream tupleStream; try { - tupleStream = this.streamFactory.constructStream(params.get("expr")); + StreamExpression streamExpression = StreamExpressionParser.parse(params.get("expr")); + if(this.streamFactory.isEvaluator(streamExpression)) { + StreamExpression tupleExpression = new StreamExpression("tuple"); + tupleExpression.addParameter(new StreamExpressionNamedParameter("return-value", 
streamExpression)); + tupleStream = this.streamFactory.constructStream(tupleExpression); + } else { + tupleStream = this.streamFactory.constructStream(streamExpression); + } } catch (Exception e) { //Catch exceptions that occur while the stream is being created. This will include streaming expression parse rules. SolrException.log(logger, e); diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java index 4ec3b7924f4..bc620b61418 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java @@ -449,17 +449,7 @@ public class HttpShardHandler extends ShardHandler { } } // And now recreate the | delimited list of equivalent servers - final StringBuilder sliceShardsStr = new StringBuilder(); - boolean first = true; - for (String shardUrl : shardUrls) { - if (first) { - first = false; - } else { - sliceShardsStr.append('|'); - } - sliceShardsStr.append(shardUrl); - } - rb.shards[i] = sliceShardsStr.toString(); + rb.shards[i] = createSliceShardsStr(shardUrls); } } String shards_rows = params.get(ShardParams.SHARDS_ROWS); @@ -472,6 +462,20 @@ public class HttpShardHandler extends ShardHandler { } } + private static String createSliceShardsStr(final List shardUrls) { + final StringBuilder sliceShardsStr = new StringBuilder(); + boolean first = true; + for (String shardUrl : shardUrls) { + if (first) { + first = false; + } else { + sliceShardsStr.append('|'); + } + sliceShardsStr.append(shardUrl); + } + return sliceShardsStr.toString(); + } + private void addSlices(Map target, ClusterState state, SolrParams params, String collectionName, String shardKeys, boolean multiCollection) { DocCollection coll = state.getCollection(collectionName); diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java index e3787cdf1ca..73d97078a09 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java +++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java @@ -97,6 +97,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. int maximumPoolSize = Integer.MAX_VALUE; int keepAliveTime = 5; int queueSize = -1; + int permittedLoadBalancerRequestsMinimumAbsolute = 0; + float permittedLoadBalancerRequestsMaximumFraction = 1.0f; boolean accessPolicy = false; private String scheme = null; @@ -122,6 +124,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. // If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue"; + // The minimum number of replicas that may be used + static final String LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE = "loadBalancerRequestsMinimumAbsolute"; + + // The maximum proportion of replicas to be used + static final String LOAD_BALANCER_REQUESTS_MAX_FRACTION = "loadBalancerRequestsMaximumFraction"; + // Configure if the threadpool favours fairness over throughput static final String INIT_FAIRNESS_POLICY = "fairnessPolicy"; @@ -164,6 +172,16 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. 
this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, maximumPoolSize,sb); this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, keepAliveTime,sb); this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, queueSize,sb); + this.permittedLoadBalancerRequestsMinimumAbsolute = getParameter( + args, + LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE, + permittedLoadBalancerRequestsMinimumAbsolute, + sb); + this.permittedLoadBalancerRequestsMaximumFraction = getParameter( + args, + LOAD_BALANCER_REQUESTS_MAX_FRACTION, + permittedLoadBalancerRequestsMaximumFraction, + sb); this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, accessPolicy,sb); log.debug("created with {}",sb); @@ -252,7 +270,15 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org. */ public LBHttpSolrClient.Rsp makeLoadBalancedRequest(final QueryRequest req, List urls) throws SolrServerException, IOException { - return loadbalancer.request(new LBHttpSolrClient.Req(req, urls)); + return loadbalancer.request(newLBHttpSolrClientReq(req, urls)); + } + + protected LBHttpSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List urls) { + int numServersToTry = (int)Math.floor(urls.size() * this.permittedLoadBalancerRequestsMaximumFraction); + if (numServersToTry < this.permittedLoadBalancerRequestsMinimumAbsolute) { + numServersToTry = this.permittedLoadBalancerRequestsMinimumAbsolute; + } + return new LBHttpSolrClient.Req(req, urls, numServersToTry); } /** diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 7e56ee44e58..18d9b446121 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -84,6 +84,12 @@ import org.slf4j.LoggerFactory; public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized { + /** + * This constant was formerly part of HighlightParams. After deprecation it was removed so clients + * would no longer use it, but we still support it server side. + */ + private static final String USE_FVH = HighlightParams.HIGHLIGHT + ".useFastVectorHighlighter"; + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); protected final SolrCore solrCore; @@ -492,7 +498,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf boolean methodFvh = HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals( params.getFieldParam(schemaField.getName(), HighlightParams.METHOD)) - || params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false); + || params.getFieldBool(schemaField.getName(), USE_FVH, false); if (!methodFvh) return false; boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets(); if (!termPosOff) { diff --git a/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java b/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java new file mode 100644 index 00000000000..c919eb8297d --- /dev/null +++ b/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java @@ -0,0 +1,706 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.legacy; + +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; +import org.apache.lucene.spatial.util.DistanceToShapeValueSource; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.NumericUtils; +import org.locationtech.spatial4j.context.SpatialContext; +import org.locationtech.spatial4j.shape.Point; +import org.locationtech.spatial4j.shape.Rectangle; +import org.locationtech.spatial4j.shape.Shape; + + +/** + * A SpatialStrategy for indexing and searching Rectangles by storing its + * coordinates in numeric fields. It supports all {@link SpatialOperation}s and + * has a custom overlap relevancy. It is based on GeoPortal's SpatialClauseAdapter. + *

      + * Characteristics: + *
      + *

        + *
      • Only indexes Rectangles; just one per field value. Other shapes can be provided + * and the bounding box will be used.
      • + *
      • Can query only by a Rectangle. Providing other shapes is an error.
      • + *
      • Supports most {@link SpatialOperation}s but not Overlaps.
      • + *
      • Uses the DocValues API for any sorting / relevancy.
      • + *
      + *

      + * Implementation: + *

      + * This uses 4 double fields for minX, maxX, minY, maxY + * and a boolean to mark a dateline cross. Depending on the particular {@link + * SpatialOperation}s, there are a variety of range queries on {@link DoublePoint}s to be + * done. + * The {@link #makeOverlapRatioValueSource(org.locationtech.spatial4j.shape.Rectangle, double)} + * works by calculating the query bbox overlap percentage against the indexed + * shape overlap percentage. The indexed shape's coordinates are retrieved from + * {@link org.apache.lucene.index.LeafReader#getNumericDocValues}. + * + * @lucene.experimental + */ +public class BBoxStrategy extends SpatialStrategy { + + // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we + // create more than one Field. + + /** + * pointValues, docValues, and nothing else. + */ + public static FieldType DEFAULT_FIELDTYPE; + + @Deprecated + public static LegacyFieldType LEGACY_FIELDTYPE; + static { + // Default: pointValues + docValues + FieldType type = new FieldType(); + type.setDimensions(1, Double.BYTES);//pointValues (assume Double) + type.setDocValuesType(DocValuesType.NUMERIC);//docValues + type.setStored(false); + type.freeze(); + DEFAULT_FIELDTYPE = type; + // Legacy default: legacyNumerics + docValues + LegacyFieldType legacyType = new LegacyFieldType(); + legacyType.setIndexOptions(IndexOptions.DOCS); + legacyType.setNumericType(LegacyNumericType.DOUBLE); + legacyType.setNumericPrecisionStep(8);// same as solr default + legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues + legacyType.setStored(false); + legacyType.freeze(); + LEGACY_FIELDTYPE = legacyType; + } + + public static final String SUFFIX_MINX = "__minX"; + public static final String SUFFIX_MAXX = "__maxX"; + public static final String SUFFIX_MINY = "__minY"; + public static final String SUFFIX_MAXY = "__maxY"; + public static final String SUFFIX_XDL = "__xdl"; + + /* + * The Bounding Box gets stored as four fields for x/y min/max and a flag + * that says if the box crosses the dateline (xdl). + */ + final String field_bbox; + final String field_minX; + final String field_minY; + final String field_maxX; + final String field_maxY; + final String field_xdl; // crosses dateline + + private final FieldType optionsFieldType;//from constructor; aggregate field type used to express all options + private final int fieldsLen; + private final boolean hasStored; + private final boolean hasDocVals; + private final boolean hasPointVals; + // equiv to "hasLegacyNumerics": + private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step. + private final FieldType xdlFieldType; + + /** + * Creates a new {@link BBoxStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery} + */ + public static BBoxStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) { + return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE); + } + + /** + * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility + * @deprecated LegacyNumerics will be removed + */ + @Deprecated + public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) { + return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE); + } + + /** + * Creates this strategy. + * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL + * field too. 
Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features + * aren't needed then disable them. + */ + public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) { + super(ctx, fieldNamePrefix); + field_bbox = fieldNamePrefix; + field_minX = fieldNamePrefix + SUFFIX_MINX; + field_maxX = fieldNamePrefix + SUFFIX_MAXX; + field_minY = fieldNamePrefix + SUFFIX_MINY; + field_maxY = fieldNamePrefix + SUFFIX_MAXY; + field_xdl = fieldNamePrefix + SUFFIX_XDL; + + fieldType.freeze(); + this.optionsFieldType = fieldType; + + int numQuads = 0; + if ((this.hasStored = fieldType.stored())) { + numQuads++; + } + if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) { + numQuads++; + } + if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) { + numQuads++; + } + if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) { + if (hasPointVals) { + throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive"); + } + final LegacyFieldType legacyType = (LegacyFieldType) fieldType; + if (legacyType.numericType() != LegacyNumericType.DOUBLE) { + throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType()); + } + numQuads++; + legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED); + legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep()); + legacyNumericFieldType.freeze(); + } else { + legacyNumericFieldType = null; + } + + if (hasPointVals || legacyNumericFieldType != null) { // if we have an index... + xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED); + xdlFieldType.setIndexOptions(IndexOptions.DOCS); + xdlFieldType.freeze(); + } else { + xdlFieldType = null; + } + + this.fieldsLen = numQuads * 4 + (xdlFieldType != null ? 1 : 0); + } + + /** Returns a field type representing the set of field options. This is identical to what was passed into the + * constructor. It's frozen. 
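The constructor above counts one quad of fields per enabled option (stored, docValues, pointValues, legacy numerics) plus the single dateline flag. A hedged sketch of what one rectangle expands to under the default field type; "geo" is a hypothetical field-name prefix and rect is a spatial4j Rectangle:

    // Assumed imports: org.apache.lucene.document.*
    Document doc = new Document();
    doc.add(new DoublePoint("geo__minX", rect.getMinX()));          // range search
    doc.add(new DoubleDocValuesField("geo__minX", rect.getMinX())); // sort / relevancy
    // ...the same pair is repeated for geo__maxX, geo__minY, geo__maxY...
    doc.add(new StringField("geo__xdl",
        rect.getCrossesDateLine() ? "T" : "F", Field.Store.NO));    // dateline flag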
*/ + public FieldType getFieldType() { + return optionsFieldType; + } + + //--------------------------------- + // Indexing + //--------------------------------- + + @Override + public Field[] createIndexableFields(Shape shape) { + return createIndexableFields(shape.getBoundingBox()); + } + + private Field[] createIndexableFields(Rectangle bbox) { + Field[] fields = new Field[fieldsLen]; + int idx = -1; + if (hasStored) { + fields[++idx] = new StoredField(field_minX, bbox.getMinX()); + fields[++idx] = new StoredField(field_minY, bbox.getMinY()); + fields[++idx] = new StoredField(field_maxX, bbox.getMaxX()); + fields[++idx] = new StoredField(field_maxY, bbox.getMaxY()); + } + if (hasDocVals) { + fields[++idx] = new DoubleDocValuesField(field_minX, bbox.getMinX()); + fields[++idx] = new DoubleDocValuesField(field_minY, bbox.getMinY()); + fields[++idx] = new DoubleDocValuesField(field_maxX, bbox.getMaxX()); + fields[++idx] = new DoubleDocValuesField(field_maxY, bbox.getMaxY()); + } + if (hasPointVals) { + fields[++idx] = new DoublePoint(field_minX, bbox.getMinX()); + fields[++idx] = new DoublePoint(field_minY, bbox.getMinY()); + fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX()); + fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY()); + } + if (legacyNumericFieldType != null) { + fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType); + fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType); + fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType); + fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType); + } + if (xdlFieldType != null) { + fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType); + } + assert idx == fields.length - 1; + return fields; + } + + + //--------------------------------- + // Value Source / Relevancy + //--------------------------------- + + /** + * Provides access to each rectangle per document as a ValueSource in which + * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)} returns a {@link + * Shape}. + */ //TODO raise to SpatialStrategy + public ValueSource makeShapeValueSource() { + return new BBoxValueSource(this); + } + + @Override + public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) { + //TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl. + return new DistanceToShapeValueSource(makeShapeValueSource(), queryPoint, multiplier, ctx); + } + + /** Returns a similarity based on {@link BBoxOverlapRatioValueSource}. This is just a + * convenience method. */ + public ValueSource makeOverlapRatioValueSource(Rectangle queryBox, double queryTargetProportion) { + return new BBoxOverlapRatioValueSource( + makeShapeValueSource(), ctx.isGeo(), queryBox, queryTargetProportion, 0.0); + } + + //--------------------------------- + // Query Building + //--------------------------------- + + // Utility on SpatialStrategy? +// public Query makeQueryWithValueSource(SpatialArgs args, ValueSource valueSource) { +// return new CustomScoreQuery(makeQuery(args), new FunctionQuery(valueSource)); + //or... 
+// return new BooleanQuery.Builder() +// .add(new FunctionQuery(valueSource), BooleanClause.Occur.MUST)//matches everything and provides score +// .add(filterQuery, BooleanClause.Occur.FILTER)//filters (score isn't used) +// .build(); +// } + + @Override + public Query makeQuery(SpatialArgs args) { + Shape shape = args.getShape(); + if (!(shape instanceof Rectangle)) + throw new UnsupportedOperationException("Can only query by Rectangle, not " + shape); + + Rectangle bbox = (Rectangle) shape; + Query spatial; + + // Useful for understanding Relations: + // http://edndoc.esri.com/arcsde/9.1/general_topics/understand_spatial_relations.htm + SpatialOperation op = args.getOperation(); + if( op == SpatialOperation.BBoxIntersects ) spatial = makeIntersects(bbox); + else if( op == SpatialOperation.BBoxWithin ) spatial = makeWithin(bbox); + else if( op == SpatialOperation.Contains ) spatial = makeContains(bbox); + else if( op == SpatialOperation.Intersects ) spatial = makeIntersects(bbox); + else if( op == SpatialOperation.IsEqualTo ) spatial = makeEquals(bbox); + else if( op == SpatialOperation.IsDisjointTo ) spatial = makeDisjoint(bbox); + else if( op == SpatialOperation.IsWithin ) spatial = makeWithin(bbox); + else { //no Overlaps support yet + throw new UnsupportedSpatialOperation(op); + } + return new ConstantScoreQuery(spatial); + } + + /** + * Constructs a query to retrieve documents that fully contain the input envelope. + * + * @return the spatial query + */ + Query makeContains(Rectangle bbox) { + + // general case + // docMinX <= queryExtent.getMinX() AND docMinY <= queryExtent.getMinY() AND docMaxX >= queryExtent.getMaxX() AND docMaxY >= queryExtent.getMaxY() + + // Y conditions + // docMinY <= queryExtent.getMinY() AND docMaxY >= queryExtent.getMaxY() + Query qMinY = this.makeNumericRangeQuery(field_minY, null, bbox.getMinY(), false, true); + Query qMaxY = this.makeNumericRangeQuery(field_maxY, bbox.getMaxY(), null, true, false); + Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY); + + // X conditions + Query xConditions; + + // queries that do not cross the date line + if (!bbox.getCrossesDateLine()) { + + // X Conditions for documents that do not cross the date line, + // documents that contain the min X and max X of the query envelope, + // docMinX <= queryExtent.getMinX() AND docMaxX >= queryExtent.getMaxX() + Query qMinX = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true); + Query qMaxX = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false); + Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX); + Query qNonXDL = this.makeXDL(false, qMinMax); + + if (!ctx.isGeo()) { + xConditions = qNonXDL; + } else { + // X Conditions for documents that cross the date line, + // the left portion of the document contains the min X of the query + // OR the right portion of the document contains the max X of the query, + // docMinXLeft <= queryExtent.getMinX() OR docMaxXRight >= queryExtent.getMaxX() + Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true); + Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false); + Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qXDLLeft, qXDLRight); + Query qXDL = this.makeXDL(true, qXDLLeftRight); + + Query qEdgeDL = null; + if (bbox.getMinX() == bbox.getMaxX() && Math.abs(bbox.getMinX()) == 180) { + double edge = bbox.getMinX() * -1;//opposite dateline edge + qEdgeDL = 
makeQuery(BooleanClause.Occur.SHOULD, + makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge)); + } + + // apply the non-XDL and XDL conditions + xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL, qEdgeDL); + } + } else { + // queries that cross the date line + + // No need to search for documents that do not cross the date line + + // X Conditions for documents that cross the date line, + // the left portion of the document contains the min X of the query + // AND the right portion of the document contains the max X of the query, + // docMinXLeft <= queryExtent.getMinX() AND docMaxXRight >= queryExtent.getMaxX() + Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true); + Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false); + Query qXDLLeftRight = this.makeXDL(true, this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight)); + + Query qWorld = makeQuery(BooleanClause.Occur.MUST, + makeNumberTermQuery(field_minX, -180), makeNumberTermQuery(field_maxX, 180)); + + xConditions = makeQuery(BooleanClause.Occur.SHOULD, qXDLLeftRight, qWorld); + } + + // both X and Y conditions must occur + return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions); + } + + /** + * Constructs a query to retrieve documents that are disjoint to the input envelope. + * + * @return the spatial query + */ + Query makeDisjoint(Rectangle bbox) { + + // general case + // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX() OR docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY() + + // Y conditions + // docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY() + Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMaxY(), null, false, false); + Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMinY(), false, false); + Query yConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qMinY, qMaxY); + + // X conditions + Query xConditions; + + // queries that do not cross the date line + if (!bbox.getCrossesDateLine()) { + + // X Conditions for documents that do not cross the date line, + // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX() + Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false); + if (bbox.getMinX() == -180.0 && ctx.isGeo()) {//touches dateline; -180 == 180 + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(qMinX, BooleanClause.Occur.MUST); + bq.add(makeNumberTermQuery(field_maxX, 180.0), BooleanClause.Occur.MUST_NOT); + qMinX = bq.build(); + } + Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false); + + if (bbox.getMaxX() == 180.0 && ctx.isGeo()) {//touches dateline; -180 == 180 + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(qMaxX, BooleanClause.Occur.MUST); + bq.add(makeNumberTermQuery(field_minX, -180.0), BooleanClause.Occur.MUST_NOT); + qMaxX = bq.build(); + } + Query qMinMax = this.makeQuery(BooleanClause.Occur.SHOULD, qMinX, qMaxX); + Query qNonXDL = this.makeXDL(false, qMinMax); + + if (!ctx.isGeo()) { + xConditions = qNonXDL; + } else { + // X Conditions for documents that cross the date line, + + // both the left and right portions of the document must be disjoint to the query + // (docMinXLeft > queryExtent.getMaxX() OR docMaxXLeft < queryExtent.getMinX()) AND + // (docMinXRight > queryExtent.getMaxX() OR docMaxXRight < queryExtent.getMinX()) + // where: docMaxXLeft = 180.0, 
docMinXRight = -180.0 + // (docMaxXLeft < queryExtent.getMinX()) equates to (180.0 < queryExtent.getMinX()) and is ignored + // (docMinXRight > queryExtent.getMaxX()) equates to (-180.0 > queryExtent.getMaxX()) and is ignored + Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false); + Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false); + Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXRight); + Query qXDL = this.makeXDL(true, qLeftRight); + + // apply the non-XDL and XDL conditions + xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL); + } + // queries that cross the date line + } else { + + // X Conditions for documents that do not cross the date line, + // the document must be disjoint to both the left and right query portions + // (docMinX > queryExtent.getMaxX()Left OR docMaxX < queryExtent.getMinX()) AND (docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()Left) + // where: queryExtent.getMaxX()Left = 180.0, queryExtent.getMinX()Left = -180.0 + Query qMinXLeft = this.makeNumericRangeQuery(field_minX, 180.0, null, false, false); + Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false); + Query qMinXRight = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false); + Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, -180.0, false, false); + Query qLeft = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXLeft, qMaxXLeft); + Query qRight = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXRight, qMaxXRight); + Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qLeft, qRight); + + // No need to search for documents that do not cross the date line + + xConditions = this.makeXDL(false, qLeftRight); + } + + // either X or Y conditions should occur + return this.makeQuery(BooleanClause.Occur.SHOULD, xConditions, yConditions); + } + + /** + * Constructs a query to retrieve documents that equal the input envelope. + * + * @return the spatial query + */ + Query makeEquals(Rectangle bbox) { + + // docMinX = queryExtent.getMinX() AND docMinY = queryExtent.getMinY() AND docMaxX = queryExtent.getMaxX() AND docMaxY = queryExtent.getMaxY() + Query qMinX = makeNumberTermQuery(field_minX, bbox.getMinX()); + Query qMinY = makeNumberTermQuery(field_minY, bbox.getMinY()); + Query qMaxX = makeNumberTermQuery(field_maxX, bbox.getMaxX()); + Query qMaxY = makeNumberTermQuery(field_maxY, bbox.getMaxY()); + return makeQuery(BooleanClause.Occur.MUST, qMinX, qMinY, qMaxX, qMaxY); + } + + /** + * Constructs a query to retrieve documents that intersect the input envelope. + * + * @return the spatial query + */ + Query makeIntersects(Rectangle bbox) { + + // the original intersects query does not work for envelopes that cross the date line, + // switch to a NOT Disjoint query + + // MUST_NOT causes a problem when it's the only clause type within a BooleanQuery, + // to get around it we add all documents as a SHOULD + + // there must be an envelope, it must not be disjoint + Query qHasEnv; + if (ctx.isGeo()) { + Query qIsNonXDL = this.makeXDL(false); + Query qIsXDL = ctx.isGeo() ? 
this.makeXDL(true) : null; + qHasEnv = this.makeQuery(BooleanClause.Occur.SHOULD, qIsNonXDL, qIsXDL); + } else { + qHasEnv = this.makeXDL(false); + } + + BooleanQuery.Builder qNotDisjoint = new BooleanQuery.Builder(); + qNotDisjoint.add(qHasEnv, BooleanClause.Occur.MUST); + Query qDisjoint = makeDisjoint(bbox); + qNotDisjoint.add(qDisjoint, BooleanClause.Occur.MUST_NOT); + + //Query qDisjoint = makeDisjoint(); + //BooleanQuery qNotDisjoint = new BooleanQuery(); + //qNotDisjoint.add(new MatchAllDocsQuery(),BooleanClause.Occur.SHOULD); + //qNotDisjoint.add(qDisjoint,BooleanClause.Occur.MUST_NOT); + return qNotDisjoint.build(); + } + + /** + * Makes a boolean query based upon a collection of queries and a logical operator. + * + * @param occur the logical operator + * @param queries the query collection + * @return the query + */ + BooleanQuery makeQuery(BooleanClause.Occur occur, Query... queries) { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + for (Query query : queries) { + if (query != null) + bq.add(query, occur); + } + return bq.build(); + } + + /** + * Constructs a query to retrieve documents are fully within the input envelope. + * + * @return the spatial query + */ + Query makeWithin(Rectangle bbox) { + + // general case + // docMinX >= queryExtent.getMinX() AND docMinY >= queryExtent.getMinY() AND docMaxX <= queryExtent.getMaxX() AND docMaxY <= queryExtent.getMaxY() + + // Y conditions + // docMinY >= queryExtent.getMinY() AND docMaxY <= queryExtent.getMaxY() + Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMinY(), null, true, false); + Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMaxY(), false, true); + Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY); + + // X conditions + Query xConditions; + + if (ctx.isGeo() && bbox.getMinX() == -180.0 && bbox.getMaxX() == 180.0) { + //if query world-wraps, only the y condition matters + return yConditions; + + } else if (!bbox.getCrossesDateLine()) { + // queries that do not cross the date line + + // docMinX >= queryExtent.getMinX() AND docMaxX <= queryExtent.getMaxX() + Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false); + Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true); + Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX); + + double edge = 0;//none, otherwise opposite dateline of query + if (bbox.getMinX() == -180.0) + edge = 180; + else if (bbox.getMaxX() == 180.0) + edge = -180; + if (edge != 0 && ctx.isGeo()) { + Query edgeQ = makeQuery(BooleanClause.Occur.MUST, + makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge)); + qMinMax = makeQuery(BooleanClause.Occur.SHOULD, qMinMax, edgeQ); + } + + xConditions = this.makeXDL(false, qMinMax); + + // queries that cross the date line + } else { + + // X Conditions for documents that do not cross the date line + + // the document should be within the left portion of the query + // docMinX >= queryExtent.getMinX() AND docMaxX <= 180.0 + Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false); + Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, 180.0, false, true); + Query qLeft = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXLeft); + + // the document should be within the right portion of the query + // docMinX >= -180.0 AND docMaxX <= queryExtent.getMaxX() + Query qMinXRight = this.makeNumericRangeQuery(field_minX, -180.0, null, true, false); + 
Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true); + Query qRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXRight, qMaxXRight); + + // either left or right conditions should occur, + // apply the left and right conditions to documents that do not cross the date line + Query qLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qLeft, qRight); + Query qNonXDL = this.makeXDL(false, qLeftRight); + + // X Conditions for documents that cross the date line, + // the left portion of the document must be within the left portion of the query, + // AND the right portion of the document must be within the right portion of the query + // docMinXLeft >= queryExtent.getMinX() AND docMaxXLeft <= 180.0 + // AND docMinXRight >= -180.0 AND docMaxXRight <= queryExtent.getMaxX() + Query qXDLLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false); + Query qXDLRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true); + Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight); + Query qXDL = this.makeXDL(true, qXDLLeftRight); + + // apply the non-XDL and XDL conditions + xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL); + } + + // both X and Y conditions must occur + return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions); + } + + /** + * Constructs a query to retrieve documents that do or do not cross the date line. + * + * @param crossedDateLine true for documents that cross the date line + * @return the query + */ + private Query makeXDL(boolean crossedDateLine) { + // The 'T' and 'F' values match solr fields + return new TermQuery(new Term(field_xdl, crossedDateLine ? "T" : "F")); + } + + /** + * Constructs a query to retrieve documents that do or do not cross the date line + * and match the supplied spatial query. + * + * @param crossedDateLine true for documents that cross the date line + * @param query the spatial query + * @return the query + */ + private Query makeXDL(boolean crossedDateLine, Query query) { + if (!ctx.isGeo()) { + assert !crossedDateLine; + return query; + } + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(this.makeXDL(crossedDateLine), BooleanClause.Occur.MUST); + bq.add(query, BooleanClause.Occur.MUST); + return bq.build(); + } + + private Query makeNumberTermQuery(String field, double number) { + if (hasPointVals) { + return DoublePoint.newExactQuery(field, number); + } else if (legacyNumericFieldType != null) { + BytesRefBuilder bytes = new BytesRefBuilder(); + LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes); + return new TermQuery(new Term(field, bytes.get())); + } + throw new UnsupportedOperationException("An index is required for this operation."); + } + + /** + * Returns a numeric range query based on FieldType + * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType} + * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields + * + * @param fieldname field name. must not be null. + * @param min minimum value of the range. + * @param max maximum value of the range. + * @param minInclusive include the minimum value if true. 
+ * @param maxInclusive include the maximum value if true + */ + private Query makeNumericRangeQuery(String fieldname, Double min, Double max, boolean minInclusive, boolean maxInclusive) { + if (hasPointVals) { + if (min == null) { + min = Double.NEGATIVE_INFINITY; + } + + if (max == null) { + max = Double.POSITIVE_INFINITY; + } + + if (minInclusive == false) { + min = Math.nextUp(min); + } + + if (maxInclusive == false) { + max = Math.nextDown(max); + } + + return DoublePoint.newRangeQuery(fieldname, min, max); + } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0 + return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive); + } + throw new UnsupportedOperationException("An index is required for this operation."); + } +} diff --git a/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java b/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java new file mode 100644 index 00000000000..cd577c71a75 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.legacy; + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.Explanation; +import org.locationtech.spatial4j.shape.Rectangle; + +/** + * A ValueSource in which the indexed Rectangle is returned from + * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)}. 
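One note on makeNumericRangeQuery above: DoublePoint.newRangeQuery is inclusive on both ends, so exclusive bounds are emulated by nudging to the adjacent representable double. A small illustration with hypothetical field name and values:

    // "minX > 10.0, unbounded above" expressed as an inclusive range:
    Query q = DoublePoint.newRangeQuery("geo__minX",
        Math.nextUp(10.0),           // smallest double strictly greater than 10.0
        Double.POSITIVE_INFINITY);   // open upper end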
+ * + * @lucene.internal + */ +class BBoxValueSource extends ValueSource { + + private final BBoxStrategy strategy; + + public BBoxValueSource(BBoxStrategy strategy) { + this.strategy = strategy; + } + + @Override + public String description() { + return "bboxShape(" + strategy.getFieldName() + ")"; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { + LeafReader reader = readerContext.reader(); + final NumericDocValues minX = DocValues.getNumeric(reader, strategy.field_minX); + final NumericDocValues minY = DocValues.getNumeric(reader, strategy.field_minY); + final NumericDocValues maxX = DocValues.getNumeric(reader, strategy.field_maxX); + final NumericDocValues maxY = DocValues.getNumeric(reader, strategy.field_maxY); + + //reused + final Rectangle rect = strategy.getSpatialContext().makeRectangle(0,0,0,0); + + return new FunctionValues() { + private int lastDocID = -1; + + private double getDocValue(NumericDocValues values, int doc) throws IOException { + int curDocID = values.docID(); + if (doc > curDocID) { + curDocID = values.advance(doc); + } + if (doc == curDocID) { + return Double.longBitsToDouble(values.longValue()); + } else { + return 0.0; + } + } + + @Override + public Object objectVal(int doc) throws IOException { + if (doc < lastDocID) { + throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc); + } + lastDocID = doc; + + double minXValue = getDocValue(minX, doc); + if (minX.docID() != doc) { + return null; + } else { + double minYValue = getDocValue(minY, doc); + double maxXValue = getDocValue(maxX, doc); + double maxYValue = getDocValue(maxY, doc); + rect.reset(minXValue, maxXValue, minYValue, maxYValue); + return rect; + } + } + + @Override + public String strVal(int doc) throws IOException {//TODO support WKT output once Spatial4j does + Object v = objectVal(doc); + return v == null ? null : v.toString(); + } + + @Override + public boolean exists(int doc) throws IOException { + getDocValue(minX, doc); + return minX.docID() == doc; + } + + @Override + public Explanation explain(int doc) throws IOException { + return Explanation.match(Float.NaN, toString(doc)); + } + + @Override + public String toString(int doc) throws IOException { + return description() + '=' + strVal(doc); + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + BBoxValueSource that = (BBoxValueSource) o; + + if (!strategy.equals(that.strategy)) return false; + + return true; + } + + @Override + public int hashCode() { + return strategy.hashCode(); + } +} diff --git a/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java b/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java new file mode 100644 index 00000000000..8685d8824a6 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
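The getDocValue helper in BBoxValueSource above follows the iterator-style NumericDocValues API: documents must be visited in increasing order, and presence is checked by comparing docID() after an advance. A minimal sketch of the pattern; the fallback-value parameter is an assumption:

    // Advance at most once per target doc; absent docs yield the default.
    static double readOrDefault(NumericDocValues vals, int doc, double dflt) throws IOException {
      int cur = vals.docID();
      if (doc > cur) {
        cur = vals.advance(doc); // moves to the first docID >= doc
      }
      return cur == doc ? Double.longBitsToDouble(vals.longValue()) : dflt;
    }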
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.legacy; + +import org.apache.lucene.index.NumericDocValues; +import org.locationtech.spatial4j.distance.DistanceCalculator; +import org.locationtech.spatial4j.shape.Point; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.ValueSource; + +import java.io.IOException; +import java.util.Map; + +/** + * An implementation of the Lucene ValueSource model that returns the distance + * for a {@link PointVectorStrategy}. + * + * @lucene.internal + */ +public class DistanceValueSource extends ValueSource { + + private PointVectorStrategy strategy; + private final Point from; + private final double multiplier; + + /** + * Constructor. + */ + public DistanceValueSource(PointVectorStrategy strategy, Point from, double multiplier) { + this.strategy = strategy; + this.from = from; + this.multiplier = multiplier; + } + + /** + * Returns the ValueSource description. + */ + @Override + public String description() { + return "DistanceValueSource("+strategy+", "+from+")"; + } + + /** + * Returns the FunctionValues used by the function query. + */ + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { + LeafReader reader = readerContext.reader(); + + final NumericDocValues ptX = DocValues.getNumeric(reader, strategy.getFieldNameX()); + final NumericDocValues ptY = DocValues.getNumeric(reader, strategy.getFieldNameY()); + + return new FunctionValues() { + + private int lastDocID = -1; + + private final Point from = DistanceValueSource.this.from; + private final DistanceCalculator calculator = strategy.getSpatialContext().getDistCalc(); + private final double nullValue = + (strategy.getSpatialContext().isGeo() ? 
180 * multiplier : Double.MAX_VALUE); + + private double getDocValue(NumericDocValues values, int doc) throws IOException { + int curDocID = values.docID(); + if (doc > curDocID) { + curDocID = values.advance(doc); + } + if (doc == curDocID) { + return Double.longBitsToDouble(values.longValue()); + } else { + return 0.0; + } + } + + @Override + public float floatVal(int doc) throws IOException { + return (float) doubleVal(doc); + } + + @Override + public double doubleVal(int doc) throws IOException { + // make sure it has minX and area + double x = getDocValue(ptX, doc); + if (ptX.docID() == doc) { + double y = getDocValue(ptY, doc); + assert ptY.docID() == doc; + return calculator.distance(from, x, y) * multiplier; + } + return nullValue; + } + + @Override + public String toString(int doc) throws IOException { + return description() + "=" + floatVal(doc); + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + + DistanceValueSource that = (DistanceValueSource) o; + + if (!from.equals(that.from)) return false; + if (!strategy.equals(that.strategy)) return false; + if (multiplier != that.multiplier) return false; + + return true; + } + + @Override + public int hashCode() { + return from.hashCode(); + } +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java similarity index 92% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java index e98a4f0f567..b6a2897f5c9 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.Document; @@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyFloatField}. * *
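In DistanceValueSource above, each document scores as the distance from the query point to its indexed (x, y), scaled by the unit multiplier; documents without a point fall back to a worst-case value. A hedged sketch, where hasPoint, x and y stand in for the docValues reads shown in the class:

    DistanceCalculator calc = strategy.getSpatialContext().getDistCalc();
    double nullValue = strategy.getSpatialContext().isGeo()
        ? 180 * multiplier          // max possible degrees between two points
        : Double.MAX_VALUE;
    double d = hasPoint ? calc.distance(from, x, y) * multiplier : nullValue;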

      To perform range querying or filtering against a - * LegacyDoubleField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * LegacyDoubleField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. * To sort according to a * LegacyDoubleField, use the normal numeric sort types, eg * {@link org.apache.lucene.search.SortField.Type#DOUBLE}. LegacyDoubleField @@ -85,7 +85,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyFieldType#setNumericPrecisionStep} method if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link - * org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * org.apache.solr.legacy.LegacyNumericRangeQuery}. * For low cardinality fields larger precision steps are good. * If the cardinality is < 100, it is fair * to use {@link Integer#MAX_VALUE}, which produces one @@ -94,8 +94,8 @@ import org.apache.lucene.index.IndexOptions; *
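The javadoc above recommends a very large precisionStep for sort-only fields. A hedged sketch using the renamed org.apache.solr.legacy classes; the field name and value are hypothetical:

    // One indexed term per value: range queries degrade, sorting is unaffected.
    LegacyFieldType t = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
    t.setNumericPrecisionStep(Integer.MAX_VALUE);
    t.freeze();
    doc.add(new LegacyDoubleField("price", 9.99, t));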

      For more information on the internals of numeric trie * indexing, including the precisionStep - * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of - * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}. + * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of + * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}. * *

      If you only need to sort by numeric value, and never * run range querying/filtering, you can index using a @@ -103,7 +103,7 @@ import org.apache.lucene.index.IndexOptions; * This will minimize disk space consumed.

      * *

      More advanced users can instead use {@link - * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This + * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This * class is a wrapper around this token stream type for * easier, more intuitive usage.

      * @@ -144,7 +144,7 @@ public final class LegacyDoubleField extends LegacyField { /** Creates a stored or un-stored LegacyDoubleField with the provided value * and default precisionStep {@link - * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). + * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). * @param name field name * @param value 64-bit double value * @param stored Store.YES if the content should also be stored diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyField.java similarity index 98% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyField.java index 87ac0e566cf..7a6bde026a1 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java b/solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java similarity index 95% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java index 1f4b0af4768..a18a00a34a6 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; @@ -60,7 +60,7 @@ public final class LegacyFieldType extends FieldType { /** * LegacyNumericType: if non-null then the field's value will be indexed - * numerically so that {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} can be used at + * numerically so that {@link org.apache.solr.legacy.LegacyNumericRangeQuery} can be used at * search time. *

      * The default is null (no numeric type) @@ -97,7 +97,7 @@ public final class LegacyFieldType extends FieldType { *

      * This has no effect if {@link #numericType()} returns null. *

      - * The default is {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} + * The default is {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} * @see #setNumericPrecisionStep(int) * * @deprecated Please switch to {@link org.apache.lucene.index.PointValues} instead diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java similarity index 92% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java index ea3b84ab65f..79ec0bd441a 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.Document; import org.apache.lucene.document.FloatPoint; @@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyDoubleField}. * *

      To perform range querying or filtering against a - * LegacyFloatField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * LegacyFloatField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. * To sort according to a * LegacyFloatField, use the normal numeric sort types, eg * {@link org.apache.lucene.search.SortField.Type#FLOAT}. LegacyFloatField @@ -83,7 +83,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyFieldType#setNumericPrecisionStep} method if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link - * org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * org.apache.solr.legacy.LegacyNumericRangeQuery}. * For low cardinality fields larger precision steps are good. * If the cardinality is < 100, it is fair * to use {@link Integer#MAX_VALUE}, which produces one @@ -92,8 +92,8 @@ import org.apache.lucene.index.IndexOptions; *

      For more information on the internals of numeric trie * indexing, including the precisionStep - * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of - * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}. + * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of + * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}. * *

      If you only need to sort by numeric value, and never * run range querying/filtering, you can index using a @@ -101,7 +101,7 @@ import org.apache.lucene.index.IndexOptions; * This will minimize disk space consumed.

      * *

      More advanced users can instead use {@link - * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This + * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This * class is a wrapper around this token stream type for * easier, more intuitive usage.

      * @@ -144,7 +144,7 @@ public final class LegacyFloatField extends LegacyField { /** Creates a stored or un-stored LegacyFloatField with the provided value * and default precisionStep {@link - * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). + * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). * @param name field name * @param value 32-bit double value * @param stored Store.YES if the content should also be stored diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java similarity index 92% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java index e3ae9658b1d..838ad4ecce0 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.Document; @@ -50,7 +50,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyDoubleField}. * *

      To perform range querying or filtering against a - * LegacyIntField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * LegacyIntField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. * To sort according to a * LegacyIntField, use the normal numeric sort types, eg * {@link org.apache.lucene.search.SortField.Type#INT}. LegacyIntField @@ -84,7 +84,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyFieldType#setNumericPrecisionStep} method if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link - * org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * org.apache.solr.legacy.LegacyNumericRangeQuery}. * For low cardinality fields larger precision steps are good. * If the cardinality is < 100, it is fair * to use {@link Integer#MAX_VALUE}, which produces one @@ -93,8 +93,8 @@ import org.apache.lucene.index.IndexOptions; *

      For more information on the internals of numeric trie * indexing, including the precisionStep - * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of - * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}. + * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of + * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}. * *

      If you only need to sort by numeric value, and never * run range querying/filtering, you can index using a @@ -102,7 +102,7 @@ import org.apache.lucene.index.IndexOptions; * This will minimize disk space consumed.

      * *

      More advanced users can instead use {@link - * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This + * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This * class is a wrapper around this token stream type for * easier, more intuitive usage.

      * @@ -145,7 +145,7 @@ public final class LegacyIntField extends LegacyField { /** Creates a stored or un-stored LegacyIntField with the provided value * and default precisionStep {@link - * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). + * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). * @param name field name * @param value 32-bit integer value * @param stored Store.YES if the content should also be stored diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java similarity index 93% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java index 3e20b448b96..fb4843733ec 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.Document; @@ -61,7 +61,7 @@ import org.apache.lucene.index.IndexOptions; * long value. * *

      To perform range querying or filtering against a - * LegacyLongField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * LegacyLongField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. * To sort according to a * LegacyLongField, use the normal numeric sort types, eg * {@link org.apache.lucene.search.SortField.Type#LONG}. LegacyLongField @@ -95,7 +95,7 @@ import org.apache.lucene.index.IndexOptions; * LegacyFieldType#setNumericPrecisionStep} method if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link - * org.apache.lucene.legacy.LegacyNumericRangeQuery}. + * org.apache.solr.legacy.LegacyNumericRangeQuery}. * For low cardinality fields larger precision steps are good. * If the cardinality is < 100, it is fair * to use {@link Integer#MAX_VALUE}, which produces one @@ -104,8 +104,8 @@ import org.apache.lucene.index.IndexOptions; *

      For more information on the internals of numeric trie * indexing, including the precisionStep - * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of - * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}. + * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of + * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}. * *

      If you only need to sort by numeric value, and never * run range querying/filtering, you can index using a @@ -113,7 +113,7 @@ import org.apache.lucene.index.IndexOptions; * This will minimize disk space consumed. * *

      More advanced users can instead use {@link - * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This + * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This * class is a wrapper around this token stream type for * easier, more intuitive usage.

      * @@ -154,7 +154,7 @@ public final class LegacyLongField extends LegacyField { /** Creates a stored or un-stored LegacyLongField with the provided value * and default precisionStep {@link - * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). + * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). * @param name field name * @param value 64-bit long value * @param stored Store.YES if the content should also be stored diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java similarity index 95% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java index f172a200779..d07e497da27 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import java.io.IOException; @@ -41,9 +41,9 @@ import org.apache.lucene.index.Term; // for javadocs /** *

      A {@link Query} that matches numeric values within a * specified range. To use this, you must first index the - * numeric values using {@link org.apache.lucene.legacy.LegacyIntField}, {@link - * org.apache.lucene.legacy.LegacyFloatField}, {@link org.apache.lucene.legacy.LegacyLongField} or {@link org.apache.lucene.legacy.LegacyDoubleField} (expert: {@link - * org.apache.lucene.legacy.LegacyNumericTokenStream}). If your terms are instead textual, + * numeric values using {@link org.apache.solr.legacy.LegacyIntField}, {@link + * org.apache.solr.legacy.LegacyFloatField}, {@link org.apache.solr.legacy.LegacyLongField} or {@link org.apache.solr.legacy.LegacyDoubleField} (expert: {@link + * org.apache.solr.legacy.LegacyNumericTokenStream}). If your terms are instead textual, * you should use {@link TermRangeQuery}.

      * *

      You create a new LegacyNumericRangeQuery with the static @@ -97,7 +97,7 @@ import org.apache.lucene.index.Term; // for javadocs * (all numerical values like doubles, longs, floats, and ints are converted to * lexicographic sortable string representations and stored with different precisions * (for a more detailed description of how the values are stored, - * see {@link org.apache.lucene.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching: + * see {@link org.apache.solr.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching: * The center of the range is searched only with the lowest possible precision in the trie, * while the boundaries are matched more exactly. This reduces the number of terms dramatically.
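A hedged usage sketch of the factory methods described above; the field name and bound are hypothetical, and null marks an open end of the range:

    // timestamp >= 1500000000L, unbounded above, default precisionStep (16)
    Query q = LegacyNumericRangeQuery.newLongRange(
        "timestamp", 1500000000L, null, true, false);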

      * @@ -113,7 +113,7 @@ import org.apache.lucene.index.Term; // for javadocs *

      Precision Step

      *

      You can choose any precisionStep when encoding values. * Lower step values mean more precisions and so more terms in index (and index gets larger). The number - * of indexed terms per value is (those are generated by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}): + * of indexed terms per value is (those are generated by {@link org.apache.solr.legacy.LegacyNumericTokenStream}): *

      *   indexedTermsPerValue = ceil(bitsPerValue / precisionStep) *
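Plugging the defaults quoted in this file into the formula above:

    // long (64 bits), precisionStep 16  -> ceil(64 / 16) = 4 terms per value
    // int  (32 bits), precisionStep 8   -> ceil(32 / 8)  = 4 terms per value
    // precisionStep = Integer.MAX_VALUE -> ceil(64 / 2147483647) = 1 term (sort-only)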

      @@ -149,8 +149,8 @@ import org.apache.lucene.index.Term; // for javadocs *
    • Steps ≥64 for long/double and ≥32 for int/float produces one token * per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used * to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as - * precisionStep). Using {@link org.apache.lucene.legacy.LegacyIntField}, - * {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} for sorting + * precisionStep). Using {@link org.apache.solr.legacy.LegacyIntField}, + * {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} for sorting * is ideal, because building the field cache is much faster than with text-only numbers. * These fields have one term per value and therefore also work with term enumeration for building distinct lists * (e.g. facets / preselected values to search for). @@ -205,7 +205,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu /** * Factory that creates a LegacyNumericRangeQuery, that queries a long - * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). + * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -231,7 +231,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu /** * Factory that creates a LegacyNumericRangeQuery, that queries a int - * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). + * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. By setting inclusive to false, it will * match all documents excluding the bounds, with inclusive on, the boundaries are hits, too. @@ -259,7 +259,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu /** * Factory that creates a LegacyNumericRangeQuery, that queries a double - * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). + * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. * {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query @@ -289,7 +289,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu /** * Factory that creates a LegacyNumericRangeQuery, that queries a float - * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). + * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8). * You can have half-open ranges (which are in fact </≤ or >/≥ queries) * by setting the min or max value to null. 
* {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query @@ -390,8 +390,8 @@ public final class LegacyNumericRangeQuery extends MultiTermQu *

* WARNING: This term enumeration is not guaranteed to always be ordered by
 * {@link Term#compareTo}.
- * The ordering depends on how {@link org.apache.lucene.legacy.LegacyNumericUtils#splitLongRange} and
- * {@link org.apache.lucene.legacy.LegacyNumericUtils#splitIntRange} generates the sub-ranges. For
+ * The ordering depends on how {@link org.apache.solr.legacy.LegacyNumericUtils#splitLongRange} and
+ * {@link org.apache.solr.legacy.LegacyNumericUtils#splitIntRange} generate the sub-ranges. For
 * {@link MultiTermQuery} ordering is not relevant.
 */
 private final class NumericRangeTermsEnum extends FilteredTermsEnum {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
similarity index 95%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
index a2aba19e2ac..c18cd595278 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;

 import java.util.Objects;

@@ -35,10 +35,10 @@ import org.apache.lucene.util.NumericUtils;
 /**
 * Expert: This class provides a {@link TokenStream}
 * for indexing numeric values that can be used by {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
 *
- *

      Note that for simple usage, {@link org.apache.lucene.legacy.LegacyIntField}, {@link - * org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} is + *

      Note that for simple usage, {@link org.apache.solr.legacy.LegacyIntField}, {@link + * org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} is * recommended. These fields disable norms and * term freqs, as they are not usually needed during * searching. If you need to change these settings, you @@ -81,7 +81,7 @@ import org.apache.lucene.util.NumericUtils; * than one numeric field, use a separate LegacyNumericTokenStream * instance for each.

      * - *

      See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details on the + *

      See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details on the * precisionStep * parameter as well as how numeric fields work under the hood.
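A hedged sketch of the expert API just described, assuming a setLongValue(...) setter among the set???Value() methods the javadoc mentions; the field value is illustrative:

import org.apache.solr.legacy.LegacyNumericTokenStream;

public class NumericTokenStreamSketch {
  public static void main(String[] args) throws Exception {
    // the default constructor uses PRECISION_STEP_DEFAULT (16); a value must
    // be set before the stream is consumed, and one instance should be reused
    // per field, as the javadoc above warns
    LegacyNumericTokenStream stream = new LegacyNumericTokenStream();
    stream.setLongValue(5823L);
    stream.reset();
    while (stream.incrementToken()) {
      // for a 64-bit long at precisionStep 16 this emits ceil(64 / 16) = 4
      // trie-encoded terms, one per precision level
    }
    stream.end();
    stream.close();
  }
}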

@@ -140,7 +140,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
     }
   }

-  /** Implementation of {@link org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
+  /** Implementation of {@link org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
   * @lucene.internal
   * @since 4.0
   */
@@ -240,7 +240,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
  /**
   * Creates a token stream for numeric values using the default precisionStep
-  * {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
+  * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
   * before use, set a value using the various set???Value() methods.
   */
  public LegacyNumericTokenStream() {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
similarity index 97%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
index 345b4974b02..8cc3fcc7ed2 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;

 /** Data type of the numeric value
 * @since 3.2
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
similarity index 94%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
index e6659d7e102..52fae9c8171 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
@@ -14,7 +14,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;

 import java.io.IOException;

@@ -43,9 +43,9 @@ import org.apache.lucene.util.BytesRefBuilder;
 * during encoding.
 *
 *

For easy usage, the trie algorithm is implemented for indexing inside
- * {@link org.apache.lucene.legacy.LegacyNumericTokenStream} that can index int, long,
+ * {@link org.apache.solr.legacy.LegacyNumericTokenStream} that can index int, long,
 * float, and double. For querying,
- * {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} implements the query part
+ * {@link org.apache.solr.legacy.LegacyNumericRangeQuery} implements the query part
 * for the same data types.
 *
 * @lucene.internal
@@ -61,15 +61,15 @@ public final class LegacyNumericUtils {
   private LegacyNumericUtils() {} // no instance!

   /**
-   * The default precision step used by {@link org.apache.lucene.legacy.LegacyLongField},
-   * {@link org.apache.lucene.legacy.LegacyDoubleField}, {@link org.apache.lucene.legacy.LegacyNumericTokenStream}, {@link
-   * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+   * The default precision step used by {@link org.apache.solr.legacy.LegacyLongField},
+   * {@link org.apache.solr.legacy.LegacyDoubleField}, {@link org.apache.solr.legacy.LegacyNumericTokenStream}, {@link
+   * org.apache.solr.legacy.LegacyNumericRangeQuery}.
   */
  public static final int PRECISION_STEP_DEFAULT = 16;

  /**
-   * The default precision step used by {@link org.apache.lucene.legacy.LegacyIntField} and
-   * {@link org.apache.lucene.legacy.LegacyFloatField}.
+   * The default precision step used by {@link org.apache.solr.legacy.LegacyIntField} and
+   * {@link org.apache.solr.legacy.LegacyFloatField}.
   */
  public static final int PRECISION_STEP_DEFAULT_32 = 8;

@@ -101,7 +101,7 @@ public final class LegacyNumericUtils {
  /**
   * Returns prefix coded bits after reducing the precision by shift bits.
-   * This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
+   * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * After encoding, {@code bytes.offset} will always be 0.
   * @param val the numeric value
   * @param shift how many bits to strip from the right
@@ -128,7 +128,7 @@ public final class LegacyNumericUtils {
  /**
   * Returns prefix coded bits after reducing the precision by shift bits.
-   * This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
+   * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
   * After encoding, {@code bytes.offset} will always be 0.
   * @param val the numeric value
   * @param shift how many bits to strip from the right
@@ -232,7 +232,7 @@ public final class LegacyNumericUtils {
   * {@link org.apache.lucene.search.BooleanQuery} for each call to its
   * {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
   * method.
-   *

      This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + *

      This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. */ public static void splitLongRange(final LongRangeBuilder builder, final int precisionStep, final long minBound, final long maxBound @@ -246,7 +246,7 @@ public final class LegacyNumericUtils { * {@link org.apache.lucene.search.BooleanQuery} for each call to its * {@link IntRangeBuilder#addRange(BytesRef,BytesRef)} * method. - *

      This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. + *

      This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. */ public static void splitIntRange(final IntRangeBuilder builder, final int precisionStep, final int minBound, final int maxBound diff --git a/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java b/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java new file mode 100644 index 00000000000..3b29a61eea8 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.legacy; + +import org.apache.lucene.document.DoubleDocValuesField; +import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.solr.legacy.LegacyDoubleField; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericType; +import org.apache.lucene.queries.function.FunctionRangeQuery; +import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.spatial.SpatialStrategy; +import org.apache.lucene.spatial.query.SpatialArgs; +import org.apache.lucene.spatial.query.SpatialOperation; +import org.apache.lucene.spatial.query.UnsupportedSpatialOperation; +import org.locationtech.spatial4j.context.SpatialContext; +import org.locationtech.spatial4j.shape.Circle; +import org.locationtech.spatial4j.shape.Point; +import org.locationtech.spatial4j.shape.Rectangle; +import org.locationtech.spatial4j.shape.Shape; + +/** + * Simple {@link SpatialStrategy} which represents Points in two numeric fields. + * The Strategy's best feature is decent distance sort. + * + *

      + * Characteristics: + *
      + *

        + *
      • Only indexes points; just one per field value.
      • Can query by a rectangle or circle.
• {@link + * org.apache.lucene.spatial.query.SpatialOperation#Intersects} and {@link + * SpatialOperation#IsWithin} are supported.
      • Requires DocValues for + * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} and for + * searching with a Circle.
      + * + *

      + * Implementation: + *

      + * This is a simple Strategy. Search works with a pair of range queries on two {@link DoublePoint}s representing + * x & y fields. A Circle query does the same bbox query but adds a + * ValueSource filter on + * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}. + *
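As a hedged illustration of that two-field scheme, here is a usage sketch; the field prefix "location" and the coordinates are invented for this example, newInstance/createIndexableFields/makeQuery come from the class added below, and the spatial4j shape helpers are assumed from that library's SpatialContext:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.solr.legacy.PointVectorStrategy;
import org.locationtech.spatial4j.context.SpatialContext;

public class PointVectorSketch {
  public static void main(String[] args) {
    SpatialContext ctx = SpatialContext.GEO;
    // indexes into "location__x" / "location__y" per the SUFFIX_X / SUFFIX_Y constants
    PointVectorStrategy strategy = PointVectorStrategy.newInstance(ctx, "location");

    Document doc = new Document();
    for (Field field : strategy.createIndexableFields(ctx.makePoint(-73.98, 40.75))) {
      doc.add(field); // one DoublePoint plus one docValues field per axis
    }

    // a rectangle becomes a pair of double range queries on the x and y fields
    Query query = strategy.makeQuery(
        new SpatialArgs(SpatialOperation.Intersects, ctx.makeRectangle(-74.1, -73.9, 40.6, 40.9)));
    System.out.println(query);
  }
}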

      + * One performance shortcoming with this strategy is that a scenario involving + * both a search using a Circle and sort will result in calculations for the + * spatial distance being done twice -- once for the filter and second for the + * sort. + * + * @lucene.experimental + */ +public class PointVectorStrategy extends SpatialStrategy { + + // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we + // create more than one Field. + + /** + * pointValues, docValues, and nothing else. + */ + public static FieldType DEFAULT_FIELDTYPE; + + @Deprecated + public static LegacyFieldType LEGACY_FIELDTYPE; + static { + // Default: pointValues + docValues + FieldType type = new FieldType(); + type.setDimensions(1, Double.BYTES);//pointValues (assume Double) + type.setDocValuesType(DocValuesType.NUMERIC);//docValues + type.setStored(false); + type.freeze(); + DEFAULT_FIELDTYPE = type; + // Legacy default: legacyNumerics + LegacyFieldType legacyType = new LegacyFieldType(); + legacyType.setIndexOptions(IndexOptions.DOCS); + legacyType.setNumericType(LegacyNumericType.DOUBLE); + legacyType.setNumericPrecisionStep(8);// same as solr default + legacyType.setDocValuesType(DocValuesType.NONE);//no docValues! + legacyType.setStored(false); + legacyType.freeze(); + LEGACY_FIELDTYPE = legacyType; + } + + public static final String SUFFIX_X = "__x"; + public static final String SUFFIX_Y = "__y"; + + private final String fieldNameX; + private final String fieldNameY; + + private final int fieldsLen; + private final boolean hasStored; + private final boolean hasDocVals; + private final boolean hasPointVals; + // equiv to "hasLegacyNumerics": + private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step. + + /** + * Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery} + */ + public static PointVectorStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) { + return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE); + } + + /** + * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility. + * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)} + * since that requires docValues (the legacy config didn't have that). + * + * @deprecated LegacyNumerics will be removed + */ + @Deprecated + public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) { + return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE); + } + + /** + * Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}. + * a field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics + * is configurable this way too. 
+ */ + public PointVectorStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) { + super(ctx, fieldNamePrefix); + this.fieldNameX = fieldNamePrefix+SUFFIX_X; + this.fieldNameY = fieldNamePrefix+SUFFIX_Y; + + int numPairs = 0; + if ((this.hasStored = fieldType.stored())) { + numPairs++; + } + if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) { + numPairs++; + } + if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) { + numPairs++; + } + if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) { + if (hasPointVals) { + throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive"); + } + final LegacyFieldType legacyType = (LegacyFieldType) fieldType; + if (legacyType.numericType() != LegacyNumericType.DOUBLE) { + throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType()); + } + numPairs++; + legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED); + legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep()); + legacyNumericFieldType.freeze(); + } else { + legacyNumericFieldType = null; + } + this.fieldsLen = numPairs * 2; + } + + + String getFieldNameX() { + return fieldNameX; + } + + String getFieldNameY() { + return fieldNameY; + } + + @Override + public Field[] createIndexableFields(Shape shape) { + if (shape instanceof Point) + return createIndexableFields((Point) shape); + throw new UnsupportedOperationException("Can only index Point, not " + shape); + } + + /** @see #createIndexableFields(org.locationtech.spatial4j.shape.Shape) */ + public Field[] createIndexableFields(Point point) { + Field[] fields = new Field[fieldsLen]; + int idx = -1; + if (hasStored) { + fields[++idx] = new StoredField(fieldNameX, point.getX()); + fields[++idx] = new StoredField(fieldNameY, point.getY()); + } + if (hasDocVals) { + fields[++idx] = new DoubleDocValuesField(fieldNameX, point.getX()); + fields[++idx] = new DoubleDocValuesField(fieldNameY, point.getY()); + } + if (hasPointVals) { + fields[++idx] = new DoublePoint(fieldNameX, point.getX()); + fields[++idx] = new DoublePoint(fieldNameY, point.getY()); + } + if (legacyNumericFieldType != null) { + fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType); + fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType); + } + assert idx == fields.length - 1; + return fields; + } + + @Override + public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) { + return new DistanceValueSource(this, queryPoint, multiplier); + } + + @Override + public ConstantScoreQuery makeQuery(SpatialArgs args) { + if(! 
SpatialOperation.is( args.getOperation(), + SpatialOperation.Intersects, + SpatialOperation.IsWithin )) + throw new UnsupportedSpatialOperation(args.getOperation()); + Shape shape = args.getShape(); + if (shape instanceof Rectangle) { + Rectangle bbox = (Rectangle) shape; + return new ConstantScoreQuery(makeWithin(bbox)); + } else if (shape instanceof Circle) { + Circle circle = (Circle)shape; + Rectangle bbox = circle.getBoundingBox(); + Query approxQuery = makeWithin(bbox); + BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + FunctionRangeQuery vsRangeQuery = + new FunctionRangeQuery(makeDistanceValueSource(circle.getCenter()), 0.0, circle.getRadius(), true, true); + bqBuilder.add(approxQuery, BooleanClause.Occur.FILTER);//should have lowest "cost" value; will drive iteration + bqBuilder.add(vsRangeQuery, BooleanClause.Occur.FILTER); + return new ConstantScoreQuery(bqBuilder.build()); + } else { + throw new UnsupportedOperationException("Only Rectangles and Circles are currently supported, " + + "found [" + shape.getClass() + "]");//TODO + } + } + + /** + * Constructs a query to retrieve documents that fully contain the input envelope. + */ + private Query makeWithin(Rectangle bbox) { + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + BooleanClause.Occur MUST = BooleanClause.Occur.MUST; + if (bbox.getCrossesDateLine()) { + //use null as performance trick since no data will be beyond the world bounds + bq.add(rangeQuery(fieldNameX, null/*-180*/, bbox.getMaxX()), BooleanClause.Occur.SHOULD ); + bq.add(rangeQuery(fieldNameX, bbox.getMinX(), null/*+180*/), BooleanClause.Occur.SHOULD ); + bq.setMinimumNumberShouldMatch(1);//must match at least one of the SHOULD + } else { + bq.add(rangeQuery(fieldNameX, bbox.getMinX(), bbox.getMaxX()), MUST); + } + bq.add(rangeQuery(fieldNameY, bbox.getMinY(), bbox.getMaxY()), MUST); + return bq.build(); + } + + /** + * Returns a numeric range query based on FieldType + * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType} + * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields + */ + private Query rangeQuery(String fieldName, Double min, Double max) { + if (hasPointVals) { + if (min == null) { + min = Double.NEGATIVE_INFINITY; + } + + if (max == null) { + max = Double.POSITIVE_INFINITY; + } + + return DoublePoint.newRangeQuery(fieldName, min, max); + + } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0 + return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive + } + //TODO try doc-value range query? 
+ throw new UnsupportedOperationException("An index is required for this operation."); + } +} diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-1.png b/solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-1.png similarity index 100% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-1.png rename to solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-1.png diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-2.png b/solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-2.png similarity index 100% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-2.png rename to solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-2.png diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java b/solr/core/src/java/org/apache/solr/legacy/package-info.java similarity index 96% rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java rename to solr/core/src/java/org/apache/solr/legacy/package-info.java index d0167f80023..df981d0157b 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java +++ b/solr/core/src/java/org/apache/solr/legacy/package-info.java @@ -18,4 +18,4 @@ /** * Deprecated stuff! */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java index ffbbb36dbd7..63b47479905 100644 --- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java @@ -28,6 +28,7 @@ import java.util.TreeMap; import java.util.TreeSet; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.FlattenGraphFilterFactory; // javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.synonym.SynonymFilterFactory; import org.apache.lucene.analysis.synonym.SynonymMap; @@ -50,7 +51,11 @@ import org.slf4j.LoggerFactory; /** * TokenFilterFactory and ManagedResource implementation for * doing CRUD on synonyms using the REST API. + * + * @deprecated Use {@link ManagedSynonymGraphFilterFactory} instead, but be sure to also + * use {@link FlattenGraphFilterFactory} at index time (not at search time) as well. */ +@Deprecated public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java new file mode 100644 index 00000000000..1f4a9707139 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java @@ -0,0 +1,437 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.rest.schema.analysis;
+import java.io.IOException;
+import java.io.Reader;
+import java.lang.invoke.MethodHandles;
+import java.text.ParseException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.BaseSolrResource;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
+import org.restlet.data.Status;
+import org.restlet.resource.ResourceException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * TokenFilterFactory and ManagedResource implementation for
+ * doing CRUD on synonyms using the REST API.
+ */
+public class ManagedSynonymGraphFilterFactory extends BaseManagedTokenFilterFactory {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static final String SYNONYM_MAPPINGS = "synonymMappings";
+  public static final String IGNORE_CASE_INIT_ARG = "ignoreCase";
+
+  /**
+   * Used internally to preserve the case of synonym mappings regardless
+   * of the ignoreCase setting.
+   */
+  private static class CasePreservedSynonymMappings {
+    Map<String,Set<String>> mappings = new TreeMap<>();
+
+    /**
+     * Provides a view of the mappings for a given term; specifically, if
+     * ignoreCase is true, then the returned "view" contains the mappings
+     * for all known cases of the term; if it is false, then only the
+     * mappings for the specific case are returned.
+     */
+    Set<String> getMappings(boolean ignoreCase, String key) {
+      Set<String> synMappings = null;
+      if (ignoreCase) {
+        // TODO: should we return the mapped values in all lower-case here?
+        if (mappings.size() == 1) {
+          // if only one in the map (which is common) just return it directly
+          return mappings.values().iterator().next();
+        }
+
+        synMappings = new TreeSet<>();
+        for (Set<String> next : mappings.values())
+          synMappings.addAll(next);
+      } else {
+        synMappings = mappings.get(key);
+      }
+      return synMappings;
+    }
+
+    public String toString() {
+      return mappings.toString();
+    }
+  }
+
+  /**
+   * ManagedResource implementation for synonyms, which are so specialized that
+   * it makes sense to implement this class as an inner class as it has little
+   * application outside the SynonymFilterFactory use cases.
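+   * (Illustrative example, added for this write-up rather than present in the
+   * original javadoc: with ignoreCase=true, mappings stored for "MB" and "mb"
+   * are unioned when either case is requested; with ignoreCase=false, a
+   * request for "MB" returns only the mappings stored for exactly "MB".)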
+   */
+  public static class SynonymManager extends ManagedResource
+    implements ManagedResource.ChildResourceSupport
+  {
+    protected Map<String,CasePreservedSynonymMappings> synonymMappings;
+
+    public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO)
+        throws SolrException {
+      super(resourceId, loader, storageIO);
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData)
+        throws SolrException
+    {
+      NamedList<Object> initArgs = (NamedList<Object>)managedInitArgs;
+
+      String format = (String)initArgs.get("format");
+      if (format != null && !"solr".equals(format)) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "Invalid format "+
+            format+"! Only 'solr' is supported.");
+      }
+
+      // the default behavior is to not ignore case,
+      // so if not supplied, then install the default
+      if (initArgs.get(IGNORE_CASE_INIT_ARG) == null) {
+        initArgs.add(IGNORE_CASE_INIT_ARG, Boolean.FALSE);
+      }
+
+      boolean ignoreCase = getIgnoreCase(managedInitArgs);
+      synonymMappings = new TreeMap<>();
+      if (managedData != null) {
+        Map<String,Object> storedSyns = (Map<String,Object>)managedData;
+        for (String key : storedSyns.keySet()) {
+
+          String caseKey = applyCaseSetting(ignoreCase, key);
+          CasePreservedSynonymMappings cpsm = synonymMappings.get(caseKey);
+          if (cpsm == null) {
+            cpsm = new CasePreservedSynonymMappings();
+            synonymMappings.put(caseKey, cpsm);
+          }
+
+          // given the nature of our JSON parsing solution, we really have
+          // no guarantees on what is in the file
+          Object mapping = storedSyns.get(key);
+          if (!(mapping instanceof List)) {
+            throw new SolrException(ErrorCode.SERVER_ERROR,
+                "Invalid synonym file format! Expected a list of synonyms for "+key+
+                " but got "+mapping.getClass().getName());
+          }
+
+          Set<String> sortedVals = new TreeSet<>();
+          sortedVals.addAll((List<String>)storedSyns.get(key));
+          cpsm.mappings.put(key, sortedVals);
+        }
+      }
+      log.info("Loaded {} synonym mappings for {}", synonymMappings.size(), getResourceId());
+    }
+
+    @SuppressWarnings("unchecked")
+    @Override
+    protected Object applyUpdatesToManagedData(Object updates) {
+      boolean ignoreCase = getIgnoreCase();
+      boolean madeChanges = false;
+      if (updates instanceof List) {
+        madeChanges = applyListUpdates((List<String>)updates, ignoreCase);
+      } else if (updates instanceof Map) {
+        madeChanges = applyMapUpdates((Map<String,Object>)updates, ignoreCase);
+      } else {
+        throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST,
+            "Unsupported data format (" + updates.getClass().getName() + "); expected a JSON object (Map or List)!");
+      }
+      return madeChanges ? getStoredView() : null;
+    }
+
+    protected boolean applyListUpdates(List<String> jsonList, boolean ignoreCase) {
+      boolean madeChanges = false;
+      for (String term : jsonList) {
+        // find the mappings using the case aware key
+        String origTerm = term;
+        term = applyCaseSetting(ignoreCase, term);
+        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
+        if (cpsm == null)
+          cpsm = new CasePreservedSynonymMappings();
+
+        Set<String> treeTerms = new TreeSet<>();
+        treeTerms.addAll(jsonList);
+        cpsm.mappings.put(origTerm, treeTerms);
+        madeChanges = true;
+        // only add the cpsm to the synonymMappings if it has valid data
+        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
+          synonymMappings.put(term, cpsm);
+        }
+      }
+      return madeChanges;
+    }
+
+    protected boolean applyMapUpdates(Map<String,Object> jsonMap, boolean ignoreCase) {
+      boolean madeChanges = false;
+
+      for (String term : jsonMap.keySet()) {
+
+        String origTerm = term;
+        term = applyCaseSetting(ignoreCase, term);
+
+        // find the mappings using the case aware key
+        CasePreservedSynonymMappings cpsm = synonymMappings.get(term);
+        if (cpsm == null)
+          cpsm = new CasePreservedSynonymMappings();
+
+        Set<String> output = cpsm.mappings.get(origTerm);
+
+        Object val = jsonMap.get(origTerm); // IMPORTANT: use the original
+        if (val instanceof String) {
+          String strVal = (String)val;
+
+          if (output == null) {
+            output = new TreeSet<>();
+            cpsm.mappings.put(origTerm, output);
+          }
+
+          if (output.add(strVal)) {
+            madeChanges = true;
+          }
+        } else if (val instanceof List) {
+          List<String> vals = (List<String>)val;
+
+          if (output == null) {
+            output = new TreeSet<>();
+            cpsm.mappings.put(origTerm, output);
+          }
+
+          for (String nextVal : vals) {
+            if (output.add(nextVal)) {
+              madeChanges = true;
+            }
+          }
+
+        } else {
+          throw new ResourceException(Status.CLIENT_ERROR_BAD_REQUEST, "Unsupported value "+val+
+              " for "+term+"; expected single value or a JSON array!");
+        }
+
+        // only add the cpsm to the synonymMappings if it has valid data
+        if (!synonymMappings.containsKey(term) && cpsm.mappings.get(origTerm) != null) {
+          synonymMappings.put(term, cpsm);
+        }
+      }
+
+      return madeChanges;
+    }
+
+    /**
+     * Returns a Map of how we store and load data managed by this resource,
+     * which is different than how it is managed at runtime in order to support
+     * the ignoreCase setting.
+     */
+    protected Map<String,Set<String>> getStoredView() {
+      Map<String,Set<String>> storedView = new TreeMap<>();
+      for (CasePreservedSynonymMappings cpsm : synonymMappings.values()) {
+        for (String key : cpsm.mappings.keySet()) {
+          storedView.put(key, cpsm.mappings.get(key));
+        }
+      }
+      return storedView;
+    }
+
+    protected String applyCaseSetting(boolean ignoreCase, String str) {
+      return (ignoreCase && str != null) ? str.toLowerCase(Locale.ROOT) : str;
+    }
+
+    public boolean getIgnoreCase() {
+      return getIgnoreCase(managedInitArgs);
+    }
+
+    public boolean getIgnoreCase(NamedList<?> initArgs) {
+      Boolean ignoreCase = initArgs.getBooleanArg(IGNORE_CASE_INIT_ARG);
+      // ignoreCase = false by default
+      return null == ignoreCase ? false : ignoreCase;
+    }
+
+    @Override
+    public void doGet(BaseSolrResource endpoint, String childId) {
+      SolrQueryResponse response = endpoint.getSolrResponse();
+      if (childId != null) {
+        boolean ignoreCase = getIgnoreCase();
+        String key = applyCaseSetting(ignoreCase, childId);
+
+        // if ignoreCase==true, then we get the mappings using the lower-cased key
+        // and then return a union of all case-sensitive keys, if false, then
+        // we only return the mappings for the exact case requested
+        CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
+        Set<String> mappings = (cpsm != null) ? cpsm.getMappings(ignoreCase, childId) : null;
+        if (mappings == null)
+          throw new SolrException(ErrorCode.NOT_FOUND,
+              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
+
+        response.add(childId, mappings);
+      } else {
+        response.add(SYNONYM_MAPPINGS, buildMapToStore(getStoredView()));
+      }
+    }
+
+    @Override
+    public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) {
+      boolean ignoreCase = getIgnoreCase();
+      String key = applyCaseSetting(ignoreCase, childId);
+
+      CasePreservedSynonymMappings cpsm = synonymMappings.get(key);
+      if (cpsm == null)
+        throw new SolrException(ErrorCode.NOT_FOUND,
+            String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
+
+      if (ignoreCase) {
+        // delete all mappings regardless of case
+        synonymMappings.remove(key);
+      } else {
+        // just delete the mappings for the specific case-sensitive key
+        if (cpsm.mappings.containsKey(childId)) {
+          cpsm.mappings.remove(childId);
+
+          if (cpsm.mappings.isEmpty())
+            synonymMappings.remove(key);
+        } else {
+          throw new SolrException(ErrorCode.NOT_FOUND,
+              String.format(Locale.ROOT, "%s not found in %s", childId, getResourceId()));
+        }
+      }
+
+      // store the updated data (using the stored view)
+      storeManagedData(getStoredView());
+
+      log.info("Removed synonym mappings for: {}", childId);
+    }
+  }
+
+  /**
+   * Custom SynonymMap.Parser implementation that provides synonym
+   * mappings from the managed JSON in this class during SynonymMap
+   * building.
+   */
+  private class ManagedSynonymParser extends SynonymMap.Parser {
+
+    SynonymManager synonymManager;
+
+    public ManagedSynonymParser(SynonymManager synonymManager, boolean dedup, Analyzer analyzer) {
+      super(dedup, analyzer);
+      this.synonymManager = synonymManager;
+    }
+
+    /**
+     * Add the managed synonyms and their mappings into the SynonymMap builder.
+     */
+    @Override
+    public void parse(Reader in) throws IOException, ParseException {
+      boolean ignoreCase = synonymManager.getIgnoreCase();
+      for (CasePreservedSynonymMappings cpsm : synonymManager.synonymMappings.values()) {
+        for (String term : cpsm.mappings.keySet()) {
+          for (String mapping : cpsm.mappings.get(term)) {
+            // apply the case setting to match the behavior of the SynonymMap builder
+            CharsRef casedTerm = analyze(synonymManager.applyCaseSetting(ignoreCase, term), new CharsRefBuilder());
+            CharsRef casedMapping = analyze(synonymManager.applyCaseSetting(ignoreCase, mapping), new CharsRefBuilder());
+            add(casedTerm, casedMapping, false);
+          }
+        }
+      }
+    }
+  }
+
+  protected SynonymGraphFilterFactory delegate;
+
+  public ManagedSynonymGraphFilterFactory(Map<String,String> args) {
+    super(args);
+  }
+
+  @Override
+  public String getResourceId() {
+    return "/schema/analysis/synonyms/"+handle;
+  }
+
+  protected Class<? extends ManagedResource> getManagedResourceImplClass() {
+    return SynonymManager.class;
+  }
+
+  /**
+   * Called once, during core initialization, to initialize any analysis components
+   * that depend on the data managed by this resource. It is important that the
+   * analysis component is only initialized once during core initialization so that
+   * text analysis is consistent, especially in a distributed environment, as we
+   * don't want one server applying a different set of stop words than other servers.
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
+      throws SolrException
+  {
+    NamedList<Object> args = (NamedList<Object>)initArgs;
+    args.add("synonyms", getResourceId());
+    args.add("expand", "false");
+    args.add("format", "solr");
+
+    Map<String,String> filtArgs = new HashMap<>();
+    for (Map.Entry<String,Object> entry : args) {
+      filtArgs.put(entry.getKey(), entry.getValue().toString());
+    }
+    // create the actual filter factory that pulls the synonym mappings
+    // from synonymMappings using a custom parser implementation
+    delegate = new SynonymGraphFilterFactory(filtArgs) {
+      @Override
+      protected SynonymMap loadSynonyms
+          (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
+          throws IOException, ParseException {
+
+        ManagedSynonymParser parser =
+            new ManagedSynonymParser((SynonymManager)res, dedup, analyzer);
+        // null is safe here because there's no actual parsing done against an input Reader
+        parser.parse(null);
+        return parser.build();
+      }
+    };
+    try {
+      delegate.inform(res.getResourceLoader());
+    } catch (IOException e) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, e);
+    }
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    if (delegate == null)
+      throw new IllegalStateException(this.getClass().getName()+
+          " not initialized correctly!
The SynonymFilterFactory delegate was not initialized."); + + return delegate.create(input); + } +} diff --git a/solr/core/src/java/org/apache/solr/schema/BBoxField.java b/solr/core/src/java/org/apache/solr/schema/BBoxField.java index d69255bb55c..4d773c96ac4 100644 --- a/solr/core/src/java/org/apache/solr/schema/BBoxField.java +++ b/solr/core/src/java/org/apache/solr/schema/BBoxField.java @@ -23,10 +23,10 @@ import java.util.List; import java.util.Map; import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyFieldType; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource; -import org.apache.lucene.spatial.bbox.BBoxStrategy; +import org.apache.solr.legacy.BBoxStrategy; import org.apache.lucene.spatial.query.SpatialArgs; import org.apache.lucene.spatial.util.ShapeAreaValueSource; import org.apache.solr.common.SolrException; diff --git a/solr/core/src/java/org/apache/solr/schema/EnumField.java b/solr/core/src/java/org/apache/solr/schema/EnumField.java index 3127262548a..f023805f880 100644 --- a/solr/core/src/java/org/apache/solr/schema/EnumField.java +++ b/solr/core/src/java/org/apache/solr/schema/EnumField.java @@ -35,11 +35,11 @@ import javax.xml.xpath.XPathFactory; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexableField; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericType; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericType; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.EnumFieldSource; import org.apache.lucene.search.ConstantScoreQuery; diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index 13385b73ee2..04f64d5d7a3 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -373,7 +373,7 @@ public class IndexSchema { void persist(Writer writer) throws IOException { final SolrQueryResponse response = new SolrQueryResponse(); response.add(IndexSchema.SCHEMA, getNamedPropertyValues()); - final NamedList args = new NamedList(Arrays.asList("indent", "on")); + final SolrParams args = (new ModifiableSolrParams()).set("indent", "on"); final LocalSolrQueryRequest req = new LocalSolrQueryRequest(null, args); final SchemaXmlWriter schemaXmlWriter = new SchemaXmlWriter(writer, req, response); schemaXmlWriter.setEmitManagedSchemaDoNotEditWarning(true); diff --git a/solr/core/src/java/org/apache/solr/schema/SpatialPointVectorFieldType.java b/solr/core/src/java/org/apache/solr/schema/SpatialPointVectorFieldType.java index ef05f18e283..64e42eff5e9 100644 --- a/solr/core/src/java/org/apache/solr/schema/SpatialPointVectorFieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/SpatialPointVectorFieldType.java @@ -20,8 +20,8 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import 
org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.spatial.vector.PointVectorStrategy; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.PointVectorStrategy; /** * @see PointVectorStrategy diff --git a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java index b610e6ecccf..e9e7779c486 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.DoubleDocValues; diff --git a/solr/core/src/java/org/apache/solr/schema/TrieField.java b/solr/core/src/java/org/apache/solr/schema/TrieField.java index e7a33bdb64f..f90877cbfdd 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieField.java @@ -30,14 +30,14 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyFloatField; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericType; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyDoubleField; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyFloatField; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericType; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; @@ -63,9 +63,9 @@ import org.slf4j.LoggerFactory; /** * Provides field types to support for Lucene's {@link - * org.apache.lucene.legacy.LegacyIntField}, {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} and - * {@link org.apache.lucene.legacy.LegacyDoubleField}. - * See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details. + * org.apache.solr.legacy.LegacyIntField}, {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} and + * {@link org.apache.solr.legacy.LegacyDoubleField}. + * See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details. * It supports integer, float, long, double and date types. *

      * For each number being added to this field, multiple terms are generated as per the algorithm described in the above @@ -78,7 +78,7 @@ import org.slf4j.LoggerFactory; * generated, range search will be no faster than any other number field, but sorting will still be possible. * * - * @see org.apache.lucene.legacy.LegacyNumericRangeQuery + * @see org.apache.solr.legacy.LegacyNumericRangeQuery * @since solr 1.4 */ public class TrieField extends NumericFieldType { diff --git a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java index b06981010b5..57efa754f05 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.FloatDocValues; diff --git a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java index 6d4d7cd85ad..1a9f486854a 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.IntDocValues; diff --git a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java index a93d0ce12c6..56b964ff704 100644 --- a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java +++ b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.LongDocValues; diff --git a/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java b/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java index a2d0ed6eff2..931634fcc87 100644 --- a/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java +++ b/solr/core/src/java/org/apache/solr/search/LegacyNumericRangeQueryBuilder.java @@ -17,8 +17,8 @@ package org.apache.solr.search; import org.apache.lucene.search.Query; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericRangeQuery; +import 
org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queryparser.xml.DOMUtils; import org.apache.lucene.queryparser.xml.ParserException; import org.apache.lucene.queryparser.xml.QueryBuilder; @@ -26,10 +26,10 @@ import org.apache.lucene.queryparser.xml.builders.PointRangeQueryBuilder; import org.w3c.dom.Element; /** - * Creates a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The table below specifies the required + * Creates a {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The table below specifies the required * attributes and the defaults if optional attributes are omitted. For more * detail on what each of the attributes actually do, consult the documentation - * for {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}: + * for {@link org.apache.solr.legacy.LegacyNumericRangeQuery}: * * * diff --git a/solr/core/src/java/org/apache/solr/search/QueryParsing.java b/solr/core/src/java/org/apache/solr/search/QueryParsing.java index 381276c9672..bbce610f4a1 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/core/src/java/org/apache/solr/search/QueryParsing.java @@ -17,7 +17,7 @@ package org.apache.solr.search; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; diff --git a/solr/core/src/java/org/apache/solr/search/QueryWrapperFilter.java b/solr/core/src/java/org/apache/solr/search/QueryWrapperFilter.java index d526cf394ab..fa6e87cdcaf 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryWrapperFilter.java +++ b/solr/core/src/java/org/apache/solr/search/QueryWrapperFilter.java @@ -34,7 +34,7 @@ import org.apache.lucene.util.Bits; * Constrains search results to only match those which also match a provided * query. * - *

      This could be used, for example, with a {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} on a suitably + *

      This could be used, for example, with a {@link org.apache.solr.legacy.LegacyNumericRangeQuery} on a suitably * formatted date field to implement date filtering. One could re-use a single * CachingWrapperFilter(QueryWrapperFilter) that matches, e.g., only documents modified * within the last week. This would only need to be reconstructed once per day. diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java index ab13f7244b5..18896e08cff 100644 --- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java +++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java @@ -52,7 +52,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor */ @Override public void process(ResponseBuilder rb, ShardRequest shardRequest) { - SortSpec ss = rb.getSortSpec(); + SortSpec groupSortSpec = rb.getGroupingSpec().getGroupSortSpec(); Sort groupSort = rb.getGroupingSpec().getGroupSort(); final String[] fields = rb.getGroupingSpec().getFields(); Sort withinGroupSort = rb.getGroupingSpec().getSortWithinGroup(); @@ -144,7 +144,7 @@ public class SearchGroupShardResponseProcessor implements ShardResponseProcessor rb.firstPhaseElapsedTime = maxElapsedTime; for (String groupField : commandSearchGroups.keySet()) { List>> topGroups = commandSearchGroups.get(groupField); - Collection> mergedTopGroups = SearchGroup.merge(topGroups, ss.getOffset(), ss.getCount(), groupSort); + Collection> mergedTopGroups = SearchGroup.merge(topGroups, groupSortSpec.getOffset(), groupSortSpec.getCount(), groupSort); if (mergedTopGroups == null) { continue; } diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java index 3ff432da1c4..17b7d3b04b5 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java @@ -25,7 +25,7 @@ import java.util.regex.Pattern; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.mlt.MoreLikeThis; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java index dea161dbb00..cc87e0983d0 100644 --- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java +++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java @@ -16,7 +16,7 @@ */ package org.apache.solr.search.mlt; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.mlt.MoreLikeThis; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java index 89e6f0b152f..87f5f4ca3c7 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java +++ 
b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -159,8 +159,8 @@ public interface FieldCache { }; /** - * A parser instance for int values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed - * via {@link org.apache.lucene.legacy.LegacyIntField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}. + * A parser instance for int values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.solr.legacy.LegacyIntField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}. * @deprecated Index with points and use {@link #INT_POINT_PARSER} instead. */ @Deprecated @@ -182,8 +182,8 @@ public interface FieldCache { }; /** - * A parser instance for float values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed - * via {@link org.apache.lucene.legacy.LegacyFloatField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}. + * A parser instance for float values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.solr.legacy.LegacyFloatField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}. * @deprecated Index with points and use {@link #FLOAT_POINT_PARSER} instead. */ @Deprecated @@ -207,8 +207,8 @@ public interface FieldCache { }; /** - * A parser instance for long values encoded by {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed - * via {@link org.apache.lucene.legacy.LegacyLongField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}. + * A parser instance for long values encoded by {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.solr.legacy.LegacyLongField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}. * @deprecated Index with points and use {@link #LONG_POINT_PARSER} instead. */ @Deprecated @@ -229,8 +229,8 @@ public interface FieldCache { }; /** - * A parser instance for double values encoded with {@link org.apache.lucene.legacy.LegacyNumericUtils}, e.g. when indexed - * via {@link org.apache.lucene.legacy.LegacyDoubleField}/{@link org.apache.lucene.legacy.LegacyNumericTokenStream}. + * A parser instance for double values encoded with {@link org.apache.solr.legacy.LegacyNumericUtils}, e.g. when indexed + * via {@link org.apache.solr.legacy.LegacyDoubleField}/{@link org.apache.solr.legacy.LegacyNumericTokenStream}. * @deprecated Index with points and use {@link #DOUBLE_POINT_PARSER} instead. */ @Deprecated @@ -277,7 +277,7 @@ public interface FieldCache { * @param parser * Computes long for string values. May be {@code null} if the * requested field was indexed as {@link NumericDocValuesField} or - * {@link org.apache.lucene.legacy.LegacyLongField}. + * {@link org.apache.solr.legacy.LegacyLongField}. * @return The values in the given field for each document. * @throws IOException * If any error occurs. 
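Bridging the deprecated legacy parsers above and the UninvertingReader types in the next file: a hedged sketch of exposing a legacy-indexed field through the uninverting machinery. The "price" field is illustrative, and the wrap(DirectoryReader, Map) entry point is assumed to have carried over unchanged from Lucene's original uninverting module into org.apache.solr.uninverting:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.DirectoryReader;
import org.apache.solr.uninverting.UninvertingReader;

public class UninvertingSketch {
  // exposes a field indexed with the deprecated LegacyIntField as if it had
  // been indexed with NumericDocValuesField, per the Type.LEGACY_INTEGER
  // javadoc in the diff below
  public static DirectoryReader withUninverted(DirectoryReader in) throws IOException {
    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("price", UninvertingReader.Type.LEGACY_INTEGER);
    return UninvertingReader.wrap(in, mapping);
  }
}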
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java index 7006b4a290c..7158e316380 100644 --- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java +++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java @@ -87,7 +87,7 @@ public class UninvertingReader extends FilterLeafReader { */ DOUBLE_POINT, /** - * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField}) + * Single-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link NumericDocValuesField}. @@ -96,7 +96,7 @@ public class UninvertingReader extends FilterLeafReader { @Deprecated LEGACY_INTEGER, /** - * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField}) + * Single-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link NumericDocValuesField}. @@ -105,7 +105,7 @@ public class UninvertingReader extends FilterLeafReader { @Deprecated LEGACY_LONG, /** - * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField}) + * Single-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link NumericDocValuesField}. @@ -114,7 +114,7 @@ public class UninvertingReader extends FilterLeafReader { @Deprecated LEGACY_FLOAT, /** - * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField}) + * Single-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link NumericDocValuesField}. @@ -144,28 +144,28 @@ public class UninvertingReader extends FilterLeafReader { */ SORTED_SET_BINARY, /** - * Multi-valued Integer, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyIntField}) + * Multi-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link SortedSetDocValuesField}. */ SORTED_SET_INTEGER, /** - * Multi-valued Float, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyFloatField}) + * Multi-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link SortedSetDocValuesField}. */ SORTED_SET_FLOAT, /** - * Multi-valued Long, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyLongField}) + * Multi-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField}) *
<p>
      * Fields with this type act as if they were indexed with * {@link SortedSetDocValuesField}. */ SORTED_SET_LONG, /** - * Multi-valued Double, (e.g. indexed with {@link org.apache.lucene.legacy.LegacyDoubleField}) + * Multi-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField}) *
<p>
* Fields with this type act as if they were indexed with * {@link SortedSetDocValuesField}. diff --git a/solr/core/src/java/org/apache/solr/update/PeerSync.java b/solr/core/src/java/org/apache/solr/update/PeerSync.java index dfadb0cafdb..7371a943cdc 100644 --- a/solr/core/src/java/org/apache/solr/update/PeerSync.java +++ b/solr/core/src/java/org/apache/solr/update/PeerSync.java @@ -65,7 +65,12 @@ import static org.apache.solr.common.params.CommonParams.ID; import static org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase.FROMLEADER; import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; -/** @lucene.experimental */ +/** + * This class is useful for performing peer-to-peer synchronization of recently indexed update commands during + * the recovery process. + * + * @lucene.experimental + */ public class PeerSync implements SolrMetricProducer { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private boolean debug = log.isDebugEnabled(); diff --git a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java index dac40001160..5772b2ef52c 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java +++ b/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java @@ -51,7 +51,9 @@ import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; - +/** + * Used for distributing commands from a shard leader to its replicas. + */ public class SolrCmdDistributor implements Closeable { private static final int MAX_RETRIES_ON_FORWARD = 25; private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); diff --git a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java index d484e85b13e..26b98396d15 100644 --- a/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java +++ b/solr/core/src/java/org/apache/solr/update/SolrIndexConfig.java @@ -191,7 +191,7 @@ public class SolrIndexConfig implements MapSerializable { } mergedSegmentWarmerInfo = getPluginInfo(prefix + "/mergedSegmentWarmer", solrConfig, def.mergedSegmentWarmerInfo); - assertWarnOrFail("Begining with Solr 5.0, <checkIntegrityAtMerge> option is no longer supported and should be removed from solrconfig.xml (these integrity checks are now automatic)", + assertWarnOrFail("Beginning with Solr 5.0, <checkIntegrityAtMerge> option is no longer supported and should be removed from solrconfig.xml (these integrity checks are now automatic)", (null == solrConfig.getNode(prefix + "/checkIntegrityAtMerge", false)), true); } diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java b/solr/core/src/java/org/apache/solr/update/UpdateLog.java index 87b93f4a517..bb7f5f5a4f2 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java @@ -80,7 +80,12 @@ import static org.apache.solr.update.processor.DistributedUpdateProcessor.Distri import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM; -/** @lucene.experimental */ +/** + * This holds references to the transaction logs and pointers that map document IDs to their + * exact positions in the transaction logs.
+ * + * @lucene.experimental + */ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { private static final long STATUS_TIME = TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS); public static String LOG_FILENAME_PATTERN = "%s.%019d"; @@ -143,7 +148,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer { /** * The index of the _version_ value in an entry from the transaction log. */ -public static final int VERSION_IDX = 1; + public static final int VERSION_IDX = 1; /** * The index of the previous pointer in an entry from the transaction log. @@ -204,6 +209,9 @@ public static final int VERSION_IDX = 1; } }; + /** + * Holds the query and the version for a DeleteByQuery command + */ public static class DBQ { public String q; // the query string public long version; // positive version of the DBQ diff --git a/solr/core/src/java/org/apache/solr/update/VersionInfo.java b/solr/core/src/java/org/apache/solr/update/VersionInfo.java index 061e7f6038b..67b40420cf9 100644 --- a/solr/core/src/java/org/apache/solr/update/VersionInfo.java +++ b/solr/core/src/java/org/apache/solr/update/VersionInfo.java @@ -25,7 +25,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Terms; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.IndexSearcher; diff --git a/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java b/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java index c4234cbd20b..9c2d08d9aae 100644 --- a/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java +++ b/solr/core/src/java/org/apache/solr/update/processor/DocExpirationUpdateProcessorFactory.java @@ -388,7 +388,7 @@ public final class DocExpirationUpdateProcessorFactory // No-Op return; } - log.info("Begining periodic deletion of expired docs"); + log.info("Beginning periodic deletion of expired docs"); UpdateRequestProcessorChain chain = core.getUpdateProcessingChain(deleteChainName); UpdateRequestProcessor proc = chain.createProcessor(req, rsp); diff --git a/solr/core/src/java/org/apache/solr/util/FileUtils.java b/solr/core/src/java/org/apache/solr/util/FileUtils.java index 09db4f0f686..20462625d55 100644 --- a/solr/core/src/java/org/apache/solr/util/FileUtils.java +++ b/solr/core/src/java/org/apache/solr/util/FileUtils.java @@ -18,6 +18,10 @@ package org.apache.solr.util; import java.io.*; import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.io.FileExistsException; /** * @@ -96,4 +100,20 @@ public class FileUtils { public static boolean fileExists(String filePathString) { return new File(filePathString).exists(); } + + // Files.createDirectories has odd behavior if the path is a symlink and it already exists + // _even if it's a symlink to a directory_. + // + // oddly, if the path to be created just contains a symlink in intermediate levels, Files.createDirectories + // works just fine. 
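+ // + // For example (path purely illustrative): if /var/solr/data is an existing symlink to a real directory, + // Files.createDirectories(Paths.get("/var/solr/data")) fails with FileAlreadyExistsException even though + // the link target is a directory.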
+ // + // This works around that issue + public static Path createDirectories(Path path) throws IOException { + if (Files.exists(path) && Files.isSymbolicLink(path)) { + Path real = path.toRealPath(); + if (Files.isDirectory(real)) return real; + throw new FileExistsException("Tried to create a directory at an existing non-directory symlink: " + path.toString()); + } + return Files.createDirectories(path); + } } diff --git a/solr/core/src/java/org/apache/solr/util/SolrCLI.java b/solr/core/src/java/org/apache/solr/util/SolrCLI.java index 97fdf1ea44e..51ab5d7f5dd 100644 --- a/solr/core/src/java/org/apache/solr/util/SolrCLI.java +++ b/solr/core/src/java/org/apache/solr/util/SolrCLI.java @@ -2941,7 +2941,7 @@ public class SolrCLI { solrHome = solrHome.substring(cwdPath.length()+1); String startCmd = - String.format(Locale.ROOT, "%s start %s -p %d -s \"%s\" %s %s %s %s %s %s", + String.format(Locale.ROOT, "\"%s\" start %s -p %d -s \"%s\" %s %s %s %s %s %s", callScript, cloudModeArg, port, solrHome, hostArg, zkHostArg, memArg, forceArg, extraArgs, addlOptsArg); startCmd = startCmd.replaceAll("\\s+", " ").trim(); // for pretty printing diff --git a/solr/core/src/test-files/solr/collection1/conf/hyphenation.dtd b/solr/core/src/test-files/solr/collection1/conf/hyphenation.dtd index 15bb8ca60ed..fb3db16cf67 100644 --- a/solr/core/src/test-files/solr/collection1/conf/hyphenation.dtd +++ b/solr/core/src/test-files/solr/collection1/conf/hyphenation.dtd @@ -54,7 +54,7 @@ diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml index 3233258457e..58b05eb1cb8 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml @@ -511,6 +511,36 @@ diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml b/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml new file mode 100644 index 00000000000..f3d3196dc82 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml @@ -0,0 +1,150 @@ + id diff --git a/solr/core/src/test-files/solr/solr-shardhandler-loadBalancerRequests.xml b/solr/core/src/test-files/solr/solr-shardhandler-loadBalancerRequests.xml new file mode 100644 index 00000000000..92339d9befb --- /dev/null +++ b/solr/core/src/test-files/solr/solr-shardhandler-loadBalancerRequests.xml @@ -0,0 +1,23 @@ + ${solr.tests.loadBalancerRequestsMinimumAbsolute:0} + ${solr.tests.loadBalancerRequestsMaximumFraction:1.0} diff --git a/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java b/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java index a6d130ea4c9..f86322d779d 100644 --- a/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java +++ b/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java @@ -244,6 +244,7 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa // may need to increase maxShardsPerNode (e.g.
if it was shard split, then now we need more) restore.setMaxShardsPerNode((int)Math.ceil(backupCollection.getReplicas().size()/cluster.getJettySolrRunners().size())); } + if (rarely()) { // Try with createNodeSet configuration int nodeSetSize = cluster.getJettySolrRunners().size() / 2; @@ -255,7 +256,11 @@ public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCa restore.setCreateNodeSet(String.join(",", nodeStrs)); restore.setCreateNodeSetShuffle(usually()); // we need to double maxShardsPerNode value since we reduced number of available nodes by half. - restore.setMaxShardsPerNode(origShardToDocCount.size() * 2); + if (restore.getMaxShardsPerNode() != null) { + restore.setMaxShardsPerNode(restore.getMaxShardsPerNode() * 2); + } else { + restore.setMaxShardsPerNode(origShardToDocCount.size() * 2); + } } Properties props = new Properties(); diff --git a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java index f2e88453242..e4859c005e5 100644 --- a/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderWithPullReplicasTest.java @@ -27,6 +27,7 @@ import org.apache.solr.SolrTestCaseJ4.SuppressObjectReleaseTracker; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; @@ -234,7 +235,7 @@ public class ChaosMonkeySafeLeaderWithPullReplicasTest extends AbstractFullDistr try { del("*:*"); break; - } catch (SolrServerException e) { + } catch (SolrServerException | SolrException e) { // cluster may not be up yet e.printStackTrace(); } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java b/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java index 6c20ccc113c..875c0ef10ba 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestConfigSetsAPI.java @@ -279,8 +279,7 @@ public class TestConfigSetsAPI extends SolrTestCaseJ4 { @Test public void testUploadErrors() throws Exception { - final SolrClient solrClient = new HttpSolrClient( - solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString()); + final SolrClient solrClient = getHttpSolrClient(solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString()); ByteBuffer emptyData = ByteBuffer.allocate(0); @@ -504,7 +503,7 @@ public class TestConfigSetsAPI extends SolrTestCaseJ4 { private void xsltRequest(String collection) throws SolrServerException, IOException { String baseUrl = solrCluster.getJettySolrRunners().get(0).getBaseUrl().toString(); - try (HttpSolrClient client = new HttpSolrClient(baseUrl + "/" + collection)) { + try (HttpSolrClient client = getHttpSolrClient(baseUrl + "/" + collection)) { String xml = "" + " " + diff --git a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java index a53b39f5f6d..98f952a50b0 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudClusterSSL.java @@ -332,7 +332,7 @@ public 
class TestMiniSolrCloudClusterSSL extends SolrTestCaseJ4 { // that "optimize" the test client construction in a way that would prevent us from finding bugs with // regular HttpSolrClient instantiation. if (random().nextBoolean()) { - return new HttpSolrClient(url); + return (new HttpSolrClient.Builder(url)).build(); } // else... return getHttpSolrClient(url); } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java index cb732ff5741..cb0603df075 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java @@ -29,6 +29,7 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; import org.apache.lucene.util.LuceneTestCase.Slow; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrServerException; @@ -116,9 +117,22 @@ public class TestPullReplica extends SolrCloudTestCase { @Repeat(iterations=2) // 2 times to make sure cleanup is complete and we can create the same collection public void testCreateDelete() throws Exception { try { - CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1, 0, 3) - .setMaxShardsPerNode(100) - .process(cluster.getSolrClient()); + if (random().nextBoolean()) { + CollectionAdminRequest.createCollection(collectionName, "conf", 2, 1, 0, 3) + .setMaxShardsPerNode(100) + .process(cluster.getSolrClient()); + } else { + // Sometimes don't use SolrJ. + String url = String.format(Locale.ROOT, "%s/admin/collections?action=CREATE&name=%s&numShards=%s&pullReplicas=%s&maxShardsPerNode=%s", + cluster.getRandomJetty(random()).getBaseUrl(), + collectionName, + 2, // numShards + 3, // pullReplicas + 100); // maxShardsPerNode + url = url + pickRandom("", "&nrtReplicas=1", "&replicationFactor=1"); // These options should all mean the same + HttpGet createCollectionRequest = new HttpGet(url); + cluster.getSolrClient().getHttpClient().execute(createCollectionRequest); + } boolean reloaded = false; while (true) { DocCollection docCollection = getCollectionState(collectionName); diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java index f8f3f7e46bd..ddbbb649f72 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestSolrCloudWithDelegationTokens.java @@ -394,7 +394,7 @@ public class TestSolrCloudWithDelegationTokens extends SolrTestCaseJ4 { } ss = new HttpSolrClient.Builder(solrClientPrimary.getBaseURL().toString()) - .withDelegationToken(token) + .withKerberosDelegationToken(token) .withResponseParser(solrClientPrimary.getParser()) .build(); try { diff --git a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java index 034a8bf2840..1c2e7aa6d2e 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestTlogReplica.java @@ -30,6 +30,7 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import org.apache.http.client.HttpClient; +import org.apache.http.client.methods.HttpGet; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.LuceneTestCase.Slow; import 
org.apache.solr.client.solrj.SolrClient; @@ -144,9 +145,22 @@ public class TestTlogReplica extends SolrCloudTestCase { @Repeat(iterations=2) // 2 times to make sure cleanup is complete and we can create the same collection public void testCreateDelete() throws Exception { try { - CollectionAdminRequest.createCollection(collectionName, "conf", 2, 0, 4, 0) - .setMaxShardsPerNode(100) - .process(cluster.getSolrClient()); + if (random().nextBoolean()) { + CollectionAdminRequest.createCollection(collectionName, "conf", 2, 0, 4, 0) + .setMaxShardsPerNode(100) + .process(cluster.getSolrClient()); + } else { + // Sometimes don't use SolrJ + String url = String.format(Locale.ROOT, "%s/admin/collections?action=CREATE&name=%s&numShards=%s&tlogReplicas=%s&maxShardsPerNode=%s", + cluster.getRandomJetty(random()).getBaseUrl(), + collectionName, + 2, // numShards + 4, // tlogReplicas + 100); // maxShardsPerNode + HttpGet createCollectionRequest = new HttpGet(url); + cluster.getSolrClient().getHttpClient().execute(createCollectionRequest); + } + boolean reloaded = false; while (true) { DocCollection docCollection = getCollectionState(collectionName); diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java b/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java new file mode 100644 index 00000000000..3ffa015a26e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.component; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.impl.LBHttpSolrClient; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.handler.component.HttpShardHandlerFactory; +import org.apache.solr.handler.component.ShardHandlerFactory; + +import org.junit.BeforeClass; +import org.junit.AfterClass; + +/** + * Tests specifying a custom ShardHandlerFactory + */ +public class TestHttpShardHandlerFactory extends SolrTestCaseJ4 { + + private static final String LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE = "solr.tests.loadBalancerRequestsMinimumAbsolute"; + private static final String LOAD_BALANCER_REQUESTS_MAX_FRACTION = "solr.tests.loadBalancerRequestsMaximumFraction"; + + private static int expectedLoadBalancerRequestsMinimumAbsolute = 0; + private static float expectedLoadBalancerRequestsMaximumFraction = 1.0f; + + @BeforeClass + public static void beforeTests() throws Exception { + expectedLoadBalancerRequestsMinimumAbsolute = random().nextInt(3); // 0 .. 
2 + expectedLoadBalancerRequestsMaximumFraction = (1+random().nextInt(10))/10f; // 0.1 .. 1.0 + System.setProperty(LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE, Integer.toString(expectedLoadBalancerRequestsMinimumAbsolute)); + System.setProperty(LOAD_BALANCER_REQUESTS_MAX_FRACTION, Float.toString(expectedLoadBalancerRequestsMaximumFraction)); + } + + @AfterClass + public static void afterTests() { + System.clearProperty(LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE); + System.clearProperty(LOAD_BALANCER_REQUESTS_MAX_FRACTION); + } + + public void testLoadBalancerRequestsMinMax() throws Exception { + final Path home = Paths.get(TEST_HOME()); + CoreContainer cc = null; + ShardHandlerFactory factory = null; + try { + cc = CoreContainer.createAndLoad(home, home.resolve("solr-shardhandler-loadBalancerRequests.xml")); + factory = cc.getShardHandlerFactory(); + + // test that factory is HttpShardHandlerFactory with expected url reserve fraction + assertTrue(factory instanceof HttpShardHandlerFactory); + final HttpShardHandlerFactory httpShardHandlerFactory = ((HttpShardHandlerFactory)factory); + assertEquals(expectedLoadBalancerRequestsMinimumAbsolute, httpShardHandlerFactory.permittedLoadBalancerRequestsMinimumAbsolute, 0.0); + assertEquals(expectedLoadBalancerRequestsMaximumFraction, httpShardHandlerFactory.permittedLoadBalancerRequestsMaximumFraction, 0.0); + + // create a dummy request and dummy url list + final QueryRequest queryRequest = null; + final List<String> urls = new ArrayList<>(); + for (int ii=0; ii<10; ++ii) { + urls.add(null); + } + + // create LBHttpSolrClient request + final LBHttpSolrClient.Req req = httpShardHandlerFactory.newLBHttpSolrClientReq(queryRequest, urls); + + // actual vs. expected test + final int actualNumServersToTry = req.getNumServersToTry().intValue(); + int expectedNumServersToTry = (int)Math.floor(urls.size() * expectedLoadBalancerRequestsMaximumFraction); + if (expectedNumServersToTry < expectedLoadBalancerRequestsMinimumAbsolute) { + expectedNumServersToTry = expectedLoadBalancerRequestsMinimumAbsolute; + } + assertEquals("wrong numServersToTry for" + + " urls.size="+urls.size() + + " expectedLoadBalancerRequestsMinimumAbsolute="+expectedLoadBalancerRequestsMinimumAbsolute + + " expectedLoadBalancerRequestsMaximumFraction="+expectedLoadBalancerRequestsMaximumFraction, + expectedNumServersToTry, + actualNumServersToTry); + + } finally { + if (factory != null) factory.close(); + if (cc != null) cc.shutdown(); + } + } + +} diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyField.java b/solr/core/src/test/org/apache/solr/legacy/TestLegacyField.java similarity index 99% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyField.java rename to solr/core/src/test/org/apache/solr/legacy/TestLegacyField.java index 92d1dd6a8f3..5cfac9ab89c 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyField.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestLegacyField.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import java.io.StringReader; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyFieldReuse.java b/solr/core/src/test/org/apache/solr/legacy/TestLegacyFieldReuse.java similarity index 91% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyFieldReuse.java rename to solr/core/src/test/org/apache/solr/legacy/TestLegacyFieldReuse.java index 9335290247d..39d8d01f128 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyFieldReuse.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestLegacyFieldReuse.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import java.io.IOException; @@ -24,10 +24,10 @@ import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyNumericTokenStream; -import org.apache.lucene.legacy.LegacyNumericUtils; -import org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyNumericTokenStream; +import org.apache.solr.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute; /** test tokenstream reuse by DefaultIndexingChain */ public class TestLegacyFieldReuse extends BaseTokenStreamTestCase { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyNumericUtils.java b/solr/core/src/test/org/apache/solr/legacy/TestLegacyNumericUtils.java similarity index 99% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyNumericUtils.java rename to solr/core/src/test/org/apache/solr/legacy/TestLegacyNumericUtils.java index 8607efdc893..a87e28a31cb 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyNumericUtils.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestLegacyNumericUtils.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import java.util.Arrays; @@ -22,7 +22,7 @@ import java.util.Collections; import java.util.Iterator; import java.util.Random; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.LongBitSet; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyTerms.java b/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java similarity index 95% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyTerms.java rename to solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java index 27fae15e916..d91ba88b35d 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestLegacyTerms.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -23,11 +23,11 @@ import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFloatField; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyDoubleField; +import org.apache.solr.legacy.LegacyFloatField; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestMultiValuedNumericRangeQuery.java b/solr/core/src/test/org/apache/solr/legacy/TestMultiValuedNumericRangeQuery.java similarity index 96% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestMultiValuedNumericRangeQuery.java rename to solr/core/src/test/org/apache/solr/legacy/TestMultiValuedNumericRangeQuery.java index 386ec17a0e5..80b15245c0b 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestMultiValuedNumericRangeQuery.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestMultiValuedNumericRangeQuery.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import java.util.Locale; @@ -26,8 +26,8 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TopDocs; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery32.java b/solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery32.java similarity index 99% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery32.java rename to solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery32.java index acd0c04a351..5c029137a7a 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery32.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery32.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.analysis.MockAnalyzer; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery64.java b/solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery64.java similarity index 99% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery64.java rename to solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery64.java index b3ce55aa66d..99d4261def4 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericRangeQuery64.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestNumericRangeQuery64.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.analysis.MockAnalyzer; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericTokenStream.java b/solr/core/src/test/org/apache/solr/legacy/TestNumericTokenStream.java similarity index 97% rename from lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericTokenStream.java rename to solr/core/src/test/org/apache/solr/legacy/TestNumericTokenStream.java index a507af09e0d..b2e37819c22 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/legacy/TestNumericTokenStream.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestNumericTokenStream.java @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.legacy; +package org.apache.solr.legacy; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.BytesRef; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.legacy.LegacyNumericTokenStream; -import org.apache.lucene.legacy.LegacyNumericUtils; -import org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttributeImpl; +import org.apache.solr.legacy.LegacyNumericTokenStream; +import org.apache.solr.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttributeImpl; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; diff --git a/solr/core/src/test/org/apache/solr/rest/TestRestManager.java b/solr/core/src/test/org/apache/solr/rest/TestRestManager.java index cc6c58a4c90..a39e774aa50 100644 --- a/solr/core/src/test/org/apache/solr/rest/TestRestManager.java +++ b/solr/core/src/test/org/apache/solr/rest/TestRestManager.java @@ -174,8 +174,8 @@ public class TestRestManager extends SolrRestletTestBase { * "/managedResources/[0]/class=='org.apache.solr.rest.schema.analysis.ManagedWordSetResource'", "/managedResources/[0]/resourceId=='/schema/analysis/stopwords/english'", - "/managedResources/[1]/class=='org.apache.solr.rest.schema.analysis.ManagedSynonymFilterFactory$SynonymManager'", - "/managedResources/[1]/resourceId=='/schema/analysis/synonyms/english'"); + "/managedResources/[1]/class=='org.apache.solr.rest.schema.analysis.ManagedSynonymGraphFilterFactory$SynonymManager'", + "/managedResources/[1]/resourceId=='/schema/analysis/synonyms/englishgraph'"); */ // no pre-existing managed config components diff --git 
a/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java new file mode 100644 index 00000000000..9b442a8b926 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/rest/schema/analysis/TestManagedSynonymGraphFilterFactory.java @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.rest.schema.analysis; + +import java.io.File; +import java.net.URLEncoder; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +import org.apache.commons.io.FileUtils; +import org.apache.solr.util.RestTestBase; +import org.eclipse.jetty.servlet.ServletHolder; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.noggit.JSONUtil; +import org.restlet.ext.servlet.ServerServlet; + +public class TestManagedSynonymGraphFilterFactory extends RestTestBase { + + private static File tmpSolrHome; + + /** + * Setup to make the schema mutable + */ + @Before + public void before() throws Exception { + tmpSolrHome = createTempDir().toFile(); + FileUtils.copyDirectory(new File(TEST_HOME()), tmpSolrHome.getAbsoluteFile()); + + final SortedMap<ServletHolder,String> extraServlets = new TreeMap<>(); + final ServletHolder solrRestApi = new ServletHolder("SolrSchemaRestApi", ServerServlet.class); + solrRestApi.setInitParameter("org.restlet.application", "org.apache.solr.rest.SolrSchemaRestApi"); + extraServlets.put(solrRestApi, "/schema/*"); + + System.setProperty("managed.schema.mutable", "true"); + System.setProperty("enable.update.log", "false"); + createJettyAndHarness(tmpSolrHome.getAbsolutePath(), "solrconfig-managed-schema.xml", "schema-rest.xml", + "/solr", true, extraServlets); + } + + @After + private void after() throws Exception { + jetty.stop(); + jetty = null; + FileUtils.deleteDirectory(tmpSolrHome); + System.clearProperty("managed.schema.mutable"); + System.clearProperty("enable.update.log"); + + if (restTestHarness != null) { + restTestHarness.close(); + } + restTestHarness = null; + } + + @Test + public void testManagedSynonyms() throws Exception { + // this endpoint depends on at least one field type containing the following + // declaration in the schema-rest.xml: + // + // <filter class="solr.ManagedSynonymGraphFilterFactory" managed="englishgraph"/> + String endpoint = "/schema/analysis/synonyms/englishgraph"; + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==false", + "/synonymMappings/managedMap=={}"); + + // put a new mapping into the synonyms + Map<String,List<String>> syns = new HashMap<>(); + syns.put("happy", Arrays.asList("glad","cheerful","joyful")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), +
"/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/happy==['cheerful','glad','joyful']"); + + // request to a specific mapping + assertJQ(endpoint+"/happy", + "/happy==['cheerful','glad','joyful']"); + + // does not exist + assertJQ(endpoint+"/sad", + "/error/code==404"); + + // verify the user can update the ignoreCase initArg + assertJPut(endpoint, + json("{ 'initArgs':{ 'ignoreCase':true } }"), + "responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==true"); + + syns = new HashMap<>(); + syns.put("sad", Arrays.asList("unhappy")); + syns.put("SAD", Arrays.asList("bummed")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/sad==['unhappy']"); + assertJQ(endpoint, + "/synonymMappings/managedMap/SAD==['bummed']"); + + // expect a union of values when requesting the "sad" child + assertJQ(endpoint+"/sad", + "/sad==['bummed','unhappy']"); + + // verify delete works + assertJDelete(endpoint+"/sad", + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap=={'happy':['cheerful','glad','joyful']}"); + + // should fail with 404 as foo doesn't exist + assertJDelete(endpoint+"/foo", + "/error/code==404"); + + // verify that a newly added synonym gets expanded on the query side after core reload + + String newFieldName = "managed_graph_en_field"; + // make sure the new field doesn't already exist + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 0", + "/response/lst[@name='responseHeader']/int[@name='status'] = '404'", + "/response/lst[@name='error']/int[@name='code'] = '404'"); + + // add the new field + assertJPost("/schema", "{ add-field : { name: managed_graph_en_field, type : managed_graph_en}}", + "/responseHeader/status==0"); + + // make sure the new field exists now + assertQ("/schema/fields/" + newFieldName + "?indent=on&wt=xml", + "count(/response/lst[@name='field']) = 1", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'"); + + // multi-term synonym logic - SOLR-10264 + final String multiTermOrigin; + final String multiTermSynonym; + if (random().nextBoolean()) { + multiTermOrigin = "hansestadt hamburg"; + multiTermSynonym = "hh"; + } else { + multiTermOrigin = "hh"; + multiTermSynonym = "hansestadt hamburg"; + } + // multi-term logic similar to the angry/mad logic (angry ~ origin, mad ~ synonym) + + assertU(adoc(newFieldName, "I am a happy test today but yesterday I was angry", "id", "5150")); + assertU(adoc(newFieldName, multiTermOrigin+" is in North Germany.", "id", "040")); + assertU(commit()); + + assertQ("/select?q=" + newFieldName + ":angry", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + assertQ("/select?q=" + newFieldName + ":"+URLEncoder.encode(multiTermOrigin, "UTF-8"), + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='040']"); + + // add a mapping that will expand a query for "mad" to match docs with "angry" + syns = new HashMap<>(); + syns.put("mad", Arrays.asList("angry")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint, + "/synonymMappings/managedMap/mad==['angry']"); + + // add a 
mapping that will expand a query for "multi-term synonym" to match docs with "acronym" + syns = new HashMap<>(); + syns.put(multiTermSynonym, Arrays.asList(multiTermOrigin)); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint+"/"+URLEncoder.encode(multiTermSynonym, "UTF-8"), + "/"+multiTermSynonym+"==['"+multiTermOrigin+"']"); + + // should not match as the synonym mapping between mad and angry does not + // get applied until core reload + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='0']"); + + // should not match as the synonym mapping between "origin" and "synonym" + // was not added before the document was indexed + assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='0']"); + + restTestHarness.reload(); + + // now query for mad and we should see our test doc + assertQ("/select?q=" + newFieldName + ":mad", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='5150']"); + + // now query for "synonym" and we should see our test doc with "origin" + assertQ("/select?q=" + newFieldName + ":("+URLEncoder.encode(multiTermSynonym, "UTF-8") + ")&sow=false", + "/response/lst[@name='responseHeader']/int[@name='status'] = '0'", + "/response/result[@name='response'][@numFound='1']", + "/response/result[@name='response']/doc/str[@name='id'][.='040']"); + + // test for SOLR-6015 + syns = new HashMap<>(); + syns.put("mb", Arrays.asList("megabyte")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + syns.put("MB", Arrays.asList("MiB", "Megabyte")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + assertJQ(endpoint + "/MB", + "/MB==['Megabyte','MiB','megabyte']"); + + // test for SOLR-6878 - by default, expand is true, but only applies when sending in a list + List<String> m2mSyns = new ArrayList<>(); + m2mSyns.addAll(Arrays.asList("funny", "entertaining", "whimiscal", "jocular")); + assertJPut(endpoint, JSONUtil.toJSON(m2mSyns), "/responseHeader/status==0"); + + assertJQ(endpoint + "/funny", + "/funny==['entertaining','funny','jocular','whimiscal']"); + assertJQ(endpoint + "/entertaining", + "/entertaining==['entertaining','funny','jocular','whimiscal']"); + assertJQ(endpoint + "/jocular", + "/jocular==['entertaining','funny','jocular','whimiscal']"); + assertJQ(endpoint + "/whimiscal", + "/whimiscal==['entertaining','funny','jocular','whimiscal']"); + } + + /** + * Can we add and remove synonyms with umlauts + */ + @Test + public void testCanHandleDecodingAndEncodingForSynonyms() throws Exception { + String endpoint = "/schema/analysis/synonyms/germangraph"; + + assertJQ(endpoint, + "/synonymMappings/initArgs/ignoreCase==false", + "/synonymMappings/managedMap=={}"); + + // does not exist + assertJQ(endpoint+"/fröhlich", + "/error/code==404"); + + Map<String,List<String>> syns = new HashMap<>(); + + // now put a synonym + syns.put("fröhlich", Arrays.asList("glücklick")); + assertJPut(endpoint, + JSONUtil.toJSON(syns), + "/responseHeader/status==0"); + + // and check if it exists + assertJQ(endpoint, + "/synonymMappings/managedMap/fröhlich==['glücklick']"); + + // verify delete works +
assertJDelete(endpoint+"/fröhlich", + "/responseHeader/status==0"); + + + // was it really deleted? + assertJDelete(endpoint+"/fröhlich", + "/error/code==404"); + } +} diff --git a/solr/core/src/test/org/apache/solr/search/TestLegacyNumericRangeQueryBuilder.java b/solr/core/src/test/org/apache/solr/search/TestLegacyNumericRangeQueryBuilder.java index 3e147c247f5..a083bfdc370 100644 --- a/solr/core/src/test/org/apache/solr/search/TestLegacyNumericRangeQueryBuilder.java +++ b/solr/core/src/test/org/apache/solr/search/TestLegacyNumericRangeQueryBuilder.java @@ -18,7 +18,7 @@ package org.apache.solr.search; import org.apache.lucene.search.Query; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.queryparser.xml.ParserException; import org.w3c.dom.Document; import org.xml.sax.SAXException; diff --git a/solr/core/src/test/org/apache/solr/search/TestMaxScoreQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestMaxScoreQueryParser.java index e995f1e6c5e..53caf770acc 100644 --- a/solr/core/src/test/org/apache/solr/search/TestMaxScoreQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestMaxScoreQueryParser.java @@ -17,7 +17,7 @@ package org.apache.solr.search; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.search.*; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; diff --git a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java index 8cd96aea11f..895fb831708 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolr4Spatial.java @@ -24,7 +24,7 @@ import org.locationtech.spatial4j.context.SpatialContext; import org.locationtech.spatial4j.distance.DistanceUtils; import org.locationtech.spatial4j.shape.Point; import org.locationtech.spatial4j.shape.Rectangle; -import org.apache.lucene.spatial.bbox.BBoxStrategy; +import org.apache.solr.legacy.BBoxStrategy; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.core.SolrCore; diff --git a/solr/core/src/test/org/apache/solr/search/function/TestOrdValues.java b/solr/core/src/test/org/apache/solr/search/function/TestOrdValues.java index b3a70ae6d39..f7918d75f64 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestOrdValues.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestOrdValues.java @@ -29,8 +29,8 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.legacy.LegacyFloatField; -import org.apache.lucene.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyFloatField; +import org.apache.solr.legacy.LegacyIntField; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.FloatFieldSource; diff --git a/solr/core/src/test/org/apache/solr/security/hadoop/TestDelegationWithHadoopAuth.java b/solr/core/src/test/org/apache/solr/security/hadoop/TestDelegationWithHadoopAuth.java index c799296e5ba..37d9cdf1a9c 
100644 --- a/solr/core/src/test/org/apache/solr/security/hadoop/TestDelegationWithHadoopAuth.java +++ b/solr/core/src/test/org/apache/solr/security/hadoop/TestDelegationWithHadoopAuth.java @@ -380,7 +380,7 @@ public class TestDelegationWithHadoopAuth extends SolrCloudTestCase { } ss = new HttpSolrClient.Builder(primarySolrClient.getBaseURL().toString()) - .withDelegationToken(token) + .withKerberosDelegationToken(token) .withResponseParser(primarySolrClient.getParser()) .build(); try { diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java index 69b89b42090..873c0955b29 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestDocTermOrds.java @@ -43,9 +43,9 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum.SeekStatus; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java index d53f610f653..9588e67dff9 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSort.java @@ -37,10 +37,10 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFloatField; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyDoubleField; +import org.apache.solr.legacy.LegacyFloatField; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestLegacyFieldCache.java b/solr/core/src/test/org/apache/solr/uninverting/TestLegacyFieldCache.java index e38e193bcc9..b75bab5e0ed 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestLegacyFieldCache.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestLegacyFieldCache.java @@ -32,10 +32,10 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.legacy.LegacyDoubleField; -import org.apache.lucene.legacy.LegacyFloatField; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyDoubleField; +import org.apache.solr.legacy.LegacyFloatField; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; import org.apache.lucene.store.Directory; import 
org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java index 6fed73b829f..b9392b72c27 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms32.java @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java index 2f341b708c2..61a357990a3 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestNumericTerms64.java @@ -23,9 +23,9 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericRangeQuery; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericRangeQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java b/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java index e9e94e26238..f140ce27411 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestUninvertingReader.java @@ -40,10 +40,10 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.legacy.LegacyFieldType; -import org.apache.lucene.legacy.LegacyIntField; -import org.apache.lucene.legacy.LegacyLongField; -import org.apache.lucene.legacy.LegacyNumericUtils; +import org.apache.solr.legacy.LegacyFieldType; +import org.apache.solr.legacy.LegacyIntField; +import org.apache.solr.legacy.LegacyLongField; +import org.apache.solr.legacy.LegacyNumericUtils; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; diff --git a/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java b/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java new file mode 100644 index 00000000000..c7e0dc3c8c6 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.util; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; + +/** + * Tests for: + * {@link org.apache.lucene.analysis.core.LowerCaseTokenizerFactory} + * {@link org.apache.lucene.analysis.core.LetterTokenizerFactory} + * {@link org.apache.lucene.analysis.core.KeywordTokenizerFactory} + * {@link org.apache.lucene.analysis.core.WhitespaceTokenizerFactory} + */ + +public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 { + /* field names are used in accordance with the solrconfig and schema supplied */ + private static final String ID = "id"; + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-update-processor-chains.xml", "schema-tokenizer-test.xml"); + } + + public void testSingleFieldDiffAnalyzers() throws Exception { + + clearIndex(); + + // using fields with definitions, different tokenizer factories respectively at index time and standard tokenizer at query time. + + updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter\":\"letter\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase\":\"lowerCase\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace\":\"whiteSpace in\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace\":\"unicode in\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword\":\"keyword\"}},\"commit\":{}}",null); + + assertU(commit()); + + assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]"); + + //Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","letter:let"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","letter:lett"), "//result[@numFound=0]"); + + //Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","lowerCase:low"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","lowerCase:l"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","lowerCase:lo"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","lowerCase:lower"), "//result[@numFound=0]"); + + //Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","whiteSpace:whi"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace:teS"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace:in"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace:white"), "//result[@numFound=0]"); + + //Tokens generated for "unicode in": "uni" "cod" "e" 
"in" "unicode" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace:uni"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace:cod"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace:e"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace:unico"), "//result[@numFound=0]"); + + //Tokens generated for "keyword": "keyword" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","keyword:keyword"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","keyword:key"), "//result[@numFound=0]"); + + } + + public void testSingleFieldSameAnalyzers() throws Exception { + + clearIndex(); + + // using fields with definitions, same tokenizers both at index and query time. + + updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter0\":\"letter\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase0\":\"lowerCase\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace0\":\"whiteSpace in\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace0\":\"unicode in\"}},\"commit\":{}}",null); + updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword0\":\"keyword\"}},\"commit\":{}}",null); + + assertU(commit()); + + assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]"); + + //Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3 + // Anything that matches the first three letters should be found when maxLen=3 + assertQ("Check the total number of docs", req("q","letter0:l"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","letter0:let"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","letter0:lett"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","letter0:letXYZ"), "//result[@numFound=1]"); + + //Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3 + // Anything that matches the first three letters should be found when maxLen=3 + assertQ("Check the total number of docs", req("q","lowerCase0:low"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","lowerCase0:l"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","lowerCase0:lo"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","lowerCase0:lowerXYZ"), "//result[@numFound=1]"); + + //Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3 + // Anything that matches the first three letters should be found when maxLen=3 + assertQ("Check the total number of docs", req("q","whiteSpace0:h"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", req("q","whiteSpace0:whi"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace0:teS"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace0:in"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","whiteSpace0:whiteZKY"), "//result[@numFound=1]"); + + //Tokens generated for "unicode in": "uni" "cod" "e" "in" "unicode" , maxTokenLen=3 + // Anything that matches the first three letters should be found when maxLen=3 + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace0:u"), "//result[@numFound=0]"); + assertQ("Check the total number of docs", 
req("q","unicodeWhiteSpace0:uni"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace0:cod"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace0:e"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","unicodeWhiteSpace0:unicoVBRT"), "//result[@numFound=1]"); + + //Tokens generated for "keyword": "keyword" , maxTokenLen=3 + assertQ("Check the total number of docs", req("q","keyword0:keyword"), "//result[@numFound=1]"); + assertQ("Check the total number of docs", req("q","keyword0:key"), "//result[@numFound=0]"); + + } +} diff --git a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml index 53b50607f37..b7de812d005 100644 --- a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml +++ b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml @@ -3,7 +3,7 @@ diff --git a/solr/licenses/icu4j-56.1.jar.sha1 b/solr/licenses/icu4j-56.1.jar.sha1 deleted file mode 100644 index 5f8e0466fde..00000000000 --- a/solr/licenses/icu4j-56.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8dd6671f52165a0419e6de5e1016400875a90fa9 diff --git a/solr/licenses/icu4j-59.1.jar.sha1 b/solr/licenses/icu4j-59.1.jar.sha1 new file mode 100644 index 00000000000..f3f0018f053 --- /dev/null +++ b/solr/licenses/icu4j-59.1.jar.sha1 @@ -0,0 +1 @@ +6f06e820cf4c8968bbbaae66ae0b33f6a256b57f diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema index 6bda7486880..1c4f1feab59 100644 --- a/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema +++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/managed-schema @@ -455,10 +455,16 @@ - + - + + + + + + + diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc index 4550685649a..09dbe23fc6d 100644 --- a/solr/solr-ref-guide/src/filter-descriptions.adoc +++ b/solr/solr-ref-guide/src/filter-descriptions.adoc @@ -843,6 +843,23 @@ See <> for example input/output. This is specialized version of the <> that uses a mapping on synonyms that is <> +.Managed Synonym Filter has been Deprecated +[WARNING] +==== +Managed Synonym Filter has been deprecated in favor of Managed Synonym Graph Filter, which is required for multi-term synonym support. +==== + +*Factory class:* `solr.ManagedSynonymFilterFactory` + +For arguments and examples, see the Managed Synonym Graph Filter below. + +[[FilterDescriptions-ManagedSynonymGraphFilter]] +== Managed Synonym Graph Filter + +This is specialized version of the <> that uses a mapping on synonyms that is <> + +This filter maps single- or multi-token synonyms, producing a fully correct graph output. This filter is a replacement for the Managed Synonym Filter, which produces incorrect graphs for multi-token synonyms. + *Arguments:* `managed`:: The name that should be used for this mapping on synonyms in the managed REST API. @@ -853,13 +870,18 @@ With this configuration the set of mappings is named "english" and can be manage [source,xml] ---- - + - + + + + + + ---- -See <> for example input/output. +See <> for example input/output. [[FilterDescriptions-N-GramFilter]] == N-Gram Filter @@ -1409,7 +1431,7 @@ By contrast, a query like "`find the popsicle`" would remove "```the```" as a st `format`:: (optional; default: `wordset`) Defines how the words file will be parsed. 
If `words` is not specified, then `format` must not be specified. The valid values for the format option are: -`wordset`:: This is the default format, which supports one word per line (including any intra-word whitespace) and allows whole line comments begining with the `#` character. Blank lines are ignored. +`wordset`:: This is the default format, which supports one word per line (including any intra-word whitespace) and allows whole line comments beginning with the `#` character. Blank lines are ignored. `snowball`:: This format allows for multiple words specified on each line, and trailing comments may be specified using the vertical line (`|`). Blank lines are ignored. diff --git a/solr/solr-ref-guide/src/language-analysis.adoc b/solr/solr-ref-guide/src/language-analysis.adoc index 25124e521e0..11b0b784e41 100644 --- a/solr/solr-ref-guide/src/language-analysis.adoc +++ b/solr/solr-ref-guide/src/language-analysis.adoc @@ -378,9 +378,8 @@ These factories are each designed to work with specific languages. The languages * <> * <> * <> -* <> +* <> * <> -* <> * <> * <> @@ -508,50 +507,101 @@ Solr can stem Catalan using the Snowball Porter Stemmer with an argument of `lan *Out:* "llengu"(1), "llengu"(2) -[[LanguageAnalysis-Chinese]] -=== Chinese +[[LanguageAnalysis-TraditionalChinese]] +=== Traditional Chinese -[[LanguageAnalysis-ChineseTokenizer]] -==== Chinese Tokenizer +The default configuration of the <> is suitable for Traditional Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. -The Chinese Tokenizer is deprecated as of Solr 3.4. Use the <> instead. +<> can also be used to tokenize Traditional Chinese text. Following the Word Break rules from the Unicode Text Segmentation algorithm, it produces one token per Chinese character. When combined with <>, overlapping bigrams of Chinese characters are formed. + +<> folds fullwidth ASCII variants into the equivalent Basic Latin forms. -*Factory class:* `solr.ChineseTokenizerFactory` - -*Arguments:* None - -*Example:* +*Examples:* [source,xml] ---- - - + + + + ---- -[[LanguageAnalysis-ChineseFilterFactory]] -==== Chinese Filter Factory - -The Chinese Filter Factory is deprecated as of Solr 3.4. Use the <> instead. - -*Factory class:* `solr.ChineseFilterFactory` - -*Arguments:* None - -*Example:* - [source,xml] ---- - + - + + + ---- +[[LanguageAnalysis-CJKBigramFilter]] +=== CJK Bigram Filter + +Forms bigrams (overlapping 2-character sequences) of CJK characters that are generated from <> or <>. + +By default, all CJK characters produce bigrams, but finer grained control is available by specifying orthographic type arguments `han`, `hiragana`, `katakana`, and `hangul`. When set to `false`, characters of the corresponding type will be passed through as unigrams, and will not be included in any bigrams. + +When a CJK character has no adjacent characters to form a bigram, it is output in unigram form. If you want to always output both unigrams and bigrams, set the `outputUnigrams` argument to `true`. + +In all cases, all non-CJK input is passed through unmodified. + +*Arguments:* + +`han`:: (true/false) If false, Han (Chinese) characters will not form bigrams. Default is true. 
+ +`hiragana`:: (true/false) If false, Hiragana (Japanese) characters will not form bigrams. Default is true. + +`katakana`:: (true/false) If false, Katakana (Japanese) characters will not form bigrams. Default is true. + +`hangul`:: (true/false) If false, Hangul (Korean) characters will not form bigrams. Default is true. + +`outputUnigrams`:: (true/false) If true, in addition to forming bigrams, all characters are also passed through as unigrams. Default is false. + +See the example under <>. + [[LanguageAnalysis-SimplifiedChinese]] === Simplified Chinese -For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the `solr.HMMChineseTokenizerFactory` in the `analysis-extras` contrib module. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this filter, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. +For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the <>. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. + +The default configuration of the <> is also suitable for Simplified Chinese text. It follows the Word Break rules from the Unicode Text Segmentation algorithm for non-Chinese text, and uses a dictionary to segment Chinese words. To use this tokenizer, you must add additional .jars to Solr's classpath (as described in the section <>). See the `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`. + +Also useful for Chinese analysis: + +<> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. + +*Examples:* + +[source,xml] +---- + + + + + + + +---- + +[source,xml] +---- + + + + + + +---- + +[[LanguageAnalysis-HMMChineseTokenizer]] +=== HMM Chinese Tokenizer + +For Simplified Chinese, Solr provides support for Chinese sentence and word segmentation with the `solr.HMMChineseTokenizerFactory` in the `analysis-extras` contrib module. This component includes a large dictionary and segments Chinese text into words with the Hidden Markov Model. To use this tokenizer, see `solr/contrib/analysis-extras/README.txt` for instructions on which jars you need to add to your `solr_home/lib`. *Factory class:* `solr.HMMChineseTokenizerFactory` @@ -563,35 +613,7 @@ To use the default setup with fallback to English Porter stemmer for English wor `` -Or to configure your own analysis setup, use the `solr.HMMChineseTokenizerFactory` along with your custom filter setup. - -[source,xml] ----- - - - - - ----- - -[[LanguageAnalysis-CJK]] -=== CJK - -This tokenizer breaks Chinese, Japanese and Korean language text into tokens. These are not whitespace delimited languages. The tokens generated by this tokenizer are "doubles", overlapping pairs of CJK characters found in the field text. - -*Factory class:* `solr.CJKTokenizerFactory` - -*Arguments:* None - -*Example:* - -[source,xml] ----- - - - ----- +Or to configure your own analysis setup, use the `solr.HMMChineseTokenizerFactory` along with your custom filter setup. See an example of this in the <> section. 
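For illustration only, a minimal custom chain for Simplified Chinese might look like the following sketch. The exact filter list is an assumption (width folding, lowercasing, and English stemming are common companions to the HMM tokenizer), not a prescribed configuration:

[source,xml]
----
<analyzer>
  <!-- Segments Simplified Chinese text into words using the Hidden Markov Model -->
  <tokenizer class="solr.HMMChineseTokenizerFactory"/>
  <!-- Folds fullwidth ASCII variants into their Basic Latin equivalents -->
  <filter class="solr.CJKWidthFilterFactory"/>
  <!-- Lowercases any embedded Latin-script tokens -->
  <filter class="solr.LowerCaseFilterFactory"/>
  <!-- Stems any embedded English tokens -->
  <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
----

Placing the width-folding filter immediately after the tokenizer ensures fullwidth variants are normalized before any downstream filtering.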
[[LanguageAnalysis-Czech]] === Czech @@ -982,15 +1004,15 @@ Solr can stem Irish using the Snowball Porter Stemmer with an argument of `langu Solr includes support for analyzing Japanese, via the Lucene Kuromoji morphological analyzer, which includes several analysis components - more details on each below: -* `JapaneseIterationMarkCharFilter` normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. -* `JapaneseTokenizer` tokenizes Japanese using morphological analysis, and annotates each term with part-of-speech, base form (a.k.a. lemma), reading and pronunciation. -* `JapaneseBaseFormFilter` replaces original terms with their base forms (a.k.a. lemmas). -* `JapanesePartOfSpeechStopFilter` removes terms that have one of the configured parts-of-speech. -* `JapaneseKatakanaStemFilter` normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. +* <> normalizes Japanese horizontal iteration marks (odoriji) to their expanded form. +* <> tokenizes Japanese using morphological analysis, and annotates each term with part-of-speech, base form (a.k.a. lemma), reading and pronunciation. +* <> replaces original terms with their base forms (a.k.a. lemmas). +* <> removes terms that have one of the configured parts-of-speech. +* <> normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. Also useful for Japanese analysis, from lucene-analyzers-common: -* `CJKWidthFilter` folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. +* <> folds fullwidth ASCII variants into the equivalent Basic Latin forms, and folds halfwidth Katakana variants into their equivalent fullwidth forms. [[LanguageAnalysis-JapaneseIterationMarkCharFilter]] ==== Japanese Iteration Mark CharFilter @@ -1057,7 +1079,7 @@ Removes terms with one of the configured parts-of-speech. `JapaneseTokenizer` an Normalizes common katakana spelling variations ending in a long sound character (U+30FC) by removing the long sound character. -`CJKWidthFilterFactory` should be specified prior to this filter to normalize half-width katakana to full-width. +<> should be specified prior to this filter to normalize half-width katakana to full-width. *Factory class:* `JapaneseKatakanaStemFilterFactory` diff --git a/solr/solr-ref-guide/src/managed-resources.adoc b/solr/solr-ref-guide/src/managed-resources.adoc index f7d062d9ab9..72b879a2548 100644 --- a/solr/solr-ref-guide/src/managed-resources.adoc +++ b/solr/solr-ref-guide/src/managed-resources.adoc @@ -22,7 +22,7 @@ Managed resources expose a REST API endpoint for performing Create-Read-Update-D Any long-lived Solr object that has configuration settings and/or data is a good candidate to be a managed resource. Managed resources complement other programmatically manageable components in Solr, such as the RESTful schema API to add fields to a managed schema. -Consider a Web-based UI that offers Solr-as-a-Service where users need to configure a set of stop words and synonym mappings as part of an initial setup process for their search application. This type of use case can easily be supported using the Managed Stop Filter & Managed Synonym Filter Factories provided by Solr, via the Managed resources REST API. 
+Consider a Web-based UI that offers Solr-as-a-Service where users need to configure a set of stop words and synonym mappings as part of an initial setup process for their search application. This type of use case can easily be supported using the Managed Stop Filter & Managed Synonym Graph Filter Factories provided by Solr, via the Managed resources REST API. Users can also write their own custom plugins, that leverage the same internal hooks to make additional resources REST managed. @@ -142,14 +142,16 @@ For the most part, the API for managing synonyms behaves similar to the API for [source,xml] ---- - + - - - - + + + + + + + + ---- @@ -254,7 +256,7 @@ The response body is a JSON document containing metadata about managed resources }, { "resourceId":"/schema/analysis/synonyms/english", - "class":"org.apache.solr.rest.schema.analysis.ManagedSynonymFilterFactory$SynonymManager", + "class":"org.apache.solr.rest.schema.analysis.ManagedSynonymGraphFilterFactory$SynonymManager", "numObservers":"1" } ] diff --git a/solr/solr-ref-guide/src/near-real-time-searching.adoc b/solr/solr-ref-guide/src/near-real-time-searching.adoc index 8751a49e92a..641208e029b 100644 --- a/solr/solr-ref-guide/src/near-real-time-searching.adoc +++ b/solr/solr-ref-guide/src/near-real-time-searching.adoc @@ -29,9 +29,9 @@ However, pay special attention to cache and autowarm settings as they can have a [[NearRealTimeSearching-CommitsandOptimizing]] == Commits and Optimizing -A commit operation makes index changes visible to new search requests. A *hard commit* uses the transaction log to get the id of the latest document changes, and also calls `fsync` on the index files to ensure they have been flushed to stable storage and no data loss will result from a power failure. +A commit operation makes index changes visible to new search requests. A *hard commit* uses the transaction log to get the id of the latest document changes, and also calls `fsync` on the index files to ensure they have been flushed to stable storage and no data loss will result from a power failure. The current transaction log is closed and a new one is opened. See the "transaction log" discussion below for data loss issues. -A *soft commit* is much faster since it only makes index changes visible and does not `fsync` index files or write a new index descriptor. If the JVM crashes or there is a loss of power, changes that occurred after the last *hard commit* will be lost. Search collections that have NRT requirements (that want index changes to be quickly visible to searches) will want to soft commit often but hard commit less frequently. A softCommit may be "less expensive" in terms of time, but not free, since it can slow throughput. +A *soft commit* is much faster since it only makes index changes visible and does not `fsync` index files, or write a new index descriptor or start a new transaction log. Search collections that have NRT requirements (that want index changes to be quickly visible to searches) will want to soft commit often but hard commit less frequently. A softCommit may be "less expensive", but it is not free, since it can slow throughput. See the "transaction log" discussion below for data loss issues. An *optimize* is like a *hard commit* except that it forces all of the index segments to be merged into a single segment first. Depending on the use, this operation should be performed infrequently (e.g., nightly), if at all, since it involves reading and re-writing the entire index. 
Segments are normally merged over time anyway (as determined by the merge policy), and optimize just forces these merges to occur immediately. @@ -48,6 +48,15 @@ Soft commit takes two parameters: `maxDocs` and `maxTime`. Use `maxDocs` and `maxTime` judiciously to fine-tune your commit strategies. +[[NearRealTimeSearching-TransactionLogs]] +=== Transaction Logs (tlogs) + +Transaction logs are a "rolling window" of at least the last `N` (default 100) documents indexed. Tlogs are configured in solrconfig.xml, including the value of `N`. The current transaction log is closed and a new one opened each time any variety of hard commit occurs. Soft commits have no effect on the transaction log. + +When tlogs are enabled, documents being added to the index are written to the tlog before the indexing call returns to the client. In the event of an un-graceful shutdown (power loss, JVM crash, `kill -9`, etc.) any documents written to the tlog that was open when Solr stopped are replayed on startup. + +When Solr is shut down gracefully (i.e. using the `bin/solr stop` command and the like) Solr will close the tlog file and index segments so no replay will be necessary on startup. + [[NearRealTimeSearching-AutoCommits]] === AutoCommits @@ -75,6 +84,7 @@ It's better to use `maxTime` rather than `maxDocs` to modify an `autoSoftCommit` |=== |Parameter |Valid Attributes |Description |`waitSearcher` |true, false |Block until a new searcher is opened and registered as the main query searcher, making the changes visible. Default is true. +|`openSearcher` |true, false |Open a new searcher, making all documents indexed so far visible for searching. Default is true. |`softCommit` |true, false |Perform a soft commit. This will refresh the view of the index faster, but without guarantees that the document is stably stored. Default is false. |`expungeDeletes` |true, false |Valid for `commit` only. This parameter purges deleted data from segments. The default is false. |`maxSegments` |integer |Valid for `optimize` only. Optimize down to at most this number of segments. The default is 1. diff --git a/solr/solr-ref-guide/src/other-parsers.adoc b/solr/solr-ref-guide/src/other-parsers.adoc index 54b546416bc..9b438f5d23d 100644 --- a/solr/solr-ref-guide/src/other-parsers.adoc +++ b/solr/solr-ref-guide/src/other-parsers.adoc @@ -331,7 +331,7 @@ The graph is built according to linkages between documents based on the terms fo |to |The field name of matching documents to inspect to identify outgoing edges for graph traversal. Defaults to `edge_ids`. |from |The field name of candidate documents to inspect to identify incoming graph edges. Defaults to `node_id`. |traversalFilter |An optional query that can be supplied to limit the scope of documents that are traversed. -|maxDepth |Integer specifying how deep the breadth first search of the graph should go begining with the initial query. Defaults to -1 (unlimited) +|maxDepth |Integer specifying how deep the breadth first search of the graph should go beginning with the initial query. Defaults to -1 (unlimited) |returnRoot |Boolean to indicate if the documents that matched the original query (to define the starting points for graph) should be included in the final results. Defaults to true |returnOnlyLeaf |Boolean that indicates if the results of the query should be filtered so that only documents with no outgoing edges are returned.
Defaults to false |useAutn |Boolean that indicates if an Automaton should be compiled for each iteration of the breadth first search, which may be faster for some graphs. Defaults to false. diff --git a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc index 4e1c03e2953..dff118bf1b5 100644 --- a/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc +++ b/solr/solr-ref-guide/src/shards-and-indexing-data-in-solrcloud.adoc @@ -45,11 +45,6 @@ If you use the (default) "```compositeId```" router, you can send documents with Then at query time, you include the prefix(es) into your query with the `\_route_` parameter (i.e., `q=solr&_route_=IBM!`) to direct queries to specific shards. In some situations, this may improve query performance because it overcomes network latency when querying all the shards. -[IMPORTANT] -==== -The `\_route_` parameter replaces `shard.keys`, which has been deprecated and will be removed in a future Solr release. -==== - The `compositeId` router supports prefixes containing up to 2 levels of routing. For example: a prefix routing first by region, then by customer: "USA!IBM!12345" Another use case could be if the customer "IBM" has a lot of documents and you want to spread it across multiple shards. The syntax for such a use case would be: "shard_key/num!document_id" where the /num is the number of bits from the shard key to use in the composite hash. diff --git a/solr/solr-ref-guide/src/the-stats-component.adoc b/solr/solr-ref-guide/src/the-stats-component.adoc index 0e4fa4dd3dc..8014ccf5544 100644 --- a/solr/solr-ref-guide/src/the-stats-component.adoc +++ b/solr/solr-ref-guide/src/the-stats-component.adoc @@ -43,16 +43,6 @@ Specifies a field for which statistics should be generated. This parameter may b <> may be used to indicate which subset of the supported statistics should be computed, and/or that statistics should be computed over the results of an arbitrary numeric function (or query) instead of a simple field name. See the examples below. -|stats.facet a| -Returns sub-results for values within the specified facet. - -This legacy parameter is not recommended for new users - instead please consider <> - -|stats.calcdistinct a| -If **true**, the "countDistinct" and "distinctValues" statistics will be computed and included the response. These calculations can be very expensive for fields that do not have a tiny cardinality, so they are disabled by default. - -This parameter can be specified using per-filed override (ie: `f..stats.calcdistinct=true`) but users are encouraged to instead the statistics desired <> - As a top level request parameter, this option is deprecated. - |=== [[TheStatsComponent-Example]] @@ -151,7 +141,6 @@ Additional "Expert" local params are supported in some cases for affecting the b ** `hllPreHashed` - a boolean option indicating that the statistics are being computed over a "long" field that has already been hashed at index time – allowing the HLL computation to skip this step.
** `hllLog2m` - an integer value specifying an explicit "log2m" value to use, overriding the heuristic value determined by the cardinality local param and the field type – see the https://github.com/aggregateknowledge/java-hll/[java-hll] documentation for more details ** `hllRegwidth` - an integer value specifying an explicit "regwidth" value to use, overriding the heuristic value determined by the cardinality local param and the field type – see the https://github.com/aggregateknowledge/java-hll/[java-hll] documentation for more details -* `calcDistinct` - for backwards compatibility, `calcDistinct=true` may be specified as an alias for both `countDistinct=true distinctValues=true` [[TheStatsComponent-Examples]] === Examples @@ -190,6 +179,6 @@ Here we compute some statistics for the price field. The min, max, mean, 90th, a [[TheStatsComponent-TheStatsComponentandFaceting]] == The Stats Component and Faceting -Although the `stats.facet` parameter is no longer recommended, sets of `stats.field` parameters can be referenced by '`tag`' when using Pivot Faceting to compute multiple statistics at every level (i.e.: field) in the tree of pivot constraints. +Sets of `stats.field` parameters can be referenced by '`tag`' when using Pivot Faceting to compute multiple statistics at every level (i.e.: field) in the tree of pivot constraints. For more information and a detailed example, please see <>. diff --git a/solr/solr-ref-guide/src/tokenizers.adoc b/solr/solr-ref-guide/src/tokenizers.adoc index 5c7a819c6aa..7a8bdeb37f4 100644 --- a/solr/solr-ref-guide/src/tokenizers.adoc +++ b/solr/solr-ref-guide/src/tokenizers.adoc @@ -286,7 +286,7 @@ This tokenizer processes multilingual text and tokenizes it appropriately based You can customize this tokenizer's behavior by specifying http://userguide.icu-project.org/boundaryanalysis#TOC-RBBI-Rules[per-script rule files]. To add per-script rules, add a `rulefiles` argument, which should contain a comma-separated list of `code:rulefile` pairs in the following format: four-letter ISO 15924 script code, followed by a colon, then a resource path. For example, to specify rules for Latin (script code "Latn") and Cyrillic (script code "Cyrl"), you would enter `Latn:my.Latin.rules.rbbi,Cyrl:my.Cyrillic.rules.rbbi`. -The default `solr.ICUTokenizerFactory` provides UAX#29 word break rules tokenization (like `solr.StandardTokenizer`), but also includes custom tailorings for Hebrew (specializing handling of double and single quotation marks), and for syllable tokenization for Khmer, Lao, and Myanmar. +The default configuration for `solr.ICUTokenizerFactory` provides UAX#29 word break rules tokenization (like `solr.StandardTokenizer`), but also includes custom tailorings for Hebrew (specializing handling of double and single quotation marks), for syllable tokenization for Khmer, Lao, and Myanmar, and dictionary-based word segmentation for CJK characters. 
*Factory class:* `solr.ICUTokenizerFactory` diff --git a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc b/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc index 9004789bd45..33d23620bb5 100644 --- a/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc +++ b/solr/solr-ref-guide/src/uploading-structured-data-store-data-with-the-data-import-handler.adoc @@ -473,7 +473,7 @@ Here is an example from the `atom` collection in the `dih` example (data-config diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java index ff7b06a71cf..12716554787 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.net.ConnectException; import java.net.SocketException; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -234,168 +233,6 @@ public class CloudSolrClient extends SolrClient { } } - /** - * Create a new client object that connects to Zookeeper and is always aware - * of the SolrCloud state. If there is a fully redundant Zookeeper quorum and - * SolrCloud has enough replicas for every shard in a collection, there is no - * single point of failure. Updates will be sent to shard leaders by default. - * - * @param zkHost - * The client endpoint of the zookeeper quorum containing the cloud - * state. The full specification for this string is one or more comma - * separated HOST:PORT values, followed by an optional chroot value - * that starts with a forward slash. Using a chroot allows multiple - * applications to coexist in one ensemble. For full details, see the - * Zookeeper documentation. Some examples: - *
<p>
      - * "host1:2181"
      - *          <p>
      - * "host1:2181,host2:2181,host3:2181/mysolrchroot"
      - *          <p>
      - * "zoo1.example.com:2181,zoo2.example.com:2181,zoo3.example.com:2181" - * - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost) { - this.stateProvider = new ZkClientClusterStateProvider(zkHost); - this.clientIsInternal = true; - this.myClient = HttpClientUtil.createClient(null); - this.lbClient = new LBHttpSolrClient.Builder() - .withHttpClient(myClient) - .build(); - this.lbClient.setRequestWriter(new BinaryRequestWriter()); - this.lbClient.setParser(new BinaryResponseParser()); - this.updatesToLeaders = true; - this.directUpdatesToLeadersOnly = false; - shutdownLBHttpSolrServer = true; - lbClient.addQueryParams(STATE_VERSION); - } - - /** - * Create a new client object that connects to Zookeeper and is always aware - * of the SolrCloud state. If there is a fully redundant Zookeeper quorum and - * SolrCloud has enough replicas for every shard in a collection, there is no - * single point of failure. Updates will be sent to shard leaders by default. - * - * @param zkHost - * The client endpoint of the zookeeper quorum containing the cloud - * state. The full specification for this string is one or more comma - * separated HOST:PORT values, followed by an optional chroot value - * that starts with a forward slash. Using a chroot allows multiple - * applications to coexist in one ensemble. For full details, see the - * Zookeeper documentation. Some examples: - *
<p>
      - * "host1:2181"
      - *          <p>
      - * "host1:2181,host2:2181,host3:2181/mysolrchroot"
      - *          <p>
      - * "zoo1.example.com:2181,zoo2.example.com:2181,zoo3.example.com:2181" - * @param httpClient - * the {@link HttpClient} instance to be used for all requests. The - * provided httpClient should use a multi-threaded connection manager. - * - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost, HttpClient httpClient) { - this.stateProvider = new ZkClientClusterStateProvider(zkHost); - this.clientIsInternal = httpClient == null; - this.myClient = httpClient == null ? HttpClientUtil.createClient(null) : httpClient; - this.lbClient = createLBHttpSolrClient(myClient); - this.updatesToLeaders = true; - this.directUpdatesToLeadersOnly = false; - shutdownLBHttpSolrServer = true; - lbClient.addQueryParams(STATE_VERSION); - } - - /** - * Create a new client object using multiple string values in a Collection - * instead of a standard zkHost connection string. Note that this method will - * not be used if there is only one String argument - that will use - * {@link #CloudSolrClient(String)} instead. - * - * @param zkHosts - * A Java Collection (List, Set, etc) of HOST:PORT strings, one for - * each host in the zookeeper ensemble. Note that with certain - * Collection types like HashSet, the order of hosts in the final - * connect string may not be in the same order you added them. - * @param chroot - * A chroot value for zookeeper, starting with a forward slash. If no - * chroot is required, use null. - * @throws IllegalArgumentException - * if the chroot value does not start with a forward slash. - * @see #CloudSolrClient(String) - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public CloudSolrClient(Collection zkHosts, String chroot) { - this(zkHosts, chroot, null); - } - - /** - * Create a new client object using multiple string values in a Collection - * instead of a standard zkHost connection string. Note that this method will - * not be used if there is only one String argument - that will use - * {@link #CloudSolrClient(String)} instead. - * - * @param zkHosts - * A Java Collection (List, Set, etc) of HOST:PORT strings, one for - * each host in the zookeeper ensemble. Note that with certain - * Collection types like HashSet, the order of hosts in the final - * connect string may not be in the same order you added them. - * @param chroot - * A chroot value for zookeeper, starting with a forward slash. If no - * chroot is required, use null. - * @param httpClient - * the {@link HttpClient} instance to be used for all requests. The provided httpClient should use a - * multi-threaded connection manager. - * @throws IllegalArgumentException - * if the chroot value does not start with a forward slash. - * @see #CloudSolrClient(String) - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public CloudSolrClient(Collection zkHosts, String chroot, HttpClient httpClient) { - this.stateProvider = new ZkClientClusterStateProvider(zkHosts, chroot); - this.clientIsInternal = httpClient == null; - this.myClient = httpClient == null ? HttpClientUtil.createClient(null) : httpClient; - this.lbClient = createLBHttpSolrClient(myClient); - this.updatesToLeaders = true; - this.directUpdatesToLeadersOnly = false; - shutdownLBHttpSolrServer = true; - } - - /** - * Create a new client object that connects to Zookeeper and is always aware - * of the SolrCloud state. If there is a fully redundant Zookeeper quorum and - * SolrCloud has enough replicas for every shard in a collection, there is no - * single point of failure. 
Updates will be sent to shard leaders by default. - * - * @param zkHosts - * A Java Collection (List, Set, etc) of HOST:PORT strings, one for - * each host in the zookeeper ensemble. Note that with certain - * Collection types like HashSet, the order of hosts in the final - * connect string may not be in the same order you added them. - * @param chroot - * A chroot value for zookeeper, starting with a forward slash. If no - * chroot is required, use null. - * @param httpClient - * the {@link HttpClient} instance to be used for all requests. The provided httpClient should use a - * multi-threaded connection manager. If null, a default HttpClient will be used. - * @param lbSolrClient - * LBHttpSolrClient instance for requests. If null, a default LBHttpSolrClient will be used. - * @param updatesToLeaders - * If true, sends updates to shard leaders. - * - * @deprecated use {@link Builder} instead. This will soon be a protected method, and will only - * be available for use in implementing subclasses. - */ - @Deprecated - public CloudSolrClient(Collection zkHosts, String chroot, HttpClient httpClient, LBHttpSolrClient lbSolrClient, boolean updatesToLeaders) { - this(zkHosts, chroot, null, httpClient, lbSolrClient, null, updatesToLeaders, false, null); - } - /** * Create a new client object that connects to Zookeeper and is always aware * of the SolrCloud state. If there is a fully redundant Zookeeper quorum and @@ -467,46 +304,6 @@ public class CloudSolrClient extends SolrClient { this.updatesToLeaders = updatesToLeaders; this.directUpdatesToLeadersOnly = directUpdatesToLeadersOnly; } - - /** - * @param zkHost - * A zookeeper client endpoint. - * @param updatesToLeaders - * If true, sends updates only to shard leaders. - * @see #CloudSolrClient(String) for full description and details on zkHost - * @deprecated use {@link CloudSolrClient.Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost, boolean updatesToLeaders) { - this(zkHost, updatesToLeaders, null); - } - - /** - * @param zkHost - * A zookeeper client endpoint. - * @param updatesToLeaders - * If true, sends updates only to shard leaders. - * @param httpClient - * the {@link HttpClient} instance to be used for all requests. The provided httpClient should use a - * multi-threaded connection manager. - * @see #CloudSolrClient(String) for full description and details on zkHost - * @deprecated use {@link CloudSolrClient.Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost, boolean updatesToLeaders, HttpClient httpClient) { - this.stateProvider = new ZkClientClusterStateProvider(zkHost); - this.clientIsInternal = httpClient == null; - this.myClient = httpClient == null ? HttpClientUtil.createClient(null) : httpClient; - this.lbClient = new LBHttpSolrClient.Builder() - .withHttpClient(myClient) - .build(); - this.lbClient.setRequestWriter(new BinaryRequestWriter()); - this.lbClient.setParser(new BinaryResponseParser()); - this.updatesToLeaders = updatesToLeaders; - this.directUpdatesToLeadersOnly = false; - shutdownLBHttpSolrServer = true; - lbClient.addQueryParams(STATE_VERSION); - } /**Sets the cache ttl for DocCollection Objects cached . This is only applicable for collections which are persisted outside of clusterstate.json * @param seconds ttl value in seconds @@ -515,40 +312,6 @@ public class CloudSolrClient extends SolrClient { assert seconds > 0; this.collectionStateCache.timeToLive = seconds * 1000L; } - - /** - * @param zkHost - * A zookeeper client endpoint. 
- * @param lbClient - * LBHttpSolrServer instance for requests. - * @see #CloudSolrClient(String) for full description and details on zkHost - * @deprecated use {@link CloudSolrClient.Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost, LBHttpSolrClient lbClient) { - this(zkHost, lbClient, true); - } - - /** - * @param zkHost - * A zookeeper client endpoint. - * @param lbClient - * LBHttpSolrServer instance for requests. - * @param updatesToLeaders - * If true, sends updates only to shard leaders. - * @see #CloudSolrClient(String) for full description and details on zkHost - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public CloudSolrClient(String zkHost, LBHttpSolrClient lbClient, boolean updatesToLeaders) { - this.lbClient = lbClient; - this.stateProvider = new ZkClientClusterStateProvider(zkHost); - this.updatesToLeaders = updatesToLeaders; - this.directUpdatesToLeadersOnly = false; - shutdownLBHttpSolrServer = false; - this.clientIsInternal = false; - lbClient.addQueryParams(STATE_VERSION); - } public ResponseParser getParser() { return lbClient.getParser(); @@ -660,25 +423,6 @@ public class CloudSolrClient extends SolrClient { this.parallelUpdates = parallelUpdates; } - /** - * Upload a set of config files to Zookeeper and give it a name - * - * NOTE: You should only allow trusted users to upload configs. If you - * are allowing client access to zookeeper, you should protect the - * /configs node against unauthorised write access. - * - * @deprecated Please use {@link ZkClientClusterStateProvider#uploadConfig(Path, String)} instead - * - * @param configPath {@link java.nio.file.Path} to the config files - * @param configName the name of the config - * @throws IOException if an IO error occurs - */ - @Deprecated - public void uploadConfig(Path configPath, String configName) throws IOException { - stateProvider.connect(); - assertZKStateProvider().uploadConfig(configPath, configName); - } - private ZkClientClusterStateProvider assertZKStateProvider() { if (stateProvider instanceof ZkClientClusterStateProvider) { return (ZkClientClusterStateProvider) stateProvider; @@ -686,20 +430,7 @@ public class CloudSolrClient extends SolrClient { throw new IllegalArgumentException("This client does not use ZK"); } - - /** - * Download a named config from Zookeeper to a location on the filesystem - * - * @deprecated Please use {@link ZkClientClusterStateProvider#downloadConfig(String, Path)} instead - * @param configName the name of the config - * @param downloadPath the path to write config files to - * @throws IOException if an I/O exception occurs - */ - @Deprecated - public void downloadConfig(String configName, Path downloadPath) throws IOException { - assertZKStateProvider().downloadConfig(configName, downloadPath); - } - + /** * Block until a collection state matches a predicate, or a timeout * diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java index bc37c130ded..d6675f284ea 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java @@ -95,57 +95,12 @@ public class ConcurrentUpdateSolrClient extends SolrClient { AtomicInteger blockLoops; AtomicInteger emptyQueueLoops; - /** - * Uses an internally managed HttpClient instance. 
- * - * @param solrServerUrl - * The Solr server URL - * @param queueSize - * The buffer size before the documents are sent to the server - * @param threadCount - * The number of background threads used to empty the queue - * - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public ConcurrentUpdateSolrClient(String solrServerUrl, int queueSize, - int threadCount) { - this(solrServerUrl, null, queueSize, threadCount); - shutdownExecutor = true; - internalHttpClient = true; - } - - /** - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public ConcurrentUpdateSolrClient(String solrServerUrl, - HttpClient client, int queueSize, int threadCount) { - this(solrServerUrl, client, queueSize, threadCount, ExecutorUtil.newMDCAwareCachedThreadPool( - new SolrjNamedThreadFactory("concurrentUpdateScheduler"))); - shutdownExecutor = true; - } - /** * Uses the supplied HttpClient to send documents to the Solr server. - * - * @deprecated use {@link Builder} instead. */ - @Deprecated - public ConcurrentUpdateSolrClient(String solrServerUrl, - HttpClient client, int queueSize, int threadCount, ExecutorService es) { - this(solrServerUrl, client, queueSize, threadCount, es, false); - } - - /** - * Uses the supplied HttpClient to send documents to the Solr server. - * - * @deprecated use {@link Builder} instead. This will soon be a - * protected method, and will only be available for use in implementing subclasses. - */ - @Deprecated - public ConcurrentUpdateSolrClient(String solrServerUrl, - HttpClient client, int queueSize, int threadCount, ExecutorService es, boolean streamDeletes) { + protected ConcurrentUpdateSolrClient(String solrServerUrl, + HttpClient client, int queueSize, int threadCount, + ExecutorService es, boolean streamDeletes) { this.internalHttpClient = (client == null); this.client = new HttpSolrClient.Builder(solrServerUrl) .withHttpClient(client) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/DelegationTokenHttpSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/DelegationTokenHttpSolrClient.java index ab8175d93d5..fc83391260a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/DelegationTokenHttpSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/DelegationTokenHttpSolrClient.java @@ -35,21 +35,15 @@ public class DelegationTokenHttpSolrClient extends HttpSolrClient { public final static String DELEGATION_TOKEN_PARAM = "delegation"; /** - * This constructor is deprecated in favor of passing delegation token via - * {@linkplain org.apache.solr.client.solrj.impl.HttpSolrClient.Builder#withInvariantParams(ModifiableSolrParams)}. - * - * @param baseURL The base url to communicate with the Solr server - * @param client Http client instance to use for communication - * @param parser Response parser instance to use to decode response from Solr server - * @param allowCompression Should compression be allowed ? - * @param delegationToken The delegation token string. + * Package protected constructor for use by + * {@linkplain org.apache.solr.client.solrj.impl.HttpSolrClient.Builder}. 
+ * @lucene.internal */ - @Deprecated - public DelegationTokenHttpSolrClient(String baseURL, - HttpClient client, - ResponseParser parser, - boolean allowCompression, - String delegationToken) { + DelegationTokenHttpSolrClient(String baseURL, + HttpClient client, + ResponseParser parser, + boolean allowCompression, + String delegationToken) { super(baseURL, client, parser, allowCompression); if (delegationToken == null) { throw new IllegalArgumentException("Delegation token cannot be null"); @@ -100,4 +94,4 @@ public class DelegationTokenHttpSolrClient extends HttpSolrClient { } super.setQueryParams(queryParams); } -} \ No newline at end of file +} diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrClient.java index dea1711dd83..c1e95763ea1 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrClient.java @@ -158,40 +158,7 @@ public class HttpSolrClient extends SolrClient { private volatile Integer connectionTimeout; private volatile Integer soTimeout; - /** - * @param baseURL - * The URL of the Solr server. For example, " - * http://localhost:8983/solr/" if you are using the - * standard distribution Solr webapp on your local machine. - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public HttpSolrClient(String baseURL) { - this(baseURL, null, new BinaryResponseParser()); - } - - /** - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public HttpSolrClient(String baseURL, HttpClient client) { - this(baseURL, client, new BinaryResponseParser()); - } - - /** - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public HttpSolrClient(String baseURL, HttpClient client, ResponseParser parser) { - this(baseURL, client, parser, false); - } - - /** - * @deprecated use {@link Builder} instead. This will soon be a 'protected' - * method, and will only be available for use in implementing subclasses. 
- */ - @Deprecated - public HttpSolrClient(String baseURL, HttpClient client, ResponseParser parser, boolean allowCompression) { + protected HttpSolrClient(String baseURL, HttpClient client, ResponseParser parser, boolean allowCompression) { this.baseUrl = baseURL; if (baseUrl.endsWith("/")) { baseUrl = baseUrl.substring(0, baseUrl.length() - 1); @@ -840,14 +807,6 @@ public class HttpSolrClient extends SolrClient { * Use a delegation token for authenticating via the KerberosPlugin */ public Builder withKerberosDelegationToken(String delegationToken) { - return withDelegationToken(delegationToken); - } - - @Deprecated - /** - * @deprecated use {@link withKerberosDelegationToken(String)} instead - */ - public Builder withDelegationToken(String delegationToken) { if (this.invariantParams.get(DelegationTokenHttpSolrClient.DELEGATION_TOKEN_PARAM) != null) { throw new IllegalStateException(DelegationTokenHttpSolrClient.DELEGATION_TOKEN_PARAM + " is already defined!"); } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java index ed6ae7b99b2..7706bf6c930 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrClient.java @@ -185,11 +185,17 @@ public class LBHttpSolrClient extends SolrClient { protected SolrRequest request; protected List servers; protected int numDeadServersToTry; + private final Integer numServersToTry; public Req(SolrRequest request, List servers) { + this(request, servers, null); + } + + public Req(SolrRequest request, List servers, Integer numServersToTry) { this.request = request; this.servers = servers; this.numDeadServersToTry = servers.size(); + this.numServersToTry = numServersToTry; } public SolrRequest getRequest() { @@ -209,6 +215,10 @@ public class LBHttpSolrClient extends SolrClient { public void setNumDeadServersToTry(int numDeadServersToTry) { this.numDeadServersToTry = numDeadServersToTry; } + + public Integer getNumServersToTry() { + return numServersToTry; + } } public static class Rsp { @@ -226,29 +236,10 @@ public class LBHttpSolrClient extends SolrClient { } } - /** - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public LBHttpSolrClient(String... solrServerUrls) throws MalformedURLException { - this(null, solrServerUrls); - } - /** * The provided httpClient should use a multi-threaded connection manager - * @deprecated use {@link Builder} instead. - */ - @Deprecated - public LBHttpSolrClient(HttpClient httpClient, String... solrServerUrl) { - this(httpClient, new BinaryResponseParser(), solrServerUrl); - } - - /** - * The provided httpClient should use a multi-threaded connection manager - * @deprecated use {@link Builder} instead. This will soon be a protected - * method and will only be available for use in implementing subclasses. */ - public LBHttpSolrClient(HttpSolrClient.Builder httpSolrClientBuilder, + protected LBHttpSolrClient(HttpSolrClient.Builder httpSolrClientBuilder, HttpClient httpClient, String... solrServerUrl) { clientIsInternal = httpClient == null; this.httpSolrClientBuilder = httpSolrClientBuilder; @@ -265,11 +256,8 @@ public class LBHttpSolrClient extends SolrClient { /** * The provided httpClient should use a multi-threaded connection manager - * @deprecated use {@link Builder} instead. This will soon be a protected - * method and will only be available for use in implementing subclasses. 
*/ - @Deprecated - public LBHttpSolrClient(HttpClient httpClient, ResponseParser parser, String... solrServerUrl) { + protected LBHttpSolrClient(HttpClient httpClient, ResponseParser parser, String... solrServerUrl) { clientIsInternal = (httpClient == null); this.httpClient = httpClient == null ? constructClient(solrServerUrl) : httpClient; this.parser = parser; @@ -360,6 +348,9 @@ public class LBHttpSolrClient extends SolrClient { boolean isNonRetryable = req.request instanceof IsUpdateRequest || ADMIN_PATHS.contains(req.request.getPath()); List skipped = null; + final Integer numServersToTry = req.getNumServersToTry(); + int numServersTried = 0; + boolean timeAllowedExceeded = false; long timeAllowedNano = getTimeAllowedInNanos(req.getRequest()); long timeOutTime = System.nanoTime() + timeAllowedNano; @@ -387,8 +378,14 @@ public class LBHttpSolrClient extends SolrClient { } try { MDC.put("LBHttpSolrClient.url", serverStr); + + if (numServersToTry != null && numServersTried > numServersToTry.intValue()) { + break; + } + HttpSolrClient client = makeSolrClient(serverStr); + ++numServersTried; ex = doRequest(client, req, rsp, isNonRetryable, false, null); if (ex == null) { return rsp; // SUCCESS @@ -405,8 +402,13 @@ public class LBHttpSolrClient extends SolrClient { break; } + if (numServersToTry != null && numServersTried > numServersToTry.intValue()) { + break; + } + try { MDC.put("LBHttpSolrClient.url", wrapper.client.getBaseURL()); + ++numServersTried; ex = doRequest(wrapper.client, req, rsp, isNonRetryable, true, wrapper.getKey()); if (ex == null) { return rsp; // SUCCESS @@ -422,7 +424,13 @@ public class LBHttpSolrClient extends SolrClient { if (timeAllowedExceeded) { solrServerExceptionMessage = "Time allowed to handle this request exceeded"; } else { - solrServerExceptionMessage = "No live SolrServers available to handle this request"; + if (numServersToTry != null && numServersTried > numServersToTry.intValue()) { + solrServerExceptionMessage = "No live SolrServers available to handle this request:" + + " numServersTried="+numServersTried + + " numServersToTry="+numServersToTry.intValue(); + } else { + solrServerExceptionMessage = "No live SolrServers available to handle this request"; + } } if (ex == null) { throw new SolrServerException(solrServerExceptionMessage); @@ -594,10 +602,16 @@ public class LBHttpSolrClient extends SolrClient { @Override public NamedList request(final SolrRequest request, String collection) throws SolrServerException, IOException { + return request(request, collection, null); + } + + public NamedList request(final SolrRequest request, String collection, + final Integer numServersToTry) throws SolrServerException, IOException { Exception ex = null; ServerWrapper[] serverList = aliveServerList; - int maxTries = serverList.length; + final int maxTries = (numServersToTry == null ? 
serverList.length : numServersToTry.intValue()); + int numServersTried = 0; Map justFailed = null; boolean timeAllowedExceeded = false; @@ -612,6 +626,7 @@ public class LBHttpSolrClient extends SolrClient { ServerWrapper wrapper = serverList[count % serverList.length]; try { + ++numServersTried; return wrapper.client.request(request, collection); } catch (SolrException e) { // Server is alive but the request was malformed or invalid @@ -638,6 +653,7 @@ public class LBHttpSolrClient extends SolrClient { if (wrapper.standard==false || justFailed!=null && justFailed.containsKey(wrapper.getKey())) continue; try { + ++numServersTried; NamedList rsp = wrapper.client.request(request, collection); // remove from zombie list *before* adding to alive to avoid a race that could lose a server zombieServers.remove(wrapper.getKey()); @@ -663,7 +679,13 @@ public class LBHttpSolrClient extends SolrClient { if (timeAllowedExceeded) { solrServerExceptionMessage = "Time allowed to handle this request exceeded"; } else { - solrServerExceptionMessage = "No live SolrServers available to handle this request"; + if (numServersToTry != null && numServersTried > numServersToTry.intValue()) { + solrServerExceptionMessage = "No live SolrServers available to handle this request:" + + " numServersTried="+numServersTried + + " numServersToTry="+numServersToTry.intValue(); + } else { + solrServerExceptionMessage = "No live SolrServers available to handle this request"; + } } if (ex == null) { throw new SolrServerException(solrServerExceptionMessage); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrHttpClientContextBuilder.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrHttpClientContextBuilder.java index f57848db1ed..7ae98e58b8a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrHttpClientContextBuilder.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/SolrHttpClientContextBuilder.java @@ -77,14 +77,6 @@ public class SolrHttpClientContextBuilder { return credentialsProviderProvider; } - /** - * @deprecated use {@link #createContext(Object)} - */ - @Deprecated - public HttpClientContext createContext() { - return createContext(null); - } - public HttpClientContext createContext(Object userToken) { HttpClientContext context = new HttpClientContext(); if (getCredentialsProviderProvider() != null) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/Tuple.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/Tuple.java index d82c8640191..1af5f08e9e7 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/Tuple.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/Tuple.java @@ -90,7 +90,9 @@ public class Tuple implements Cloneable, MapWriter { } if(o instanceof Long) { - return (Long)o; + return (Long) o; + } else if (o instanceof Number) { + return ((Number)o).longValue(); } else { //Attempt to parse the long return Long.parseLong(o.toString()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/AnovaEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/AnovaEvaluator.java new file mode 100644 index 00000000000..b2288219f92 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/AnovaEvaluator.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
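A usage sketch for the numServersToTry support added above: the new three-argument Req constructor caps how many servers a single load-balanced request will attempt, while the two-argument form keeps the old try-every-live-server behavior. Here request, urls and lbClient are assumed to be in scope:

    // attempt at most two servers for this request
    LBHttpSolrClient.Req req = new LBHttpSolrClient.Req(request, urls, 2);
    LBHttpSolrClient.Rsp rsp = lbClient.request(req);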
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.math3.stat.inference.OneWayAnova; +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class AnovaEvaluator extends ComplexEvaluator implements Expressible { + + private static final long serialVersionUID = 1; + + public AnovaEvaluator(StreamExpression expression, StreamFactory factory) throws IOException { + super(expression, factory); + } + + public Tuple evaluate(Tuple tuple) throws IOException { + List list = new ArrayList(); + for(StreamEvaluator subEvaluator : subEvaluators) { + List nums = (List)subEvaluator.evaluate(tuple); + double[] darray = new double[nums.size()]; + for(int i=0; i< nums.size(); i++) { + darray[i]=nums.get(i).doubleValue(); + } + list.add(darray); + } + + OneWayAnova anova = new OneWayAnova(); + double p = anova.anovaPValue(list); + double f = anova.anovaFValue(list); + Map m = new HashMap(); + m.put("p-value", p); + m.put("f-ratio", f); + return new Tuple(m); + } + + @Override + public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException { + StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass())); + return expression; + } + + @Override + public Explanation toExplanation(StreamFactory factory) throws IOException { + return new Explanation(nodeId.toString()) + .withExpressionType(ExpressionType.EVALUATOR) + .withFunctionName(factory.getFunctionName(getClass())) + .withImplementingClass(getClass().getName()) + .withExpression(toExpression(factory).toString()); + } +} \ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ArrayEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ArrayEvaluator.java new file mode 100644 index 00000000000..31d89a26bde --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ArrayEvaluator.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class ArrayEvaluator extends ComplexEvaluator implements Expressible { + + private static final long serialVersionUID = 1; + + public ArrayEvaluator(StreamExpression expression, StreamFactory factory) throws IOException { + super(expression, factory); + } + + public List evaluate(Tuple tuple) throws IOException { + List list = new ArrayList(); + for(StreamEvaluator subEvaluator : subEvaluators) { + Number num = (Number)subEvaluator.evaluate(tuple); + list.add(num); + } + + return list; + } + + @Override + public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException { + StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass())); + return expression; + } + + @Override + public Explanation toExplanation(StreamFactory factory) throws IOException { + return new Explanation(nodeId.toString()) + .withExpressionType(ExpressionType.EVALUATOR) + .withFunctionName(factory.getFunctionName(getClass())) + .withImplementingClass(getClass().getName()) + .withExpression(toExpression(factory).toString()); + } +} \ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ColumnEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ColumnEvaluator.java similarity index 96% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ColumnEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ColumnEvaluator.java index 3e56837a338..dbb17b07731 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ColumnEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ColumnEvaluator.java @@ -14,14 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
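A sketch of how the two new evaluators above can be exercised directly through a StreamFactory; the field names a and b are made up for illustration, and constructEvaluator is assumed to resolve bare names against the Tuple:

    StreamFactory factory = new StreamFactory()
        .withFunctionName("anova", AnovaEvaluator.class)
        .withFunctionName("array", ArrayEvaluator.class);

    Map<String, Object> fields = new HashMap<>();
    fields.put("a", Arrays.asList(1.0, 2.0, 3.0, 4.0));
    fields.put("b", Arrays.asList(2.0, 3.0, 5.0, 6.0));

    // anova(a, b) returns a Tuple with "p-value" and "f-ratio" keys
    Tuple result = (Tuple) factory.constructEvaluator("anova(a, b)")
        .evaluate(new Tuple(fields));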
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.SimpleEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ConvolutionEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ConvolutionEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ConvolutionEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ConvolutionEvaluator.java index 6d6e3e33836..6ca178d480f 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ConvolutionEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ConvolutionEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.List; @@ -22,8 +22,6 @@ import java.util.ArrayList; import org.apache.commons.math3.util.MathArrays; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfEvaluator.java index 2380e8fce89..d379c41fed5 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; @@ -22,8 +22,6 @@ import java.util.Arrays; import java.util.List; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfRangeEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfRangeEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfRangeEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfRangeEvaluator.java index c4c1e779094..f1e56ddd137 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CopyOfRangeEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CopyOfRangeEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; @@ -22,8 +22,6 @@ import java.util.Arrays; import java.util.List; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CorrelationEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CorrelationEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CorrelationEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CorrelationEvaluator.java index 75d0c119e36..fc3d8c3b31e 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CorrelationEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CorrelationEvaluator.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.List; import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CovarianceEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CovarianceEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CovarianceEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CovarianceEvaluator.java index 0e3ffcc4d72..7a6de689f8f 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CovarianceEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/CovarianceEvaluator.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.List; import org.apache.commons.math3.stat.correlation.Covariance; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DescribeEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DescribeEvaluator.java similarity index 95% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DescribeEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DescribeEvaluator.java index e086ebf2df4..196afe57dc1 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DescribeEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DescribeEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
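The renamed CorrelationEvaluator and CovarianceEvaluator are thin wrappers over commons-math3, so corr(a, b) reduces to roughly:

    double[] a = {1, 2, 3, 4};
    double[] b = {2, 4, 6, 8};
    // Pearson correlation; 1.0 for perfectly linear data
    double r = new PearsonsCorrelation().correlation(a, b);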
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.HashMap; @@ -23,8 +23,6 @@ import java.util.Map; import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DistanceEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DistanceEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DistanceEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DistanceEvaluator.java index ef45d291cc8..201da4bd5ca 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/DistanceEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/DistanceEvaluator.java @@ -15,15 +15,13 @@ * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.List; import org.apache.commons.math3.ml.distance.EuclideanDistance; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/EmpiricalDistributionEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/EmpiricalDistributionEvaluator.java similarity index 96% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/EmpiricalDistributionEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/EmpiricalDistributionEvaluator.java index 46d08f5a91f..6885352291d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/EmpiricalDistributionEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/EmpiricalDistributionEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.HashMap; @@ -25,8 +25,6 @@ import java.util.Arrays; import org.apache.commons.math3.random.EmpiricalDistribution; import org.apache.commons.math3.stat.descriptive.StatisticalSummary; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FindDelayEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FindDelayEvaluator.java new file mode 100644 index 00000000000..c5a9b8acaa0 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/FindDelayEvaluator.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.commons.math3.util.MathArrays;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
+import org.apache.solr.client.solrj.io.stream.expr.Expressible;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+public class FindDelayEvaluator extends ComplexEvaluator implements Expressible {
+
+  private static final long serialVersionUID = 1;
+
+  public FindDelayEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
+    super(expression, factory);
+  }
+
+  public Number evaluate(Tuple tuple) throws IOException {
+    StreamEvaluator colEval1 = subEvaluators.get(0);
+    StreamEvaluator colEval2 = subEvaluators.get(1);
+
+    List<Number> numbers1 = (List<Number>)colEval1.evaluate(tuple);
+    List<Number> numbers2 = (List<Number>)colEval2.evaluate(tuple);
+    double[] column1 = new double[numbers1.size()];
+    double[] column2 = new double[numbers2.size()];
+
+    for(int i=0; i<numbers1.size(); i++) {
+      column1[i] = numbers1.get(i).doubleValue();
+    }
+
+    //Reverse the second column so that the convolution below
+    //computes the cross-correlation of the two columns.
+    int rIndex = 0;
+    for(int i=numbers2.size()-1; i>=0; i--) {
+      column2[rIndex++] = numbers2.get(i).doubleValue();
+    }
+
+    double[] convolution = MathArrays.convolve(column1, column2);
+    double max = -Double.MAX_VALUE;
+    double maxIndex = -1;
+
+    for(int i=0; i< convolution.length; i++) {
+      double abs = Math.abs(convolution[i]);
+      if(abs > max) {
+        max = abs;
+        maxIndex = i;
+      }
+    }
+
+    return (maxIndex+1)-column2.length;
+  }
+
+  @Override
+  public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
+    StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
+    return expression;
+  }
+
+  @Override
+  public Explanation toExplanation(StreamFactory factory) throws IOException {
+    return new Explanation(nodeId.toString())
+        .withExpressionType(ExpressionType.EVALUATOR)
+        .withFunctionName(factory.getFunctionName(getClass()))
+        .withImplementingClass(getClass().getName())
+        .withExpression(toExpression(factory).toString());
+  }
+}
\ No newline at end of file
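To see why FindDelayEvaluator reverses the second column: convolving one signal with a reversed copy of the other yields their cross-correlation, and the position of the peak encodes the delay. A standalone check with made-up values:

    double[] a = {0, 0, 1, 2, 3};   // the second signal delayed by 2 samples
    double[] b = {1, 2, 3, 0, 0};
    double[] bReversed = new double[b.length];
    for (int i = 0; i < b.length; i++) {
      bReversed[i] = b[b.length - 1 - i];
    }
    double[] xcorr = MathArrays.convolve(a, bReversed);
    // |xcorr| peaks at index 6, so the delay is (6 + 1) - b.length = 2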
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/HistogramEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/HistogramEvaluator.java
new file mode 100644
index 00000000000..c6916984ecd
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/HistogramEvaluator.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.math3.random.EmpiricalDistribution;
+import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
+import org.apache.solr.client.solrj.io.stream.expr.Expressible;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+public class HistogramEvaluator extends ComplexEvaluator implements Expressible {
+
+  private static final long serialVersionUID = 1;
+
+  public HistogramEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
+    super(expression, factory);
+  }
+
+  public List<Map> evaluate(Tuple tuple) throws IOException {
+
+    StreamEvaluator colEval1 = subEvaluators.get(0);
+
+    List<Number> numbers1 = (List<Number>)colEval1.evaluate(tuple);
+    double[] column1 = new double[numbers1.size()];
+
+    for(int i=0; i<numbers1.size(); i++) {
+      column1[i] = numbers1.get(i).doubleValue();
+    }
+
+    int bins = 10;
+    if(subEvaluators.size() == 2) {
+      StreamEvaluator binsEval = subEvaluators.get(1);
+      Number binsNum = (Number)binsEval.evaluate(tuple);
+      bins = binsNum.intValue();
+    }
+
+    EmpiricalDistribution empiricalDistribution = new EmpiricalDistribution(bins);
+    empiricalDistribution.load(column1);
+
+    List<Map> binList = new ArrayList();
+
+    List<SummaryStatistics> summaries = empiricalDistribution.getBinStats();
+    for(SummaryStatistics statisticalSummary : summaries) {
+      Map map = new HashMap();
+      map.put("max", statisticalSummary.getMax());
+      map.put("mean", statisticalSummary.getMean());
+      map.put("min", statisticalSummary.getMin());
+      map.put("stdev", statisticalSummary.getStandardDeviation());
+      map.put("sum", statisticalSummary.getSum());
+      map.put("N", statisticalSummary.getN());
+      map.put("var", statisticalSummary.getVariance());
+      binList.add(map);
+    }
+
+    return binList;
+  }
+
+  @Override
+  public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
+    StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
+    return expression;
+  }
+
+  @Override
+  public Explanation toExplanation(StreamFactory factory) throws IOException {
+    return new Explanation(nodeId.toString())
+        .withExpressionType(ExpressionType.EVALUATOR)
+        .withFunctionName(factory.getFunctionName(getClass()))
+        .withImplementingClass(getClass().getName())
+        .withExpression(toExpression(factory).toString());
+  }
+}
\ No newline at end of file
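The new HistogramEvaluator above delegates the binning itself to commons-math3; stripped of the evaluator plumbing, the mechanics are roughly:

    EmpiricalDistribution dist = new EmpiricalDistribution(5); // 5 equal-width bins
    dist.load(new double[]{1, 1, 2, 3, 7, 8, 8, 9, 9, 10});
    for (SummaryStatistics bin : dist.getBinStats()) {
      System.out.println("n=" + bin.getN() + " mean=" + bin.getMean());
    }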
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/LengthEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/LengthEvaluator.java
similarity index 93%
rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/LengthEvaluator.java
rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/LengthEvaluator.java
index 3ff0db3b30f..da55ee49316 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/LengthEvaluator.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/LengthEvaluator.java
@@ -14,14 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.solr.client.solrj.io.stream;
+package org.apache.solr.client.solrj.io.eval;
 
 import java.io.IOException;
 import java.util.List;
 
 import org.apache.solr.client.solrj.io.Tuple;
-import org.apache.solr.client.solrj.io.eval.ComplexEvaluator;
-import org.apache.solr.client.solrj.io.eval.StreamEvaluator;
 import org.apache.solr.client.solrj.io.stream.expr.Explanation;
 import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
 import org.apache.solr.client.solrj.io.stream.expr.Expressible;
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MovingAverageEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MovingAverageEvaluator.java
new file mode 100644
index 00000000000..2e0788f3312
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MovingAverageEvaluator.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.client.solrj.io.eval;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
+import org.apache.solr.client.solrj.io.stream.expr.Expressible;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+
+public class MovingAverageEvaluator extends ComplexEvaluator implements Expressible {
+
+  private static final long serialVersionUID = 1;
+
+  public MovingAverageEvaluator(StreamExpression expression, StreamFactory factory) throws IOException {
+    super(expression, factory);
+  }
+
+  public List<Number> evaluate(Tuple tuple) throws IOException {
+    StreamEvaluator colEval = subEvaluators.get(0);
+    StreamEvaluator windowEval = subEvaluators.get(1);
+
+    int window = ((Number)windowEval.evaluate(tuple)).intValue();
+    List<Number> numbers = (List<Number>)colEval.evaluate(tuple);
+
+    if(window > numbers.size()) {
+      throw new IOException("The window size cannot be larger than the array");
+    }
+
+    List<Number> moving = new ArrayList();
+
+    DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics(window);
+    for(int i=0; i<numbers.size(); i++) {
+      descriptiveStatistics.addValue(numbers.get(i).doubleValue());
+      if(descriptiveStatistics.getN() >= window) {
+        moving.add(descriptiveStatistics.getMean());
+      }
+    }
+
+    return moving;
+  }
+
+  @Override
+  public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException {
+    StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass()));
+    return expression;
+  }
+
+  @Override
+  public Explanation
toExplanation(StreamFactory factory) throws IOException { + return new Explanation(nodeId.toString()) + .withExpressionType(ExpressionType.EVALUATOR) + .withFunctionName(factory.getFunctionName(getClass())) + .withImplementingClass(getClass().getName()) + .withExpression(toExpression(factory).toString()); + } +} \ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NormalizeEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NormalizeEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java index e011933ad69..c85ac200607 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/NormalizeEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; @@ -22,8 +22,6 @@ import java.util.List; import org.apache.commons.math3.stat.StatUtils; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PercentileEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PercentileEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PercentileEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PercentileEvaluator.java index 2bf4d60d33d..19d423d22c2 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PercentileEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PercentileEvaluator.java @@ -15,13 +15,11 @@ * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PredictEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PredictEvaluator.java similarity index 93% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PredictEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PredictEvaluator.java index 3c3ab8447f0..0d1e76312e8 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/PredictEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/PredictEvaluator.java @@ -15,13 +15,11 @@ * limitations under the License. 
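For the MovingAverageEvaluator added above, a windowed DescriptiveStatistics does the sliding-window bookkeeping. The same computation in isolation:

    DescriptiveStatistics stats = new DescriptiveStatistics(3); // window of 3
    double[] series = {2, 4, 6, 8, 10};
    for (double v : series) {
      stats.addValue(v); // values older than the window are evicted automatically
      if (stats.getN() >= 3) {
        System.out.println(stats.getMean()); // prints 4.0, 6.0, 8.0
      }
    }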
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RankEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RankEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RankEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RankEvaluator.java index 20849289c66..8a22e940b3d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RankEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RankEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; @@ -22,8 +22,6 @@ import java.util.List; import org.apache.commons.math3.stat.ranking.NaturalRanking; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RegressionEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RegressionEvaluator.java similarity index 96% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RegressionEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RegressionEvaluator.java index 5306193355c..42a69552846 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RegressionEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/RegressionEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.HashMap; @@ -23,8 +23,6 @@ import java.util.Map; import org.apache.commons.math3.stat.regression.SimpleRegression; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ReverseEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ReverseEvaluator.java similarity index 93% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ReverseEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ReverseEvaluator.java index 5518ed0fb9f..016e995a614 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ReverseEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ReverseEvaluator.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScaleEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScaleEvaluator.java similarity index 94% rename from solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScaleEvaluator.java rename to solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScaleEvaluator.java index 04722aa60ea..8ff2a7cedc5 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScaleEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScaleEvaluator.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.client.solrj.io.stream; +package org.apache.solr.client.solrj.io.eval; import java.io.IOException; import java.util.ArrayList; @@ -22,8 +22,6 @@ import java.util.List; import org.apache.commons.math3.util.MathArrays; import org.apache.solr.client.solrj.io.Tuple; -import org.apache.solr.client.solrj.io.eval.ComplexEvaluator; -import org.apache.solr.client.solrj.io.eval.StreamEvaluator; import org.apache.solr.client.solrj.io.stream.expr.Explanation; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; import org.apache.solr.client.solrj.io.stream.expr.Expressible; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/SequenceEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/SequenceEvaluator.java new file mode 100644 index 00000000000..a88d695afaf --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/SequenceEvaluator.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.math3.util.MathArrays; +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class SequenceEvaluator extends ComplexEvaluator implements Expressible { + + private static final long serialVersionUID = 1; + + public SequenceEvaluator(StreamExpression expression, StreamFactory factory) throws IOException { + super(expression, factory); + } + + public List evaluate(Tuple tuple) throws IOException { + StreamEvaluator sizeEval = subEvaluators.get(0); + StreamEvaluator startEval = subEvaluators.get(1); + StreamEvaluator strideEval = subEvaluators.get(2); + + Number sizeNum = (Number)sizeEval.evaluate(tuple); + Number startNum = (Number)startEval.evaluate(tuple); + Number strideNum = (Number)strideEval.evaluate(tuple); + + int[] sequence = MathArrays.sequence(sizeNum.intValue(), startNum.intValue(), strideNum.intValue()); + List numbers = new ArrayList(sequence.length); + for(int i : sequence) { + numbers.add(i); + } + + return numbers; + } + + @Override + public StreamExpressionParameter toExpression(StreamFactory factory) throws IOException { + StreamExpression expression = new StreamExpression(factory.getFunctionName(getClass())); + return expression; + } + + @Override + 
public Explanation toExplanation(StreamFactory factory) throws IOException { + return new Explanation(nodeId.toString()) + .withExpressionType(ExpressionType.EVALUATOR) + .withFunctionName(factory.getFunctionName(getClass())) + .withImplementingClass(getClass().getName()) + .withExpression(toExpression(factory).toString()); + } +} \ No newline at end of file diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java index 5075330fde4..03595c2cac0 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/graph/ShortestPathStream.java @@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParameter; import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; @@ -72,30 +71,6 @@ public class ShortestPathStream extends TupleStream implements Expressible { private int threads; private SolrParams queryParams; - @Deprecated - public ShortestPathStream(String zkHost, - String collection, - String fromNode, - String toNode, - String fromField, - String toField, - Map queryParams, - int joinBatchSize, - int threads, - int maxDepth) { - - init(zkHost, - collection, - fromNode, - toNode, - fromField, - toField, - new MapSolrParams(queryParams), - joinBatchSize, - threads, - maxDepth); - } - public ShortestPathStream(String zkHost, String collection, String fromNode, diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java index 6d1764ade09..62e40791888 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/CloudSolrStream.java @@ -51,7 +51,6 @@ import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkStateReader; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; @@ -89,32 +88,12 @@ public class CloudSolrStream extends TupleStream implements Expressible { } - - /** - * @param zkHost Zookeeper ensemble connection string - * @param collectionName Name of the collection to operate on - * @param params Map<String, String> of parameter/value pairs - * @throws IOException Something went wrong - *

      - * This form does not allow specifying multiple clauses, say "fq" clauses, use the form that - * takes a SolrParams. Transition code can call the preferred method that takes SolrParams - * by calling CloudSolrStream(zkHost, collectionName, - * new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(Map<String, String>))); - * @deprecated Use the constructor that has a SolrParams obj rather than a Map - */ - - @Deprecated - public CloudSolrStream(String zkHost, String collectionName, Map params) throws IOException { - init(collectionName, zkHost, new MapSolrParams(params)); - } - /** * @param zkHost Zookeeper ensemble connection string * @param collectionName Name of the collection to operate on * @param params Map<String, String[]> of parameter/value pairs * @throws IOException Something went wrong */ - public CloudSolrStream(String zkHost, String collectionName, SolrParams params) throws IOException { init(collectionName, zkHost, params); } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java index 0180764ff92..c5bd56bcb97 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/FacetStream.java @@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.metrics.Bucket; import org.apache.solr.client.solrj.io.stream.metrics.Metric; import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -72,21 +71,6 @@ public class FacetStream extends TupleStream implements Expressible { protected transient SolrClientCache cache; protected transient CloudSolrClient cloudSolrClient; - /* - * - * @deprecated. 
Use the form that takes a SolrParams rather than Map<String, String>
-   */
-  @Deprecated
-  public FacetStream(String zkHost,
-                     String collection,
-                     Map<String, String> props,
-                     Bucket[] buckets,
-                     Metric[] metrics,
-                     FieldComparator[] bucketSorts,
-                     int bucketSizeLimit) throws IOException {
-    init(collection, new MapSolrParams(props), buckets, bucketSorts, metrics, bucketSizeLimit, zkHost);
-  }
-
   public FacetStream(String zkHost,
                      String collection,
                      SolrParams params,
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/KnnStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/KnnStream.java
index 1d5f187e605..f2ba0b935a5 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/KnnStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/KnnStream.java
@@ -47,6 +47,8 @@ import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.ModifiableSolrParams;
 
 import static org.apache.solr.common.params.CommonParams.Q;
+import static org.apache.solr.common.params.CommonParams.ROWS;
+
 
 public class KnnStream extends TupleStream implements Expressible {
 
@@ -195,6 +197,13 @@ public class KnnStream extends TupleStream implements Expressible {
       }
     }
 
+    String k = params.get("k");
+
+    if(k != null) {
+      params.add(ROWS, k);
+      params.remove("k");
+    }
+
     params.add(Q, "{!mlt"+builder.toString()+"}"+id);
 
     QueryRequest request = new QueryRequest(params);
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java
index 5ba485d9ffd..269a8006979 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/RandomStream.java
@@ -174,7 +174,7 @@ public class RandomStream extends TupleStream implements Expressible {
     if(cache != null) {
       cloudSolrClient = cache.getCloudSolrClient(zkHost);
     } else {
-      cloudSolrClient = new CloudSolrClient(zkHost);
+      cloudSolrClient = (new CloudSolrClient.Builder()).withZkHost(zkHost).build();
     }
 
     ModifiableSolrParams params = getParams(this.props);
@@ -236,4 +236,4 @@ public class RandomStream extends TupleStream implements Expressible {
   public StreamComparator getStreamSort() {
     return null;
   }
-}
\ No newline at end of file
+}
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SolrStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SolrStream.java
index 31d191347e8..ab029afbb6c 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SolrStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SolrStream.java
@@ -41,7 +41,6 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
 import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.params.CommonParams;
-import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -71,19 +70,6 @@ public class SolrStream extends TupleStream {
   private long checkpoint = -1;
   private CloseableHttpResponse closeableHttpResponse;
 
-  /**
-   * @param baseUrl Base URL of the stream.
-   * @param params Map<String, String> of parameters
-   * @deprecated, use the form that thakes SolrParams.
Existing code can use - * new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(params))) - * for existing calls that use Map<String, String> - */ - @Deprecated - public SolrStream(String baseUrl, Map params) { - this.baseUrl = baseUrl; - this.params = new ModifiableSolrParams(new MapSolrParams(params)); - } - /** * @param baseUrl Base URL of the stream. * @param params Map<String, String> of parameters diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java index d7c10e4434b..d2296b7da3c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/SqlStream.java @@ -60,9 +60,7 @@ public class SqlStream extends TupleStream implements Expressible { * takes a SolrParams. Transition code can call the preferred method that takes SolrParams * by calling CloudSolrStream(zkHost, collectionName, * new ModifiableSolrParams(SolrParams.toMultiMap(new NamedList(Map<String, String>))); - * @deprecated Use the constructor that has a SolrParams obj rather than a Map */ - public SqlStream(String zkHost, String collectionName, SolrParams params) throws IOException { init(collectionName, zkHost, params); } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java index cb46db4d8bc..c20429db7ba 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/StatsStream.java @@ -42,7 +42,6 @@ import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; import org.apache.solr.client.solrj.io.stream.metrics.Metric; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.common.SolrDocumentList; -import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; @@ -63,15 +62,6 @@ public class StatsStream extends TupleStream implements Expressible { protected transient CloudSolrClient cloudSolrClient; protected StreamContext streamContext; - // Use StatsStream(String, String, SolrParams, Metric[] - @Deprecated - public StatsStream(String zkHost, - String collection, - Map props, - Metric[] metrics) { - init(zkHost, collection, new MapSolrParams(props), metrics); - } - public StatsStream(String zkHost, String collection, SolrParams params, diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TimeSeriesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TimeSeriesStream.java index bb965b07ce6..fe2a1066567 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TimeSeriesStream.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/TimeSeriesStream.java @@ -17,6 +17,9 @@ package org.apache.solr.client.solrj.io.stream; import java.io.IOException; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -53,6 +56,8 @@ public class TimeSeriesStream extends TupleStream implements Expressible { private String end; private String gap; private String field; + private String format; + private DateTimeFormatter formatter; private Metric[] metrics; private List 
tuples = new ArrayList(); @@ -70,8 +75,9 @@ public class TimeSeriesStream extends TupleStream implements Expressible { String field, String start, String end, - String gap) throws IOException { - init(collection, params, field, metrics, start, end, gap, zkHost); + String gap, + String format) throws IOException { + init(collection, params, field, metrics, start, end, gap, format, zkHost); } public TimeSeriesStream(StreamExpression expression, StreamFactory factory) throws IOException{ @@ -82,9 +88,17 @@ public class TimeSeriesStream extends TupleStream implements Expressible { StreamExpressionNamedParameter endExpression = factory.getNamedOperand(expression, "end"); StreamExpressionNamedParameter fieldExpression = factory.getNamedOperand(expression, "field"); StreamExpressionNamedParameter gapExpression = factory.getNamedOperand(expression, "gap"); + StreamExpressionNamedParameter formatExpression = factory.getNamedOperand(expression, "format"); + StreamExpressionNamedParameter qExpression = factory.getNamedOperand(expression, "q"); + StreamExpressionNamedParameter zkHostExpression = factory.getNamedOperand(expression, "zkHost"); List metricExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, Metric.class); + + if(qExpression == null) { + throw new IOException("The timeseries expression requires the q parameter"); + } + String start = null; if(startExpression != null) { start = ((StreamExpressionValue)startExpression.getParameter()).getValue(); @@ -105,6 +119,11 @@ public class TimeSeriesStream extends TupleStream implements Expressible { field = ((StreamExpressionValue)fieldExpression.getParameter()).getValue(); } + String format = null; + if(formatExpression != null) { + format = ((StreamExpressionValue)formatExpression.getParameter()).getValue(); + } + // Collection Name if(null == collectionName){ throw new IOException(String.format(Locale.ROOT,"invalid expression %s - collectionName expected as first operand",expression)); @@ -149,7 +168,7 @@ public class TimeSeriesStream extends TupleStream implements Expressible { } // We've got all the required items - init(collectionName, params, field, metrics, start, end, gap , zkHost); + init(collectionName, params, field, metrics, start, end, gap, format, zkHost); } public String getCollection() { @@ -163,6 +182,7 @@ public class TimeSeriesStream extends TupleStream implements Expressible { String start, String end, String gap, + String format, String zkHost) throws IOException { this.zkHost = zkHost; this.collection = collection; @@ -175,6 +195,10 @@ public class TimeSeriesStream extends TupleStream implements Expressible { this.field = field; this.params = params; this.end = end; + if(format != null) { + this.format = format; + formatter = DateTimeFormatter.ofPattern(format, Locale.ROOT); + } } @Override @@ -201,6 +225,8 @@ public class TimeSeriesStream extends TupleStream implements Expressible { expression.addParameter(new StreamExpressionNamedParameter("end", end)); expression.addParameter(new StreamExpressionNamedParameter("gap", gap)); expression.addParameter(new StreamExpressionNamedParameter("field", gap)); + expression.addParameter(new StreamExpressionNamedParameter("format", format)); + // zkHost expression.addParameter(new StreamExpressionNamedParameter("zkHost", zkHost)); @@ -348,6 +374,12 @@ public class TimeSeriesStream extends TupleStream implements Expressible { for(int b=0; b params) { - init(zkHost, - checkpointCollection, - collection, - id, - initialCheckpoint, - checkpointEvery, - 
new MapSolrParams(params)); - } - public TopicStream(String zkHost, String checkpointCollection, String collection, diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/expr/StreamFactory.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/expr/StreamFactory.java index 703acf4118d..74e1de85085 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/expr/StreamFactory.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/expr/StreamFactory.java @@ -433,7 +433,7 @@ public class StreamFactory implements Serializable { if("null".equals(lower)){ return null; } if("true".equals(lower) || "false".equals(lower)){ return Boolean.parseBoolean(lower); } try{ return Long.valueOf(original); } catch(Exception ignored){}; - try{ if (original.matches(".{1,8}")){ return Float.valueOf(original); }} catch(Exception ignored){}; + try{ if (original.matches(".{1,8}")){ return Double.valueOf(original); }} catch(Exception ignored){}; try{ if (original.matches(".{1,17}")){ return Double.valueOf(original); }} catch(Exception ignored){}; // is a string diff --git a/solr/solrj/src/java/org/apache/solr/common/params/AnalysisParams.java b/solr/solrj/src/java/org/apache/solr/common/params/AnalysisParams.java index 95a3bdcaae8..74de3f77c0c 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/AnalysisParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/AnalysisParams.java @@ -35,7 +35,7 @@ public interface AnalysisParams { static final String QUERY = PREFIX + ".query"; /** - * Set to {@code true} to indicate that the index tokens that match query tokens should be marked as "mateched". + * Set to {@code true} to indicate that the index tokens that match query tokens should be marked as "matched". */ static final String SHOW_MATCH = PREFIX + ".showmatch"; diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java index ef254ccd534..e09a2dc9871 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java @@ -24,8 +24,6 @@ public interface HighlightParams { // primary public static final String HIGHLIGHT = "hl"; public static final String METHOD = HIGHLIGHT+".method"; // original|fastVector|postings|unified - @Deprecated // see hl.method - public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter"; public static final String FIELDS = HIGHLIGHT+".fl"; public static final String SNIPPETS = HIGHLIGHT+".snippets"; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java index 5f307a8bcc0..a0457262053 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java @@ -71,47 +71,6 @@ public class ExecutorUtil { public void clean(AtomicReference ctx); } - // ** This will interrupt the threads! ** Lucene and Solr do not like this because it can close channels, so only use - // this if you know what you are doing - you probably want shutdownAndAwaitTermination. - // Marked as Deprecated to discourage use. - @Deprecated - public static void shutdownWithInterruptAndAwaitTermination(ExecutorService pool) { - pool.shutdownNow(); // Cancel currently executing tasks - NOTE: this interrupts! 
- boolean shutdown = false; - while (!shutdown) { - try { - // Wait a while for existing tasks to terminate - shutdown = pool.awaitTermination(60, TimeUnit.SECONDS); - } catch (InterruptedException ie) { - // Preserve interrupt status - Thread.currentThread().interrupt(); - } - } - } - - // ** This will interrupt the threads! ** Lucene and Solr do not like this because it can close channels, so only use - // this if you know what you are doing - you probably want shutdownAndAwaitTermination. - // Marked as Deprecated to discourage use. - @Deprecated - public static void shutdownAndAwaitTerminationWithInterrupt(ExecutorService pool) { - pool.shutdown(); // Disable new tasks from being submitted - boolean shutdown = false; - boolean interrupted = false; - while (!shutdown) { - try { - // Wait a while for existing tasks to terminate - shutdown = pool.awaitTermination(60, TimeUnit.SECONDS); - } catch (InterruptedException ie) { - // Preserve interrupt status - Thread.currentThread().interrupt(); - } - if (!shutdown && !interrupted) { - pool.shutdownNow(); // Cancel currently executing tasks - NOTE: this interrupts! - interrupted = true; - } - } - } - public static void shutdownAndAwaitTermination(ExecutorService pool) { pool.shutdown(); // Disable new tasks from being submitted boolean shutdown = false; diff --git a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java index 0313d3a5bb1..d34d8e73214 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java @@ -122,12 +122,17 @@ public class NamedList implements Cloneable, Serializable, Iterable - * - * @param nameValuePairs underlying List which should be used to implement a NamedList - * @deprecated Use {@link #NamedList(java.util.Map.Entry[])} for the NamedList instantiation + *

+   * This method is package protected and exists solely so SimpleOrderedMap and clone() can utilize it
+   *
+   * TODO: this method was formerly public, now that it's not we can change the impl details of
+   * this class to be based on a Map.Entry[]
+   *
+   * @lucene.internal
+   * @see #nameValueMapToList
    */
-  @Deprecated
-  public NamedList(List nameValuePairs) {
+  NamedList(List nameValuePairs) {
     nvPairs=nameValuePairs;
   }
 
@@ -136,12 +141,14 @@ public class NamedList implements Cloneable, Serializable, Iterable
+   * NOTE: This is a temporary placeholder method until the guts of the class
    * are actually replaced by List<String, ?>.
+   *
+   * @return Modified List as per the above description
    * @see SOLR-912
    */
-  @Deprecated
   private List nameValueMapToList(Map.Entry[] nameValuePairs) {
     List result = new ArrayList<>();
     for (Map.Entry ent : nameValuePairs) {
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java b/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
index 3fee6dac802..701cdc4a36b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/SimpleOrderedMap.java
@@ -50,10 +50,15 @@ public class SimpleOrderedMap extends NamedList {
    * Creates an instance backed by an explicitly specified list of
    * pairwise names/values.
    *
+   * TODO: this method was formerly public, now that it's not we can change the impl details of
+   * this class to be based on a Map.Entry[]
+   *
    * @param nameValuePairs underlying List which should be used to implement a SimpleOrderedMap; modifying this List will affect the SimpleOrderedMap.
+   * @lucene.internal
    */
-  @Deprecated
-  public SimpleOrderedMap(List nameValuePairs) {
+  private SimpleOrderedMap(List nameValuePairs) {
     super(nameValuePairs);
   }
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java
index 02ed7be06fa..c2314f8b33f 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleStreamingTest.java
@@ -50,7 +50,7 @@ public class SolrExampleStreamingTest extends SolrExampleTests {
     public Throwable lastError = null;
 
     public ErrorTrackingConcurrentUpdateSolrClient(String solrServerUrl, int queueSize, int threadCount) {
-      super(solrServerUrl, queueSize, threadCount);
+      super(solrServerUrl, null, queueSize, threadCount, null, false);
     }
 
     @Override
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java
index 06ae8b81591..4addce32ce2 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/BasicHttpSolrClientTest.java
@@ -839,7 +839,7 @@ public class BasicHttpSolrClientTest extends SolrJettyTestBase {
     try(HttpSolrClient createdClient = new HttpSolrClient.Builder()
         .withBaseSolrUrl(jetty.getBaseUrl().toString())
-        .withDelegationToken("mydt")
+        .withKerberosDelegationToken("mydt")
         .withInvariantParams(SolrTestCaseJ4.params(DelegationTokenHttpSolrClient.DELEGATION_TOKEN_PARAM, "mydt"))
         .build()) {
       fail();
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientMultiConstructorTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientMultiConstructorTest.java
index 3a132d7a171..e1831f9d34c 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientMultiConstructorTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientMultiConstructorTest.java
@@ -67,7 +67,7 @@ public class CloudSolrClientMultiConstructorTest extends LuceneTestCase {
       clientChroot = "/mychroot";
     }
 
-    try (CloudSolrClient client = new CloudSolrClient(hosts, clientChroot)) {
+    try (CloudSolrClient client = (new CloudSolrClient.Builder()).withZkHost(hosts).withZkChroot(clientChroot).build()) {
       assertEquals(sb.toString(), client.getZkHost());
     }
 
@@ -77,6 +77,6 @@ public class CloudSolrClientMultiConstructorTest extends LuceneTestCase {
   public void testBadChroot() {
     hosts = new ArrayList<>();
     hosts.add("host1:2181");
-    new CloudSolrClient(hosts, "foo");
+    (new CloudSolrClient.Builder()).withZkHost(hosts).withZkChroot("foo").build();
   }
 }
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java
index 24f08d28ee7..4b061d54e6e 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClientTest.java
@@ -194,7 +194,11 @@ public class ConcurrentUpdateSolrClientTest extends SolrJettyTestBase {
     int cussThreadCount = 2;
     int cussQueueSize = 10;
 
-
try (ConcurrentUpdateSolrClient concurrentClient = new ConcurrentUpdateSolrClient(jetty.getBaseUrl().toString(), cussQueueSize, cussThreadCount)) { + try (ConcurrentUpdateSolrClient concurrentClient + = (new ConcurrentUpdateSolrClient.Builder(jetty.getBaseUrl().toString())) + .withQueueSize(cussQueueSize) + .withThreadCount(cussThreadCount).build()) { + SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", "collection"); concurrentClient.add("collection1", doc); @@ -203,7 +207,11 @@ public class ConcurrentUpdateSolrClientTest extends SolrJettyTestBase { assertEquals(1, concurrentClient.query("collection1", new SolrQuery("id:collection")).getResults().getNumFound()); } - try (ConcurrentUpdateSolrClient concurrentClient = new ConcurrentUpdateSolrClient(jetty.getBaseUrl().toString() + "/collection1", cussQueueSize, cussThreadCount)) { + try (ConcurrentUpdateSolrClient concurrentClient + = (new ConcurrentUpdateSolrClient.Builder(jetty.getBaseUrl().toString() + "/collection1")) + .withQueueSize(cussQueueSize) + .withThreadCount(cussThreadCount).build()) { + assertEquals(1, concurrentClient.query(new SolrQuery("id:collection")).getResults().getNumFound()); } @@ -218,7 +226,10 @@ public class ConcurrentUpdateSolrClientTest extends SolrJettyTestBase { int numRunnables = 5; int expected = numDocs * numRunnables; - try (ConcurrentUpdateSolrClient concurrentClient = new ConcurrentUpdateSolrClient(jetty.getBaseUrl().toString(), cussQueueSize, cussThreadCount)) { + try (ConcurrentUpdateSolrClient concurrentClient + = (new ConcurrentUpdateSolrClient.Builder(jetty.getBaseUrl().toString())) + .withQueueSize(cussQueueSize) + .withThreadCount(cussThreadCount).build()) { concurrentClient.setPollQueueTime(0); // ensure it doesn't block where there's nothing to do yet @@ -246,7 +257,11 @@ public class ConcurrentUpdateSolrClientTest extends SolrJettyTestBase { concurrentClient.shutdownNow(); } - try (ConcurrentUpdateSolrClient concurrentClient = new ConcurrentUpdateSolrClient(jetty.getBaseUrl().toString() + "/collection1", cussQueueSize, cussThreadCount)) { + try (ConcurrentUpdateSolrClient concurrentClient + = (new ConcurrentUpdateSolrClient.Builder(jetty.getBaseUrl().toString() + "/collection1")) + .withQueueSize(cussQueueSize) + .withThreadCount(cussThreadCount).build()) { + assertEquals(expected, concurrentClient.query(new SolrQuery("*:*")).getResults().getNumFound()); } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java index 33781efedf0..9b11783ba4d 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java @@ -862,7 +862,7 @@ public class GraphExpressionTest extends SolrCloudTestCase { JettySolrRunner runner = runners.get(0); String url = runner.getBaseUrl().toString(); - HttpSolrClient client = new HttpSolrClient(url); + HttpSolrClient client = getHttpSolrClient(url); ModifiableSolrParams params = new ModifiableSolrParams(); diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index f76ed316ad7..395791501e1 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -942,6 
+942,13 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertTrue(tuples.size() == 3); assertOrder(tuples, 2, 3, 4); + sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream")); + sParams.add("expr", "knn(" + COLLECTIONORALIAS + ", id=\"1\", qf=\"a_t\", k=\"2\", fl=\"id, score\", mintf=\"1\")"); + solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams); + tuples = getTuples(solrStream); + assertTrue(tuples.size() == 2); + assertOrder(tuples, 2, 3); + sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream")); sParams.add("expr", "knn(" + COLLECTIONORALIAS + ", id=\"1\", qf=\"a_t\", rows=\"4\", fl=\"id, score\", mintf=\"1\", maxdf=\"0\")"); solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams); @@ -5199,6 +5206,76 @@ public class StreamExpressionTest extends SolrCloudTestCase { } + @Test + public void testEvaluatorOnly() throws Exception { + String expr = "sequence(20, 0, 1)"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", expr); + paramsLoc.set("qt", "/stream"); + + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List sequence = (List)tuples.get(0).get("return-value"); + assertTrue(sequence.size() == 20); + for(int i=0; i tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List hist = (List)tuples.get(0).get("return-value"); + assertTrue(hist.size() == 10); + for(int i=0; i)tuples.get(0).get("return-value"); + assertTrue(hist.size() == 5); + for(int i=0; i tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List out = (List)tuples.get(0).get("return-value"); + assertTrue(out.size() == 6); + assertTrue(out.get(0).intValue() == 1); + assertTrue(out.get(1).intValue() == 2); + assertTrue(out.get(2).intValue() == 3); + assertTrue(out.get(3).intValue() == 300); + assertTrue(out.get(4).intValue() == 2); + assertTrue(out.get(5).intValue() == 500); + + cexpr = "array(1.122, 2.222, 3.333, 300.1, 2.13, 500.23)"; + paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + solrStream = new SolrStream(url, paramsLoc); + solrStream.setStreamContext(context); + tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + out = (List)tuples.get(0).get("return-value"); + assertTrue(out.size() == 6); + assertTrue(out.get(0).doubleValue() == 1.122D); + assertTrue(out.get(1).doubleValue() == 2.222D); + assertTrue(out.get(2).doubleValue() == 3.333D); + assertTrue(out.get(3).doubleValue() == 300.1D); + assertTrue(out.get(4).doubleValue() == 2.13D); + assertTrue(out.get(5).doubleValue() == 500.23D); + } + + @Test + public void testAnova() throws Exception { + String cexpr = "anova(array(1,2,3,5,4,6), array(5,2,3,5,4,6), array(1,2,7,5,4,6))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + Map 
out = (Map)tuples.get(0).get("return-value"); + assertEquals((double)out.get("p-value"), 0.788298D, .0001); + assertEquals((double)out.get("f-ratio"), 0.24169D, .0001); + } + + @Test + public void testMovingAverage() throws Exception { + String cexpr = "movingAvg(array(1,2,3,4,5,6,7), 4)"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List out = (List)tuples.get(0).get("return-value"); + assertTrue(out.size()==4); + assertEquals((double)out.get(0), 2.5, .0); + assertEquals((double)out.get(1), 3.5, .0); + assertEquals((double)out.get(2), 4.5, .0); + assertEquals((double)out.get(3), 5.5, .0); + } + + @Test public void testScale() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); @@ -5864,6 +6100,100 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertTrue(prediction == 600.0D); } + + @Test + public void testFinddelay() throws Exception { + UpdateRequest updateRequest = new UpdateRequest(); + + //Pad column 1 with three zeros. + updateRequest.add(id, "10", "price_f", "0.0", "col_s", "a", "order_i", "0"); + updateRequest.add(id, "11", "price_f", "0.0", "col_s", "a", "order_i", "0"); + updateRequest.add(id, "12", "price_f", "0.0", "col_s", "a", "order_i", "0"); + updateRequest.add(id, "1", "price_f", "100.0", "col_s", "a", "order_i", "1"); + updateRequest.add(id, "2", "price_f", "200.0", "col_s", "a", "order_i", "2"); + updateRequest.add(id, "3", "price_f", "300.0", "col_s", "a", "order_i", "3"); + updateRequest.add(id, "4", "price_f", "100.0", "col_s", "a", "order_i", "4"); + updateRequest.add(id, "5", "price_f", "200.0", "col_s", "a", "order_i", "5"); + updateRequest.add(id, "6", "price_f", "400.0", "col_s", "a", "order_i", "6"); + updateRequest.add(id, "7", "price_f", "600.0", "col_s", "a", "order_i", "7"); + + updateRequest.add(id, "100", "price_f", "200.0", "col_s", "b", "order_i", "1"); + updateRequest.add(id, "101", "price_f", "400.0", "col_s", "b", "order_i", "2"); + updateRequest.add(id, "102", "price_f", "600.0", "col_s", "b", "order_i", "3"); + updateRequest.add(id, "103", "price_f", "200.0", "col_s", "b", "order_i", "4"); + updateRequest.add(id, "104", "price_f", "400.0", "col_s", "b", "order_i", "5"); + updateRequest.add(id, "105", "price_f", "800.0", "col_s", "b", "order_i", "6"); + updateRequest.add(id, "106", "price_f", "1200.0", "col_s", "b", "order_i", "7"); + + + updateRequest.add(id, "200", "price_f", "-200.0", "col_s", "c", "order_i", "1"); + updateRequest.add(id, "301", "price_f", "-400.0", "col_s", "c", "order_i", "2"); + updateRequest.add(id, "402", "price_f", "-600.0", "col_s", "c", "order_i", "3"); + updateRequest.add(id, "503", "price_f", "-200.0", "col_s", "c", "order_i", "4"); + updateRequest.add(id, "604", "price_f", "-400.0", "col_s", "c", "order_i", "5"); + updateRequest.add(id, "705", "price_f", "-800.0", "col_s", "c", "order_i", "6"); + updateRequest.add(id, "806", "price_f", "-1200.0", "col_s", "c", "order_i", "7"); + updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); + + String expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; + String expr2 = 
"search("+COLLECTIONORALIAS+", q=\"col_s:b\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; + + String cexpr = "let(a="+expr1+", b="+expr2+", c=col(a, price_f), d=col(b, price_f), tuple(delay=finddelay(c, d)))"; + + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + Tuple tuple = tuples.get(0); + long delay = tuple.getLong("delay"); + assert(delay == 3); + + expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fq=\"id:(1 2 3 4 5 6 7)\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; + expr2 = "search("+COLLECTIONORALIAS+", q=\"col_s:b\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; + + cexpr = "let(a="+expr1+", b="+expr2+", c=col(a, price_f), d=col(b, price_f), tuple(delay=finddelay(c, d)))"; + + paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + + solrStream = new SolrStream(url, paramsLoc); + + solrStream.setStreamContext(context); + tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + tuple = tuples.get(0); + delay = tuple.getLong("delay"); + assert(delay == 0); + + //Test negative correlation. + expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fq=\"id:(1 2 3 4 5 6 7 11 12)\",fl=\"price_f, order_i\", sort=\"order_i asc\")"; + expr2 = "search("+COLLECTIONORALIAS+", q=\"col_s:c\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; + + cexpr = "let(a="+expr1+", b="+expr2+", c=col(a, price_f), d=col(b, price_f), tuple(delay=finddelay(c, d)))"; + + paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + + solrStream = new SolrStream(url, paramsLoc); + + solrStream.setStreamContext(context); + tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + tuple = tuples.get(0); + delay = tuple.getLong("delay"); + assert(delay == 2); + } + + @Test public void testDescribe() throws Exception { UpdateRequest updateRequest = new UpdateRequest(); diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java index a55e2ead159..419f94f0cf3 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java @@ -2222,7 +2222,14 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { public static Object skewed(Object likely, Object unlikely) { return (0 == TestUtil.nextInt(random(), 0, 9)) ? unlikely : likely; } - + + /** + * A variant of {@link org.apache.solr.client.solrj.impl.CloudSolrClient.Builder} that will randomize which nodes recieve updates + * unless otherwise specified by the caller. + * + * @see #sendDirectUpdatesToAnyShardReplica + * @see #sendDirectUpdatesToShardLeadersOnly + */ public static class CloudSolrClientBuilder extends CloudSolrClient.Builder { private boolean configuredDUTflag = false; @@ -2267,30 +2274,35 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { } } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. 
+ * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.CloudSolrClient.Builder} class directly + */ public static CloudSolrClient getCloudSolrClient(String zkHost) { - if (random().nextBoolean()) { - return new CloudSolrClient(zkHost); - } return new CloudSolrClientBuilder() .withZkHost(zkHost) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.CloudSolrClient.Builder} class directly + */ public static CloudSolrClient getCloudSolrClient(String zkHost, HttpClient httpClient) { - if (random().nextBoolean()) { - return new CloudSolrClient(zkHost, httpClient); - } return new CloudSolrClientBuilder() .withZkHost(zkHost) .withHttpClient(httpClient) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.CloudSolrClient.Builder} class directly + */ public static CloudSolrClient getCloudSolrClient(String zkHost, boolean shardLeadersOnly) { - if (random().nextBoolean()) { - return new CloudSolrClient(zkHost, shardLeadersOnly); - } - if (shardLeadersOnly) { return new CloudSolrClientBuilder() .withZkHost(zkHost) @@ -2303,11 +2315,12 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.CloudSolrClient.Builder} class directly + */ public static CloudSolrClient getCloudSolrClient(String zkHost, boolean shardLeadersOnly, HttpClient httpClient) { - if (random().nextBoolean()) { - return new CloudSolrClient(zkHost, shardLeadersOnly, httpClient); - } - if (shardLeadersOnly) { return new CloudSolrClientBuilder() .withZkHost(zkHost) @@ -2322,20 +2335,24 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient.Builder} class directly + */ public static ConcurrentUpdateSolrClient getConcurrentUpdateSolrClient(String baseSolrUrl, int queueSize, int threadCount) { - if (random().nextBoolean()) { - return new ConcurrentUpdateSolrClient(baseSolrUrl, queueSize, threadCount); - } return new ConcurrentUpdateSolrClient.Builder(baseSolrUrl) .withQueueSize(queueSize) .withThreadCount(threadCount) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. 
+ * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient.Builder} class directly + */ public static ConcurrentUpdateSolrClient getConcurrentUpdateSolrClient(String baseSolrUrl, HttpClient httpClient, int queueSize, int threadCount) { - if (random().nextBoolean()) { - return new ConcurrentUpdateSolrClient(baseSolrUrl, httpClient, queueSize, threadCount); - } return new ConcurrentUpdateSolrClient.Builder(baseSolrUrl) .withHttpClient(httpClient) .withQueueSize(queueSize) @@ -2343,30 +2360,35 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.LBHttpSolrClient.Builder} class directly + */ public static LBHttpSolrClient getLBHttpSolrClient(HttpClient client, String... solrUrls) { - if (random().nextBoolean()) { - return new LBHttpSolrClient(client, solrUrls); - } - return new LBHttpSolrClient.Builder() .withHttpClient(client) .withBaseSolrUrls(solrUrls) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.LBHttpSolrClient.Builder} class directly + */ public static LBHttpSolrClient getLBHttpSolrClient(String... solrUrls) throws MalformedURLException { - if (random().nextBoolean()) { - return new LBHttpSolrClient(solrUrls); - } return new LBHttpSolrClient.Builder() .withBaseSolrUrls(solrUrls) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.HttpSolrClient.Builder} class directly + */ public static HttpSolrClient getHttpSolrClient(String url, HttpClient httpClient, ResponseParser responseParser, boolean compression) { - if(random().nextBoolean()) { - return new HttpSolrClient(url, httpClient, responseParser, compression); - } return new Builder(url) .withHttpClient(httpClient) .withResponseParser(responseParser) @@ -2374,29 +2396,35 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.HttpSolrClient.Builder} class directly + */ public static HttpSolrClient getHttpSolrClient(String url, HttpClient httpClient, ResponseParser responseParser) { - if(random().nextBoolean()) { - return new HttpSolrClient(url, httpClient, responseParser); - } return new Builder(url) .withHttpClient(httpClient) .withResponseParser(responseParser) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. + * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.HttpSolrClient.Builder} class directly + */ public static HttpSolrClient getHttpSolrClient(String url, HttpClient httpClient) { - if(random().nextBoolean()) { - return new HttpSolrClient(url, httpClient); - } return new Builder(url) .withHttpClient(httpClient) .build(); } + /** + * This method may randomize unspecified aspects of the resulting SolrClient. 
+ * Tests that do not wish to have any randomized behavior should use the + * {@link org.apache.solr.client.solrj.impl.HttpSolrClient.Builder} class directly + */ public static HttpSolrClient getHttpSolrClient(String url) { - if(random().nextBoolean()) { - return new HttpSolrClient(url); - } return new Builder(url) .build(); } diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java index 180cf6e88af..42c4577d972 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/SolrCloudTestCase.java @@ -22,11 +22,13 @@ import java.nio.charset.Charset; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Predicate; @@ -256,13 +258,15 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 { */ protected void waitForState(String message, String collection, CollectionStatePredicate predicate) { AtomicReference state = new AtomicReference<>(); + AtomicReference> liveNodesLastSeen = new AtomicReference<>(); try { cluster.getSolrClient().waitForState(collection, DEFAULT_TIMEOUT, TimeUnit.SECONDS, (n, c) -> { state.set(c); + liveNodesLastSeen.set(n); return predicate.matches(n, c); }); } catch (Exception e) { - fail(message + "\n" + e.getMessage() + "\nLast available state: " + state.get()); + fail(message + "\n" + e.getMessage() + "\nLive Nodes: " + Arrays.toString(liveNodesLastSeen.get().toArray()) + "\nLast available state: " + state.get()); } }

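For callers migrating off the removed Map<String, String> constructors (FacetStream, SolrStream, StatsStream, TopicStream), the javadoc retained in the patch suggests wrapping the existing map in a SolrParams. A sketch of that conversion under the same APIs the removed code used; the parameter values are illustrative only.

import java.util.HashMap;
import java.util.Map;

import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;

public class ParamsMigrationSketch {
  public static void main(String[] args) {
    Map<String, String> props = new HashMap<>();
    props.put("q", "*:*");
    props.put("fl", "id");
    props.put("sort", "id asc");

    // Read-only SolrParams view over the existing Map, the same wrapper the
    // removed constructors applied internally:
    SolrParams fixed = new MapSolrParams(props);

    // Or a modifiable copy when further parameters need to be set:
    ModifiableSolrParams params = new ModifiableSolrParams(fixed);
    params.set("rows", "10");

    // 'params' can now be passed to the SolrParams-based stream constructors.
    System.out.println(params);
  }
}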
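The TimeSeriesStream changes add an optional format parameter (a java.time.format.DateTimeFormatter pattern applied with Locale.ROOT) and make q mandatory in the expression. A hypothetical end-to-end use via the /stream handler, in the same style as the tests above; the collection name, date field, and node URL are assumptions, not taken from the patch.

import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.SolrStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.common.params.ModifiableSolrParams;

public class TimeSeriesFormatSketch {
  public static void main(String[] args) throws Exception {
    // q is now required; format renders each bucket's date through the pattern.
    String expr = "timeseries(collection1, q=\"*:*\", field=\"order_dt\", "
        + "start=\"2017-01-01T00:00:00Z\", end=\"2017-12-31T23:59:59Z\", "
        + "gap=\"+1MONTH\", format=\"yyyy-MM\", count(*))";

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set("expr", expr);
    params.set("qt", "/stream");

    // Hypothetical node URL; any node hosting the collection works.
    TupleStream stream = new SolrStream("http://localhost:8983/solr/collection1", params);
    stream.setStreamContext(new StreamContext());
    try {
      stream.open();
      // One tuple per gap bucket, terminated by an EOF tuple.
      for (Tuple t = stream.read(); !t.EOF; t = stream.read()) {
        System.out.println(t.getString("order_dt") + " -> " + t.get("count(*)"));
      }
    } finally {
      stream.close();
    }
  }
}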