diff --git a/dev-tools/scripts/checkJavaDocs.py b/dev-tools/scripts/checkJavaDocs.py
index ae2b440da82..355bbdd2b4f 100644
--- a/dev-tools/scripts/checkJavaDocs.py
+++ b/dev-tools/scripts/checkJavaDocs.py
@@ -296,7 +296,7 @@ def checkSummary(fullPath):
print()
print(fullPath)
printed = True
- print(' missing: %s' % unescapeHTML(lastHREF))
+ print(' missing description: %s' % unescapeHTML(lastHREF))
anyMissing = True
elif lineLower.find('licensed to the apache software foundation') != -1 or lineLower.find('copyright 2004 the apache software foundation') != -1:
if not printed:
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 879d754c038..0c1d3519a9f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -57,6 +57,8 @@ API Changes
instead, which derived from the UH. WholeBreakIterator and
CustomSeparatorBreakIterator were moved to UH's package. (David Smiley)
+* LUCENE-7850: Removed support for legacy numerics. (Adrien Grand)
+
Bug Fixes
* LUCENE-7626: IndexWriter will no longer accept broken token offsets
@@ -88,6 +90,10 @@ Optimizations
values using different numbers of bits per value if this proves to save
storage. (Adrien Grand)
+* LUCENE-7845: Enhance spatial-extras RecursivePrefixTreeStrategy queries when the
+ query is a point (for 2D) or a is a simple date interval (e.g. 1 month). When
+ the strategy is marked as pointsOnly, the results is a TermQuery. (David Smiley)
+
Other
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
@@ -99,6 +105,8 @@ Other
* LUCENE-7753: Make fields static when possible.
(Daniel Jelinski via Adrien Grand)
+* LUCENE-7540: Upgrade ICU to 59.1 (Mike McCandless, Jim Ferenczi)
+
======================= Lucene 6.7.0 =======================
Other
@@ -107,6 +115,10 @@ Other
from methods that don't declare them ("sneaky throw" hack). (Robert Muir,
Uwe Schindler, Dawid Weiss)
+Improvements
+
+* LUCENE-7841: Normalize ґ to г in Ukrainian analyzer. (Andriy Rysin via Dawid Weiss)
+
======================= Lucene 6.6.0 =======================
New Features
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index c7936a4bd7a..89b2d7623a4 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -74,3 +74,9 @@ collecting TopDocs for each group, but instead takes a GroupReducer that will
perform any type of reduction on the top groups collected on a first-pass. To
reproduce the old behaviour of SecondPassGroupingCollector, you should instead
use TopGroupsCollector.
+
+## Removed legacy numerics (LUCENE-7850)
+
+Support for legacy numerics has been removed since legacy numerics had been
+deprecated since Lucene 6.0. Points should be used instead, see
+org.apache.lucene.index.PointValues for an introduction.
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
index 209ecee4961..eb08eeaa8c6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizer.java
@@ -24,6 +24,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeFactory;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
* Emits the entire input as a single token.
*/
@@ -41,16 +43,16 @@ public final class KeywordTokenizer extends Tokenizer {
}
public KeywordTokenizer(int bufferSize) {
- if (bufferSize <= 0) {
- throw new IllegalArgumentException("bufferSize must be > 0");
+ if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, int bufferSize) {
super(factory);
- if (bufferSize <= 0) {
- throw new IllegalArgumentException("bufferSize must be > 0");
+ if (bufferSize > MAX_TOKEN_LENGTH_LIMIT || bufferSize <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + bufferSize);
}
termAtt.resizeBuffer(bufferSize);
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
index 3654f67beab..86f65d60246 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/KeywordTokenizerFactory.java
@@ -16,26 +16,39 @@
*/
package org.apache.lucene.analysis.core;
-
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
* Factory for {@link KeywordTokenizer}.
*
maxTokenLen: max token length, should be greater than 0 and less than
+ * MAX_TOKEN_LENGTH_LIMIT (1024*1024). It is rare to need to change this
+ * else {@link KeywordTokenizer}::DEFAULT_BUFFER_SIZE
+ *
*/
public class KeywordTokenizerFactory extends TokenizerFactory {
+ private final int maxTokenLen;
/** Creates a new KeywordTokenizerFactory */
public KeywordTokenizerFactory(Map args) {
super(args);
+ maxTokenLen = getInt(args, "maxTokenLen", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+ if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -43,6 +56,6 @@ public class KeywordTokenizerFactory extends TokenizerFactory {
@Override
public KeywordTokenizer create(AttributeFactory factory) {
- return new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
+ return new KeywordTokenizer(factory, maxTokenLen);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
index df41b3777cb..8fb7d0e3f14 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizer.java
@@ -50,6 +50,20 @@ public class LetterTokenizer extends CharTokenizer {
super(factory);
}
+ /**
+ * Construct a new LetterTokenizer using a given
+ * {@link org.apache.lucene.util.AttributeFactory}.
+ *
+ * @param factory the attribute factory to use for this {@link Tokenizer}
+ * @param maxTokenLen maximum token length the tokenizer will emit.
+ * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+ * @throws IllegalArgumentException if maxTokenLen is invalid.
+
+ */
+ public LetterTokenizer(AttributeFactory factory, int maxTokenLen) {
+ super(factory, maxTokenLen);
+ }
+
/** Collects only characters which satisfy
* {@link Character#isLetter(int)}.*/
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
index 828d6cf3fed..41ada68ba52 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LetterTokenizerFactory.java
@@ -17,25 +17,40 @@
package org.apache.lucene.analysis.core;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
import java.util.Map;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
* Factory for {@link LetterTokenizer}.
*
maxTokenLen: max token length, must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
+ * It is rare to need to change this
+ * else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN
+ *
*/
public class LetterTokenizerFactory extends TokenizerFactory {
+ private final int maxTokenLen;
/** Creates a new LetterTokenizerFactory */
public LetterTokenizerFactory(Map args) {
super(args);
+ maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
+ if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -43,6 +58,6 @@ public class LetterTokenizerFactory extends TokenizerFactory {
@Override
public LetterTokenizer create(AttributeFactory factory) {
- return new LetterTokenizer(factory);
+ return new LetterTokenizer(factory, maxTokenLen);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
index 982d356533e..26b8747962b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
@@ -50,6 +50,19 @@ public final class LowerCaseTokenizer extends LetterTokenizer {
super(factory);
}
+ /**
+ * Construct a new LowerCaseTokenizer using a given
+ * {@link org.apache.lucene.util.AttributeFactory}.
+ *
+ * @param factory the attribute factory to use for this {@link Tokenizer}
+ * @param maxTokenLen maximum token length the tokenizer will emit.
+ * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+ * @throws IllegalArgumentException if maxTokenLen is invalid.
+ */
+ public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
+ super(factory, maxTokenLen);
+ }
+
/** Converts char to lower case
* {@link Character#toLowerCase(int)}.*/
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
index 3e29161a923..a3e06c7a608 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
@@ -25,20 +26,36 @@ import org.apache.lucene.util.AttributeFactory;
import java.util.HashMap;
import java.util.Map;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
- * Factory for {@link LowerCaseTokenizer}.
+ * Factory for {@link LowerCaseTokenizer}.
*
maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
+ * It is rare to need to change this
+ * else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN
+ *
*/
public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
-
- /** Creates a new LowerCaseTokenizerFactory */
- public LowerCaseTokenizerFactory(Map args) {
+ private final int maxTokenLen;
+
+ /**
+ * Creates a new LowerCaseTokenizerFactory
+ */
+ public LowerCaseTokenizerFactory(Map args) {
super(args);
+ maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
+ if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -46,11 +63,13 @@ public class LowerCaseTokenizerFactory extends TokenizerFactory implements Multi
@Override
public LowerCaseTokenizer create(AttributeFactory factory) {
- return new LowerCaseTokenizer(factory);
+ return new LowerCaseTokenizer(factory, maxTokenLen);
}
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
- return new LowerCaseFilterFactory(new HashMap<>(getOriginalArgs()));
+ Map map = new HashMap<>(getOriginalArgs());
+ map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
+ return new LowerCaseFilterFactory(map);
}
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
index 37e9d2b8a22..b6b8b609863 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@@ -58,7 +58,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*
*
wordset - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
- * begining with the "#" character. Blank lines are ignored. See
+ * beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
*
*
snowball - This format allows for multiple words specified on each
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java
index 5e4313f6c51..00c181f1262 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UnicodeWhitespaceTokenizer.java
@@ -47,6 +47,19 @@ public final class UnicodeWhitespaceTokenizer extends CharTokenizer {
public UnicodeWhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}
+
+ /**
+ * Construct a new UnicodeWhitespaceTokenizer using a given
+ * {@link org.apache.lucene.util.AttributeFactory}.
+ *
+ * @param factory the attribute factory to use for this {@link Tokenizer}
+ * @param maxTokenLen maximum token length the tokenizer will emit.
+ * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+ * @throws IllegalArgumentException if maxTokenLen is invalid.
+ */
+ public UnicodeWhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
+ super(factory, maxTokenLen);
+ }
/** Collects only characters which do not satisfy Unicode's WHITESPACE property. */
@Override
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
index 70f2d620bbd..065522761d0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizer.java
@@ -46,6 +46,19 @@ public final class WhitespaceTokenizer extends CharTokenizer {
public WhitespaceTokenizer(AttributeFactory factory) {
super(factory);
}
+
+ /**
+ * Construct a new WhitespaceTokenizer using a given
+ * {@link org.apache.lucene.util.AttributeFactory}.
+ *
+ * @param factory the attribute factory to use for this {@link Tokenizer}
+ * @param maxTokenLen maximum token length the tokenizer will emit.
+ * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+ * @throws IllegalArgumentException if maxTokenLen is invalid.
+ */
+ public WhitespaceTokenizer(AttributeFactory factory, int maxTokenLen) {
+ super(factory, maxTokenLen);
+ }
/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(int)}.*/
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
index fd38b632adc..29e9ed519fa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/WhitespaceTokenizerFactory.java
@@ -22,15 +22,18 @@ import java.util.Collection;
import java.util.Map;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
* Factory for {@link WhitespaceTokenizer}.
*
rule: either "java" for {@link WhitespaceTokenizer}
* or "unicode" for {@link UnicodeWhitespaceTokenizer}
+ *
maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
+ * It is rare to need to change this
+ * else {@link CharTokenizer}::DEFAULT_MAX_TOKEN_LEN
*
*/
public class WhitespaceTokenizerFactory extends TokenizerFactory {
@@ -46,13 +52,17 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
private static final Collection RULE_NAMES = Arrays.asList(RULE_JAVA, RULE_UNICODE);
private final String rule;
+ private final int maxTokenLen;
/** Creates a new WhitespaceTokenizerFactory */
public WhitespaceTokenizerFactory(Map args) {
super(args);
rule = get(args, "rule", RULE_NAMES, RULE_JAVA);
-
+ maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
+ if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+ }
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
@@ -62,9 +72,9 @@ public class WhitespaceTokenizerFactory extends TokenizerFactory {
public Tokenizer create(AttributeFactory factory) {
switch (rule) {
case RULE_JAVA:
- return new WhitespaceTokenizer(factory);
+ return new WhitespaceTokenizer(factory, maxTokenLen);
case RULE_UNICODE:
- return new UnicodeWhitespaceTokenizer(factory);
+ return new UnicodeWhitespaceTokenizer(factory, maxTokenLen);
default:
throw new AssertionError();
}
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index 13289bee1bd..ff9d6ff93c1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
+import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
+
/**
* An abstract base class for simple, character-oriented tokenizers.
*
@@ -50,6 +52,7 @@ public abstract class CharTokenizer extends Tokenizer {
* Creates a new {@link CharTokenizer} instance
*/
public CharTokenizer() {
+ this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
}
/**
@@ -60,6 +63,23 @@ public abstract class CharTokenizer extends Tokenizer {
*/
public CharTokenizer(AttributeFactory factory) {
super(factory);
+ this.maxTokenLen = DEFAULT_MAX_WORD_LEN;
+ }
+
+ /**
+ * Creates a new {@link CharTokenizer} instance
+ *
+ * @param factory the attribute factory to use for this {@link Tokenizer}
+ * @param maxTokenLen maximum token length the tokenizer will emit.
+ * Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
+ * @throws IllegalArgumentException if maxTokenLen is invalid.
+ */
+ public CharTokenizer(AttributeFactory factory, int maxTokenLen) {
+ super(factory);
+ if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
+ throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
+ }
+ this.maxTokenLen = maxTokenLen;
}
/**
@@ -193,9 +213,10 @@ public abstract class CharTokenizer extends Tokenizer {
}
private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
- private static final int MAX_WORD_LEN = 255;
+ public static final int DEFAULT_MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
-
+ private final int maxTokenLen;
+
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@@ -256,7 +277,7 @@ public abstract class CharTokenizer extends Tokenizer {
}
end += charCount;
length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
- if (length >= MAX_WORD_LEN) { // buffer overflow! make sure to check for >= surrogate pair could break == test
+ if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
break;
}
} else if (length > 0) { // at non-Letter w/ chars
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java
index 75070d10700..00ee311a4b0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java
@@ -24,15 +24,15 @@ import org.apache.lucene.util.SparseFixedBitSet;
/**
* This file contains unicode properties used by various {@link CharTokenizer}s.
- * The data was created using ICU4J v56.1.0.0
+ * The data was created using ICU4J v59.1.0.0
*
- * Unicode version: 8.0.0.0
+ * Unicode version: 9.0.0.0
*/
public final class UnicodeProps {
private UnicodeProps() {}
/** Unicode version that was used to generate this file: {@value} */
- public static final String UNICODE_VERSION = "8.0.0.0";
+ public static final String UNICODE_VERSION = "9.0.0.0";
/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE = createBits(
diff --git a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd
index daca530737f..f413afc2f9a 100644
--- a/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd
+++ b/lucene/analysis/common/src/resources/org/apache/lucene/analysis/compound/hyphenation/hyphenation.dtd
@@ -53,7 +53,7 @@
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd
index 15bb8ca60ed..fb3db16cf67 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/hyphenation.dtd
@@ -54,7 +54,7 @@
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java
new file mode 100644
index 00000000000..3f03a008c01
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordTokenizer.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.core;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.AttributeFactory;
+
+public class TestKeywordTokenizer extends BaseTokenStreamTestCase {
+
+ public void testSimple() throws IOException {
+ StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+ KeywordTokenizer tokenizer = new KeywordTokenizer();
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"Tokenizer \ud801\udc1ctest"});
+ }
+
+ public void testFactory() {
+ Map args = new HashMap<>();
+ KeywordTokenizerFactory factory = new KeywordTokenizerFactory(args);
+ AttributeFactory attributeFactory = newAttributeFactory();
+ Tokenizer tokenizer = factory.create(attributeFactory);
+ assertEquals(KeywordTokenizer.class, tokenizer.getClass());
+ }
+
+ private Map makeArgs(String... args) {
+ Map ret = new HashMap<>();
+ for (int idx = 0; idx < args.length; idx += 2) {
+ ret.put(args[idx], args[idx + 1]);
+ }
+ return ret;
+ }
+
+ public void testParamsFactory() throws IOException {
+ // negative maxTokenLen
+ IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
+ new KeywordTokenizerFactory(makeArgs("maxTokenLen", "-1")));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
+
+ // zero maxTokenLen
+ iae = expectThrows(IllegalArgumentException.class, () ->
+ new KeywordTokenizerFactory(makeArgs("maxTokenLen", "0")));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
+
+ // Added random param, should throw illegal error
+ iae = expectThrows(IllegalArgumentException.class, () ->
+ new KeywordTokenizerFactory(makeArgs("maxTokenLen", "255", "randomParam", "rValue")));
+ assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
+
+ // tokeniser will never split, no matter what is passed,
+ // but the buffer will not be more than length of the token
+
+ KeywordTokenizerFactory factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "5"));
+ AttributeFactory attributeFactory = newAttributeFactory();
+ Tokenizer tokenizer = factory.create(attributeFactory);
+ StringReader reader = new StringReader("Tokenizertest");
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"Tokenizertest"});
+
+ // tokeniser will never split, no matter what is passed,
+ // but the buffer will not be more than length of the token
+ factory = new KeywordTokenizerFactory(makeArgs("maxTokenLen", "2"));
+ attributeFactory = newAttributeFactory();
+ tokenizer = factory.create(attributeFactory);
+ reader = new StringReader("Tokenizer\u00A0test");
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"Tokenizer\u00A0test"});
+ }
+}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java
index acdb670f7ea..16089e9eda9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUnicodeWhitespaceTokenizer.java
@@ -54,4 +54,55 @@ public class TestUnicodeWhitespaceTokenizer extends BaseTokenStreamTestCase {
assertEquals(UnicodeWhitespaceTokenizer.class, tokenizer.getClass());
}
+ private Map makeArgs(String... args) {
+ Map ret = new HashMap<>();
+ for (int idx = 0; idx < args.length; idx += 2) {
+ ret.put(args[idx], args[idx + 1]);
+ }
+ return ret;
+ }
+
+ public void testParamsFactory() throws IOException {
+
+
+ // negative maxTokenLen
+ IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () ->
+ new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "-1")));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", iae.getMessage());
+
+ // zero maxTokenLen
+ iae = expectThrows(IllegalArgumentException.class, () ->
+ new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "0")));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", iae.getMessage());
+
+ // Added random param, should throw illegal error
+ iae = expectThrows(IllegalArgumentException.class, () ->
+ new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "255", "randomParam", "rValue")));
+ assertEquals("Unknown parameters: {randomParam=rValue}", iae.getMessage());
+
+ // tokeniser will split at 5, Token | izer, no matter what happens
+ WhitespaceTokenizerFactory factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "5"));
+ AttributeFactory attributeFactory = newAttributeFactory();
+ Tokenizer tokenizer = factory.create(attributeFactory);
+ StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"Token", "izer", "\ud801\udc1ctes", "t"});
+
+ // tokeniser will split at 2, To | ke | ni | ze | r, no matter what happens
+ factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "2"));
+ attributeFactory = newAttributeFactory();
+ tokenizer = factory.create(attributeFactory);
+ reader = new StringReader("Tokenizer\u00A0test");
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"To", "ke", "ni", "ze", "r", "te", "st"});
+
+ // tokeniser will split at 10, no matter what happens,
+ // but tokens' length are less than that
+ factory = new WhitespaceTokenizerFactory(makeArgs("rule", "unicode", "maxTokenLen", "10"));
+ attributeFactory = newAttributeFactory();
+ tokenizer = factory.create(attributeFactory);
+ reader = new StringReader("Tokenizer\u00A0test");
+ tokenizer.setReader(reader);
+ assertTokenStreamContents(tokenizer, new String[]{"Tokenizer", "test"});
+ }
}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
index 783fc3e4b51..4596608b747 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
@@ -25,8 +25,10 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.TestUtil;
@@ -89,6 +91,99 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
}
+
+ /*
+ * tests the max word length passed as parameter - tokenizer will split at the passed position char no matter what happens
+ */
+ public void testCustomMaxTokenLength() throws IOException {
+
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < 100; i++) {
+ builder.append("A");
+ }
+ Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
+ // Tricky, passing two copies of the string to the reader....
+ tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT),
+ builder.toString().toLowerCase(Locale.ROOT) });
+
+ Exception e = expectThrows(IllegalArgumentException.class, () ->
+ new LowerCaseTokenizer(newAttributeFactory(), -1));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
+
+ tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
+ tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString(), builder.toString()});
+
+
+ // Let's test that we can get a token longer than 255 through.
+ builder.setLength(0);
+ for (int i = 0; i < 500; i++) {
+ builder.append("Z");
+ }
+ tokenizer = new LetterTokenizer(newAttributeFactory(), 500);
+ tokenizer.setReader(new StringReader(builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
+
+
+ // Just to be sure what is happening here, token lengths of zero make no sense,
+ // Let's try the edge cases, token > I/O buffer (4096)
+ builder.setLength(0);
+ for (int i = 0; i < 600; i++) {
+ builder.append("aUrOkIjq"); // 600 * 8 = 4800 chars.
+ }
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new LowerCaseTokenizer(newAttributeFactory(), 0));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
+
+ tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
+ tokenizer.setReader(new StringReader(builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});
+
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new KeywordTokenizer(newAttributeFactory(), 0));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new KeywordTokenizer(newAttributeFactory(), 10_000_000));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
+
+
+ tokenizer = new KeywordTokenizer(newAttributeFactory(), 4800);
+ tokenizer.setReader(new StringReader(builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new LetterTokenizer(newAttributeFactory(), 0));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new LetterTokenizer(newAttributeFactory(), 2_000_000));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 2000000", e.getMessage());
+
+ tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
+ tokenizer.setReader(new StringReader(builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new WhitespaceTokenizer(newAttributeFactory(), 0));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
+
+ e = expectThrows(IllegalArgumentException.class, () ->
+ new WhitespaceTokenizer(newAttributeFactory(), 3_000_000));
+ assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 3000000", e.getMessage());
+
+ tokenizer = new WhitespaceTokenizer(newAttributeFactory(), 4800);
+ tokenizer.setReader(new StringReader(builder.toString()));
+ assertTokenStreamContents(tokenizer, new String[]{builder.toString()});
+
+ }
/*
* tests the max word length of 255 with a surrogate pair at position 255
diff --git a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
index 3772daf1aeb..eb5b78e0ea2 100644
--- a/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
+++ b/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
@@ -168,11 +168,14 @@ FFE3>
1134D>
11366..1136C>
11370..11374>
+11442>
+11446>
114C2..114C3>
115BF..115C0>
1163F>
116B6..116B7>
1172B>
+11C3F>
16AF0..16AF4>
16F8F..16F9F>
1D167..1D169>
@@ -181,6 +184,8 @@ FFE3>
1D185..1D18B>
1D1AA..1D1AD>
1E8D0..1E8D6>
+1E944..1E946>
+1E948..1E94A>
# Latin script "composed" that do not further decompose, so decompose here
# These are from AsciiFoldingFilter
diff --git a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
index 62e6aefdf1c..fb8cf1ac66b 100644
--- a/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
+++ b/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
@@ -510,6 +510,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
112F7>0037 # KHUDAWADI DIGIT SEVEN
112F8>0038 # KHUDAWADI DIGIT EIGHT
112F9>0039 # KHUDAWADI DIGIT NINE
+11450>0030 # NEWA DIGIT ZERO
+11451>0031 # NEWA DIGIT ONE
+11452>0032 # NEWA DIGIT TWO
+11453>0033 # NEWA DIGIT THREE
+11454>0034 # NEWA DIGIT FOUR
+11455>0035 # NEWA DIGIT FIVE
+11456>0036 # NEWA DIGIT SIX
+11457>0037 # NEWA DIGIT SEVEN
+11458>0038 # NEWA DIGIT EIGHT
+11459>0039 # NEWA DIGIT NINE
114D0>0030 # TIRHUTA DIGIT ZERO
114D1>0031 # TIRHUTA DIGIT ONE
114D2>0032 # TIRHUTA DIGIT TWO
@@ -560,6 +570,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
118E7>0037 # WARANG CITI DIGIT SEVEN
118E8>0038 # WARANG CITI DIGIT EIGHT
118E9>0039 # WARANG CITI DIGIT NINE
+11C50>0030 # BHAIKSUKI DIGIT ZERO
+11C51>0031 # BHAIKSUKI DIGIT ONE
+11C52>0032 # BHAIKSUKI DIGIT TWO
+11C53>0033 # BHAIKSUKI DIGIT THREE
+11C54>0034 # BHAIKSUKI DIGIT FOUR
+11C55>0035 # BHAIKSUKI DIGIT FIVE
+11C56>0036 # BHAIKSUKI DIGIT SIX
+11C57>0037 # BHAIKSUKI DIGIT SEVEN
+11C58>0038 # BHAIKSUKI DIGIT EIGHT
+11C59>0039 # BHAIKSUKI DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
@@ -580,4 +600,14 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
16B59>0039 # PAHAWH HMONG DIGIT NINE
+1E950>0030 # ADLAM DIGIT ZERO
+1E951>0031 # ADLAM DIGIT ONE
+1E952>0032 # ADLAM DIGIT TWO
+1E953>0033 # ADLAM DIGIT THREE
+1E954>0034 # ADLAM DIGIT FOUR
+1E955>0035 # ADLAM DIGIT FIVE
+1E956>0036 # ADLAM DIGIT SIX
+1E957>0037 # ADLAM DIGIT SEVEN
+1E958>0038 # ADLAM DIGIT EIGHT
+1E959>0039 # ADLAM DIGIT NINE
diff --git a/lucene/analysis/icu/src/data/utr30/nfc.txt b/lucene/analysis/icu/src/data/utr30/nfc.txt
index 5b7374f2cd5..5f9b1821760 100644
--- a/lucene/analysis/icu/src/data/utr30/nfc.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfc.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2014, International Business Machines
+# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt
@@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.
-* Unicode 7.0.0
+* Unicode 9.0.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
@@ -129,6 +129,8 @@
0825..0827:230
0829..082D:230
0859..085B:220
+08D4..08E1:230
+08E3:220
08E4..08E5:230
08E6:220
08E7..08E8:230
@@ -232,6 +234,7 @@
1DCF:220
1DD0:202
1DD1..1DF5:230
+1DFB:230
1DFC:233
1DFD:220
1DFE:230
@@ -260,7 +263,7 @@
3099..309A:8
A66F:230
A674..A67D:230
-A69F:230
+A69E..A69F:230
A6F0..A6F1:230
A806:9
A8C4:9
@@ -280,6 +283,7 @@ ABED:9
FB1E:26
FE20..FE26:230
FE27..FE2D:220
+FE2E..FE2F:230
101FD:220
102E0:220
10376..1037A:230
@@ -299,6 +303,7 @@ FE27..FE2D:220
11133..11134:9
11173:7
111C0:9
+111CA:7
11235:9
11236:7
112E9:7
@@ -307,6 +312,8 @@ FE27..FE2D:220
1134D:9
11366..1136C:230
11370..11374:230
+11442:9
+11446:7
114C2:9
114C3:7
115BF:9
@@ -314,6 +321,8 @@ FE27..FE2D:220
1163F:9
116B6:9
116B7:7
+1172B:9
+11C3F:9
16AF0..16AF4:1
16B30..16B36:230
1BC9E:1
@@ -326,7 +335,14 @@ FE27..FE2D:220
1D18A..1D18B:220
1D1AA..1D1AD:230
1D242..1D244:230
+1E000..1E006:230
+1E008..1E018:230
+1E01B..1E021:230
+1E023..1E024:230
+1E026..1E02A:230
1E8D0..1E8D6:220
+1E944..1E949:230
+1E94A:7
# Canonical decomposition mappings
00C0>0041 0300 # one-way: diacritic 0300
diff --git a/lucene/analysis/icu/src/data/utr30/nfkc.txt b/lucene/analysis/icu/src/data/utr30/nfkc.txt
index fea41298bc0..f51fa5db4b7 100644
--- a/lucene/analysis/icu/src/data/utr30/nfkc.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfkc.txt
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2014, International Business Machines
+# Copyright (C) 1999-2016, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt
@@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
-* Unicode 7.0.0
+* Unicode 9.0.0
00A0>0020
00A8>0020 0308
@@ -3675,6 +3675,7 @@ FFEE>25CB
1F238>7533
1F239>5272
1F23A>55B6
+1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
diff --git a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
index b24b4b277fa..7f33df58c84 100644
--- a/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
+++ b/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
@@ -1,5 +1,5 @@
# Unicode Character Database
-# Copyright (c) 1991-2014 Unicode, Inc.
+# Copyright (c) 1991-2016 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
-* Unicode 7.0.0
+* Unicode 9.0.0
0041>0061
0042>0062
@@ -632,8 +632,22 @@
10CD>2D2D
10FC>10DC
115F..1160>
+13F8>13F0
+13F9>13F1
+13FA>13F2
+13FB>13F3
+13FC>13F4
+13FD>13F5
17B4..17B5>
180B..180E>
+1C80>0432
+1C81>0434
+1C82>043E
+1C83>0441
+1C84..1C85>0442
+1C86>044A
+1C87>0463
+1C88>A64B
1D2C>0061
1D2D>00E6
1D2E>0062
@@ -2382,14 +2396,99 @@ A7AA>0266
A7AB>025C
A7AC>0261
A7AD>026C
+A7AE>026A
A7B0>029E
A7B1>0287
+A7B2>029D
+A7B3>AB53
+A7B4>A7B5
+A7B6>A7B7
A7F8>0127
A7F9>0153
AB5C>A727
AB5D>AB37
AB5E>026B
AB5F>AB52
+AB70>13A0
+AB71>13A1
+AB72>13A2
+AB73>13A3
+AB74>13A4
+AB75>13A5
+AB76>13A6
+AB77>13A7
+AB78>13A8
+AB79>13A9
+AB7A>13AA
+AB7B>13AB
+AB7C>13AC
+AB7D>13AD
+AB7E>13AE
+AB7F>13AF
+AB80>13B0
+AB81>13B1
+AB82>13B2
+AB83>13B3
+AB84>13B4
+AB85>13B5
+AB86>13B6
+AB87>13B7
+AB88>13B8
+AB89>13B9
+AB8A>13BA
+AB8B>13BB
+AB8C>13BC
+AB8D>13BD
+AB8E>13BE
+AB8F>13BF
+AB90>13C0
+AB91>13C1
+AB92>13C2
+AB93>13C3
+AB94>13C4
+AB95>13C5
+AB96>13C6
+AB97>13C7
+AB98>13C8
+AB99>13C9
+AB9A>13CA
+AB9B>13CB
+AB9C>13CC
+AB9D>13CD
+AB9E>13CE
+AB9F>13CF
+ABA0>13D0
+ABA1>13D1
+ABA2>13D2
+ABA3>13D3
+ABA4>13D4
+ABA5>13D5
+ABA6>13D6
+ABA7>13D7
+ABA8>13D8
+ABA9>13D9
+ABAA>13DA
+ABAB>13DB
+ABAC>13DC
+ABAD>13DD
+ABAE>13DE
+ABAF>13DF
+ABB0>13E0
+ABB1>13E1
+ABB2>13E2
+ABB3>13E3
+ABB4>13E4
+ABB5>13E5
+ABB6>13E6
+ABB7>13E7
+ABB8>13E8
+ABB9>13E9
+ABBA>13EA
+ABBB>13EB
+ABBC>13EC
+ABBD>13ED
+ABBE>13EE
+ABBF>13EF
F900>8C48
F901>66F4
F902>8ECA
@@ -3766,6 +3865,93 @@ FFF0..FFF8>
10425>1044D
10426>1044E
10427>1044F
+104B0>104D8
+104B1>104D9
+104B2>104DA
+104B3>104DB
+104B4>104DC
+104B5>104DD
+104B6>104DE
+104B7>104DF
+104B8>104E0
+104B9>104E1
+104BA>104E2
+104BB>104E3
+104BC>104E4
+104BD>104E5
+104BE>104E6
+104BF>104E7
+104C0>104E8
+104C1>104E9
+104C2>104EA
+104C3>104EB
+104C4>104EC
+104C5>104ED
+104C6>104EE
+104C7>104EF
+104C8>104F0
+104C9>104F1
+104CA>104F2
+104CB>104F3
+104CC>104F4
+104CD>104F5
+104CE>104F6
+104CF>104F7
+104D0>104F8
+104D1>104F9
+104D2>104FA
+104D3>104FB
+10C80>10CC0
+10C81>10CC1
+10C82>10CC2
+10C83>10CC3
+10C84>10CC4
+10C85>10CC5
+10C86>10CC6
+10C87>10CC7
+10C88>10CC8
+10C89>10CC9
+10C8A>10CCA
+10C8B>10CCB
+10C8C>10CCC
+10C8D>10CCD
+10C8E>10CCE
+10C8F>10CCF
+10C90>10CD0
+10C91>10CD1
+10C92>10CD2
+10C93>10CD3
+10C94>10CD4
+10C95>10CD5
+10C96>10CD6
+10C97>10CD7
+10C98>10CD8
+10C99>10CD9
+10C9A>10CDA
+10C9B>10CDB
+10C9C>10CDC
+10C9D>10CDD
+10C9E>10CDE
+10C9F>10CDF
+10CA0>10CE0
+10CA1>10CE1
+10CA2>10CE2
+10CA3>10CE3
+10CA4>10CE4
+10CA5>10CE5
+10CA6>10CE6
+10CA7>10CE7
+10CA8>10CE8
+10CA9>10CE9
+10CAA>10CEA
+10CAB>10CEB
+10CAC>10CEC
+10CAD>10CED
+10CAE>10CEE
+10CAF>10CEF
+10CB0>10CF0
+10CB1>10CF1
+10CB2>10CF2
118A0>118C0
118A1>118C1
118A2>118C2
@@ -4803,6 +4989,40 @@ FFF0..FFF8>
1D7FD>0037
1D7FE>0038
1D7FF>0039
+1E900>1E922
+1E901>1E923
+1E902>1E924
+1E903>1E925
+1E904>1E926
+1E905>1E927
+1E906>1E928
+1E907>1E929
+1E908>1E92A
+1E909>1E92B
+1E90A>1E92C
+1E90B>1E92D
+1E90C>1E92E
+1E90D>1E92F
+1E90E>1E930
+1E90F>1E931
+1E910>1E932
+1E911>1E933
+1E912>1E934
+1E913>1E935
+1E914>1E936
+1E915>1E937
+1E916>1E938
+1E917>1E939
+1E918>1E93A
+1E919>1E93B
+1E91A>1E93C
+1E91B>1E93D
+1E91C>1E93E
+1E91D>1E93F
+1E91E>1E940
+1E91F>1E941
+1E920>1E942
+1E921>1E943
1EE00>0627
1EE01>0628
1EE02>062C
@@ -5067,6 +5287,7 @@ FFF0..FFF8>
1F238>7533
1F239>5272
1F23A>55B6
+1F23B>914D
1F240>3014 672C 3015
1F241>3014 4E09 3015
1F242>3014 4E8C 3015
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk
index 5b8479751d5..c94a023c2ce 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/Default.brk differ
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk
index 41b977b259a..c3357efa7ce 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/segmentation/MyanmarSyllable.brk differ
diff --git a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm
index 2680264f181..1a16f3eb182 100644
Binary files a/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm and b/lucene/analysis/icu/src/resources/org/apache/lucene/analysis/icu/utr30.nrm differ
diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java
index 96f44d686b0..75481f1924c 100644
--- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizerCJK.java
@@ -53,7 +53,14 @@ public class TestICUTokenizerCJK extends BaseTokenStreamTestCase {
new String[] { "我", "购买", "了", "道具", "和", "服装" }
);
}
-
+
+ public void testTraditionalChinese() throws Exception {
+ assertAnalyzesTo(a, "我購買了道具和服裝。",
+ new String[] { "我", "購買", "了", "道具", "和", "服裝"});
+ assertAnalyzesTo(a, "定義切分字串的基本單位是訂定分詞標準的首要工作", // From http://godel.iis.sinica.edu.tw/CKIP/paper/wordsegment_standard.pdf
+ new String[] { "定義", "切", "分", "字串", "的", "基本", "單位", "是", "訂定", "分詞", "標準", "的", "首要", "工作" });
+ }
+
public void testChineseNumerics() throws Exception {
assertAnalyzesTo(a, "9483", new String[] { "9483" });
assertAnalyzesTo(a, "院內分機9483。",
diff --git a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
index 035a3a086b2..0f2bffecfb0 100644
--- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
+++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateUTR30DataFiles.java
@@ -63,7 +63,7 @@ import java.util.regex.Pattern;
public class GenerateUTR30DataFiles {
private static final String ICU_SVN_TAG_URL
= "http://source.icu-project.org/repos/icu/icu/tags";
- private static final String ICU_RELEASE_TAG = "release-54-1";
+ private static final String ICU_RELEASE_TAG = "release-58-1";
private static final String ICU_DATA_NORM2_PATH = "source/data/unidata/norm2";
private static final String NFC_TXT = "nfc.txt";
private static final String NFKC_TXT = "nfkc.txt";
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
index 6955fe334fc..cd502fd8291 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/uk/UkrainianMorfologikAnalyzer.java
@@ -116,6 +116,8 @@ public final class UkrainianMorfologikAnalyzer extends StopwordAnalyzerBase {
// ignored characters
builder.add("\u0301", "");
builder.add("\u00AD", "");
+ builder.add("ґ", "г");
+ builder.add("Ґ", "Г");
NormalizeCharMap normMap = builder.build();
reader = new MappingCharFilter(normMap, reader);
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
index 15b247d5af7..e9a010212e6 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
@@ -52,10 +52,17 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
public void testCapsTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Цих Чайковського і Ґете.",
- new String[] { "Чайковське", "Чайковський", "Ґете" });
+ new String[] { "Чайковське", "Чайковський", "Гете" });
a.close();
}
+ public void testCharNormalization() throws Exception {
+ Analyzer a = new UkrainianMorfologikAnalyzer();
+ assertAnalyzesTo(a, "Ґюмрі та Гюмрі.",
+ new String[] { "Гюмрі", "Гюмрі" });
+ a.close();
+ }
+
public void testSampleSentence() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index b4f50474f0a..a876b7de81f 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -60,10 +60,6 @@ import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.legacy.LegacyIntField;
-import org.apache.lucene.legacy.LegacyLongField;
-import org.apache.lucene.legacy.LegacyNumericRangeQuery;
-import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@@ -1114,9 +1110,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
- // add numeric fields, to test if flex preserves encoding
- doc.add(new LegacyIntField("trieInt", id, Field.Store.NO));
- doc.add(new LegacyLongField("trieLong", (long) id, Field.Store.NO));
// add docvalues fields
doc.add(new NumericDocValuesField("dvByte", (byte) id));
@@ -1294,51 +1287,6 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
}
- public void testNumericFields() throws Exception {
- for (String name : oldNames) {
-
- Directory dir = oldIndexDirs.get(name);
- IndexReader reader = DirectoryReader.open(dir);
- IndexSearcher searcher = newSearcher(reader);
-
- for (int id=10; id<15; id++) {
- ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.valueOf(id), Integer.valueOf(id), true, true), 100).scoreDocs;
- assertEquals("wrong number of hits", 1, hits.length);
- Document d = searcher.doc(hits[0].doc);
- assertEquals(String.valueOf(id), d.get("id"));
-
- hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.valueOf(id), Long.valueOf(id), true, true), 100).scoreDocs;
- assertEquals("wrong number of hits", 1, hits.length);
- d = searcher.doc(hits[0].doc);
- assertEquals(String.valueOf(id), d.get("id"));
- }
-
- // check that also lower-precision fields are ok
- ScoreDoc[] hits = searcher.search(LegacyNumericRangeQuery.newIntRange("trieInt", LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, Integer.MIN_VALUE, Integer.MAX_VALUE, false, false), 100).scoreDocs;
- assertEquals("wrong number of hits", 34, hits.length);
-
- hits = searcher.search(LegacyNumericRangeQuery.newLongRange("trieLong", LegacyNumericUtils.PRECISION_STEP_DEFAULT, Long.MIN_VALUE, Long.MAX_VALUE, false, false), 100).scoreDocs;
- assertEquals("wrong number of hits", 34, hits.length);
-
- // check decoding of terms
- Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "trieInt");
- TermsEnum termsEnum = LegacyNumericUtils.filterPrefixCodedInts(terms.iterator());
- while (termsEnum.next() != null) {
- int val = LegacyNumericUtils.prefixCodedToInt(termsEnum.term());
- assertTrue("value in id bounds", val >= 0 && val < 35);
- }
-
- terms = MultiFields.getTerms(searcher.getIndexReader(), "trieLong");
- termsEnum = LegacyNumericUtils.filterPrefixCodedLongs(terms.iterator());
- while (termsEnum.next() != null) {
- long val = LegacyNumericUtils.prefixCodedToLong(termsEnum.term());
- assertTrue("value in id bounds", val >= 0L && val < 35L);
- }
-
- reader.close();
- }
- }
-
private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
if (VERBOSE) {
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 7e0e7c72f58..3318c214e86 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -29,7 +29,7 @@ com.fasterxml.jackson.core.version = 2.5.4
/com.googlecode.juniversalchardet/juniversalchardet = 1.0.3
/com.googlecode.mp4parser/isoparser = 1.1.18
/com.healthmarketscience.jackcess/jackcess = 2.1.3
-/com.ibm.icu/icu4j = 56.1
+/com.ibm.icu/icu4j = 59.1
/com.pff/java-libpst = 0.8.1
com.sun.jersey.version = 1.9
@@ -276,7 +276,7 @@ org.slf4j.version = 1.7.7
/org.tukaani/xz = 1.5
/rome/rome = 1.0
-ua.net.nlp.morfologik-ukrainian-search.version = 3.7.5
+ua.net.nlp.morfologik-ukrainian-search.version = 3.7.6
/ua.net.nlp/morfologik-ukrainian-search = ${ua.net.nlp.morfologik-ukrainian-search.version}
/xerces/xercesImpl = 2.9.1
diff --git a/lucene/licenses/icu4j-56.1.jar.sha1 b/lucene/licenses/icu4j-56.1.jar.sha1
deleted file mode 100644
index 5f8e0466fde..00000000000
--- a/lucene/licenses/icu4j-56.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-8dd6671f52165a0419e6de5e1016400875a90fa9
diff --git a/lucene/licenses/icu4j-59.1.jar.sha1 b/lucene/licenses/icu4j-59.1.jar.sha1
new file mode 100644
index 00000000000..f3f0018f053
--- /dev/null
+++ b/lucene/licenses/icu4j-59.1.jar.sha1
@@ -0,0 +1 @@
+6f06e820cf4c8968bbbaae66ae0b33f6a256b57f
diff --git a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1
deleted file mode 100644
index 8794e71fbe9..00000000000
--- a/lucene/licenses/morfologik-ukrainian-search-3.7.5.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2b8c8fbd740164d220ca7d18605b8b2092e163e9
diff --git a/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1 b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1
new file mode 100644
index 00000000000..6f0b86c8290
--- /dev/null
+++ b/lucene/licenses/morfologik-ukrainian-search-3.7.6.jar.sha1
@@ -0,0 +1 @@
+8d2c4bf006f59227bcba8885b4602b3a8b5bd799
diff --git a/lucene/spatial-extras/build.xml b/lucene/spatial-extras/build.xml
index 2e425fda202..e9cc29cc224 100644
--- a/lucene/spatial-extras/build.xml
+++ b/lucene/spatial-extras/build.xml
@@ -31,9 +31,7 @@
-
-
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java
index 90e36d835db..7536b60bf6e 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/bbox/BBoxStrategy.java
@@ -25,11 +25,6 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
-import org.apache.lucene.legacy.LegacyDoubleField;
-import org.apache.lucene.legacy.LegacyFieldType;
-import org.apache.lucene.legacy.LegacyNumericRangeQuery;
-import org.apache.lucene.legacy.LegacyNumericType;
-import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -41,8 +36,6 @@ import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
-import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.NumericUtils;
import org.locationtech.spatial4j.context.SpatialContext;
import org.locationtech.spatial4j.shape.Point;
import org.locationtech.spatial4j.shape.Rectangle;
@@ -88,8 +81,6 @@ public class BBoxStrategy extends SpatialStrategy {
*/
public static FieldType DEFAULT_FIELDTYPE;
- @Deprecated
- public static LegacyFieldType LEGACY_FIELDTYPE;
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
@@ -98,15 +89,6 @@ public class BBoxStrategy extends SpatialStrategy {
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
- // Legacy default: legacyNumerics + docValues
- LegacyFieldType legacyType = new LegacyFieldType();
- legacyType.setIndexOptions(IndexOptions.DOCS);
- legacyType.setNumericType(LegacyNumericType.DOUBLE);
- legacyType.setNumericPrecisionStep(8);// same as solr default
- legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
- legacyType.setStored(false);
- legacyType.freeze();
- LEGACY_FIELDTYPE = legacyType;
}
public static final String SUFFIX_MINX = "__minX";
@@ -131,8 +113,6 @@ public class BBoxStrategy extends SpatialStrategy {
private final boolean hasStored;
private final boolean hasDocVals;
private final boolean hasPointVals;
- // equiv to "hasLegacyNumerics":
- private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
private final FieldType xdlFieldType;
/**
@@ -142,15 +122,6 @@ public class BBoxStrategy extends SpatialStrategy {
return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
}
- /**
- * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility
- * @deprecated LegacyNumerics will be removed
- */
- @Deprecated
- public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
- return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
- }
-
/**
* Creates this strategy.
* {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
@@ -179,23 +150,8 @@ public class BBoxStrategy extends SpatialStrategy {
if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
numQuads++;
}
- if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
- if (hasPointVals) {
- throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
- }
- final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
- if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
- throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
- }
- numQuads++;
- legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
- legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
- legacyNumericFieldType.freeze();
- } else {
- legacyNumericFieldType = null;
- }
- if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
+ if (hasPointVals) { // if we have an index...
xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
xdlFieldType.setIndexOptions(IndexOptions.DOCS);
xdlFieldType.freeze();
@@ -242,12 +198,6 @@ public class BBoxStrategy extends SpatialStrategy {
fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX());
fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY());
}
- if (legacyNumericFieldType != null) {
- fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
- fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
- fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
- fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
- }
if (xdlFieldType != null) {
fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
}
@@ -664,17 +614,12 @@ public class BBoxStrategy extends SpatialStrategy {
private Query makeNumberTermQuery(String field, double number) {
if (hasPointVals) {
return DoublePoint.newExactQuery(field, number);
- } else if (legacyNumericFieldType != null) {
- BytesRefBuilder bytes = new BytesRefBuilder();
- LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes);
- return new TermQuery(new Term(field, bytes.get()));
}
throw new UnsupportedOperationException("An index is required for this operation.");
}
/**
* Returns a numeric range query based on FieldType
- * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
* {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
*
* @param fieldname field name. must not be null.
@@ -702,8 +647,6 @@ public class BBoxStrategy extends SpatialStrategy {
}
return DoublePoint.newRangeQuery(fieldname, min, max);
- } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
- return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
}
throw new UnsupportedOperationException("An index is required for this operation.");
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
index 757e2bd38f7..ca38abf2400 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/BytesRefIteratorTokenStream.java
@@ -26,8 +26,6 @@ import org.apache.lucene.util.BytesRefIterator;
/**
* A TokenStream used internally by {@link org.apache.lucene.spatial.prefix.PrefixTreeStrategy}.
*
- * This is modelled after {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
- *
* @lucene.internal
*/
class BytesRefIteratorTokenStream extends TokenStream {
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
index c727c0da075..8367644e889 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/NumberRangePrefixTreeStrategy.java
@@ -18,18 +18,17 @@ package org.apache.lucene.spatial.prefix;
import java.io.IOException;
import java.util.Arrays;
-import java.util.Iterator;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree;
import org.apache.lucene.util.Bits;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Shape;
import static org.apache.lucene.spatial.prefix.tree.NumberRangePrefixTree.UnitNRShape;
@@ -57,9 +56,22 @@ public class NumberRangePrefixTreeStrategy extends RecursivePrefixTreeStrategy {
}
@Override
- protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) {
- //levels doesn't actually matter; NumberRange based Shapes have their own "level".
- return super.createCellIteratorToIndex(shape, grid.getMaxLevels(), reuse);
+ protected boolean isPointShape(Shape shape) {
+ if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
+ return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() == grid.getMaxLevels();
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ protected boolean isGridAlignedShape(Shape shape) {
+ // any UnitNRShape other than the world is a single cell/term
+ if (shape instanceof NumberRangePrefixTree.UnitNRShape) {
+ return ((NumberRangePrefixTree.UnitNRShape)shape).getLevel() > 0;
+ } else {
+ return false;
+ }
}
/** Unsupported. */
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
index e9f43fd43bd..43851c747b7 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/PrefixTreeStrategy.java
@@ -21,8 +21,6 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
@@ -34,6 +32,10 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.util.ShapeFieldCacheDistanceValueSource;
import org.apache.lucene.util.Bits;
+import org.locationtech.spatial4j.shape.Circle;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Rectangle;
+import org.locationtech.spatial4j.shape.Shape;
/**
* An abstract SpatialStrategy based on {@link SpatialPrefixTree}. The two
@@ -163,7 +165,7 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
}
protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) {
- if (pointsOnly && !(shape instanceof Point)) {
+ if (pointsOnly && !isPointShape(shape)) {
throw new IllegalArgumentException("pointsOnly is true yet a " + shape.getClass() + " is given for indexing");
}
return grid.getTreeCellIterator(shape, detailLevel);//TODO should take a re-use iterator
@@ -205,4 +207,16 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
Shape inputShape, final int facetLevel, int maxCells) throws IOException {
return HeatmapFacetCounter.calcFacets(this, context, topAcceptDocs, inputShape, facetLevel, maxCells);
}
+
+ protected boolean isPointShape(Shape shape) {
+ if (shape instanceof Point) {
+ return true;
+ } else if (shape instanceof Circle) {
+ return ((Circle) shape).getRadius() == 0.0;
+ } else if (shape instanceof Rectangle) {
+ Rectangle rect = (Rectangle) shape;
+ return rect.getWidth() == 0.0 && rect.getHeight() == 0.0;
+ }
+ return false;
+ }
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
index d3d16263b9c..7c792006b0c 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/RecursivePrefixTreeStrategy.java
@@ -20,9 +20,9 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import org.locationtech.spatial4j.shape.Point;
-import org.locationtech.spatial4j.shape.Shape;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.LegacyCell;
@@ -30,6 +30,7 @@ import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.query.SpatialArgs;
import org.apache.lucene.spatial.query.SpatialOperation;
import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
+import org.locationtech.spatial4j.shape.Shape;
/**
* A {@link PrefixTreeStrategy} which uses {@link AbstractVisitingPrefixTreeQuery}.
@@ -121,7 +122,7 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
@Override
protected Iterator createCellIteratorToIndex(Shape shape, int detailLevel, Iterator reuse) {
- if (shape instanceof Point || !pruneLeafyBranches)
+ if (!pruneLeafyBranches || isGridAlignedShape(shape))
return super.createCellIteratorToIndex(shape, detailLevel, reuse);
List cells = new ArrayList<>(4096);
@@ -177,6 +178,9 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
int detailLevel = grid.getLevelForDistance(args.resolveDistErr(ctx, distErrPct));
if (op == SpatialOperation.Intersects) {
+ if (isGridAlignedShape(args.getShape())) {
+ return makeGridShapeIntersectsQuery(args.getShape());
+ }
return new IntersectsPrefixTreeQuery(
shape, getFieldName(), grid, detailLevel, prefixGridScanLevel);
} else if (op == SpatialOperation.IsWithin) {
@@ -189,4 +193,35 @@ public class RecursivePrefixTreeStrategy extends PrefixTreeStrategy {
}
throw new UnsupportedSpatialOperation(op);
}
+
+ /**
+ * A quick check of the shape to see if it is perfectly aligned to a grid.
+ * Points always are as they are indivisible. It's okay to return false
+ * if the shape actually is aligned; this is an optimization hint.
+ */
+ protected boolean isGridAlignedShape(Shape shape) {
+ return isPointShape(shape);
+ }
+
+ /** {@link #makeQuery(SpatialArgs)} specialized for the query being a grid square. */
+ protected Query makeGridShapeIntersectsQuery(Shape gridShape) {
+ assert isGridAlignedShape(gridShape);
+ if (isPointsOnly()) {
+ // Awesome; this will be equivalent to a TermQuery.
+ Iterator cellIterator = grid.getTreeCellIterator(gridShape, grid.getMaxLevels());
+ // get last cell
+ Cell cell = cellIterator.next();
+ while (cellIterator.hasNext()) {
+ int prevLevel = cell.getLevel();
+ cell = cellIterator.next();
+ assert prevLevel < cell.getLevel();
+ }
+ return new TermQuery(new Term(getFieldName(), cell.getTokenBytesWithLeaf(null)));
+ } else {
+ // Well there could be parent cells. But we can reduce the "scan level" which will be slower for a point query.
+ // TODO: AVPTQ will still scan the bottom nonetheless; file an issue to eliminate that
+ return new IntersectsPrefixTreeQuery(
+ gridShape, getFieldName(), grid, getGrid().getMaxLevels(), getGrid().getMaxLevels() + 1);
+ }
+ }
}
diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java
index 59aff490916..ef3eaa4b24e 100644
--- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java
+++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java
@@ -22,11 +22,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DocValuesType;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.legacy.LegacyDoubleField;
-import org.apache.lucene.legacy.LegacyFieldType;
-import org.apache.lucene.legacy.LegacyNumericRangeQuery;
-import org.apache.lucene.legacy.LegacyNumericType;
import org.apache.lucene.queries.function.FunctionRangeQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.BooleanClause;
@@ -86,8 +81,6 @@ public class PointVectorStrategy extends SpatialStrategy {
*/
public static FieldType DEFAULT_FIELDTYPE;
- @Deprecated
- public static LegacyFieldType LEGACY_FIELDTYPE;
static {
// Default: pointValues + docValues
FieldType type = new FieldType();
@@ -96,15 +89,6 @@ public class PointVectorStrategy extends SpatialStrategy {
type.setStored(false);
type.freeze();
DEFAULT_FIELDTYPE = type;
- // Legacy default: legacyNumerics
- LegacyFieldType legacyType = new LegacyFieldType();
- legacyType.setIndexOptions(IndexOptions.DOCS);
- legacyType.setNumericType(LegacyNumericType.DOUBLE);
- legacyType.setNumericPrecisionStep(8);// same as solr default
- legacyType.setDocValuesType(DocValuesType.NONE);//no docValues!
- legacyType.setStored(false);
- legacyType.freeze();
- LEGACY_FIELDTYPE = legacyType;
}
public static final String SUFFIX_X = "__x";
@@ -117,8 +101,6 @@ public class PointVectorStrategy extends SpatialStrategy {
private final boolean hasStored;
private final boolean hasDocVals;
private final boolean hasPointVals;
- // equiv to "hasLegacyNumerics":
- private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
/**
* Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
@@ -127,18 +109,6 @@ public class PointVectorStrategy extends SpatialStrategy {
return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
}
- /**
- * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility.
- * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)}
- * since that requires docValues (the legacy config didn't have that).
- *
- * @deprecated LegacyNumerics will be removed
- */
- @Deprecated
- public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
- return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
- }
-
/**
* Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
* a field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics
@@ -159,21 +129,6 @@ public class PointVectorStrategy extends SpatialStrategy {
if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
numPairs++;
}
- if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
- if (hasPointVals) {
- throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
- }
- final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
- if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
- throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
- }
- numPairs++;
- legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
- legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
- legacyNumericFieldType.freeze();
- } else {
- legacyNumericFieldType = null;
- }
this.fieldsLen = numPairs * 2;
}
@@ -209,10 +164,6 @@ public class PointVectorStrategy extends SpatialStrategy {
fields[++idx] = new DoublePoint(fieldNameX, point.getX());
fields[++idx] = new DoublePoint(fieldNameY, point.getY());
}
- if (legacyNumericFieldType != null) {
- fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
- fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
- }
assert idx == fields.length - 1;
return fields;
}
@@ -268,7 +219,6 @@ public class PointVectorStrategy extends SpatialStrategy {
/**
* Returns a numeric range query based on FieldType
- * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
* {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
*/
private Query rangeQuery(String fieldName, Double min, Double max) {
@@ -283,8 +233,6 @@ public class PointVectorStrategy extends SpatialStrategy {
return DoublePoint.newRangeQuery(fieldName, min, max);
- } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
- return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive
}
//TODO try doc-value range query?
throw new UnsupportedOperationException("An index is required for this operation.");
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java
index d54e1c970a0..536436b9897 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/DistanceStrategyTest.java
@@ -68,9 +68,6 @@ public class DistanceStrategyTest extends StrategyTestCase {
strategy = BBoxStrategy.newInstance(ctx, "bbox");
ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
- strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy");
- ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
-
strategy = new SerializedDVStrategy(ctx, "serialized");
ctorArgs.add(new Object[]{strategy.getFieldName(), strategy});
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java
index c14fe546f97..f52ef2b444d 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/QueryEqualsHashCodeTest.java
@@ -58,9 +58,7 @@ public class QueryEqualsHashCodeTest extends LuceneTestCase {
strategies.add(recursive_geohash);
strategies.add(new TermQueryPrefixTreeStrategy(gridQuad, "termquery_quad"));
strategies.add(PointVectorStrategy.newInstance(ctx, "pointvector"));
- strategies.add(PointVectorStrategy.newLegacyInstance(ctx, "pointvector_legacy"));
strategies.add(BBoxStrategy.newInstance(ctx, "bbox"));
- strategies.add(BBoxStrategy.newLegacyInstance(ctx, "bbox_legacy"));
final SerializedDVStrategy serialized = new SerializedDVStrategy(ctx, "serialized");
strategies.add(serialized);
strategies.add(new CompositeSpatialStrategy("composite", recursive_geohash, serialized));
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java
index 20df7305cbe..210ab386fab 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/bbox/TestBBoxStrategy.java
@@ -21,8 +21,6 @@ import java.io.IOException;
import com.carrotsearch.randomizedtesting.annotations.Repeat;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DocValuesType;
-import org.apache.lucene.index.IndexOptions;
-import org.apache.lucene.legacy.LegacyFieldType;
import org.apache.lucene.search.Query;
import org.apache.lucene.spatial.SpatialMatchConcern;
import org.apache.lucene.spatial.prefix.RandomSpatialOpStrategyTestCase;
@@ -93,20 +91,10 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
factory.worldBounds = new RectangleImpl(-300, 300, -100, 100, null);
this.ctx = factory.newSpatialContext();
}
- // randomly test legacy (numeric) and point based bbox strategy
- if (random().nextBoolean()) {
- this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
- } else {
- this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
- }
+ this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
//test we can disable docValues for predicate tests
if (random().nextBoolean()) {
- FieldType fieldType = ((BBoxStrategy)strategy).getFieldType();
- if (fieldType instanceof LegacyFieldType) {
- fieldType = new LegacyFieldType((LegacyFieldType)fieldType);
- } else {
- fieldType = new FieldType(fieldType);
- }
+ FieldType fieldType = new FieldType(((BBoxStrategy)strategy).getFieldType());
fieldType.setDocValuesType(DocValuesType.NONE);
strategy = new BBoxStrategy(ctx, strategy.getFieldName(), fieldType);
}
@@ -194,11 +182,7 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
private void setupGeo() {
this.ctx = SpatialContext.GEO;
- if (random().nextBoolean()) {
- this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
- } else {
- this.strategy = BBoxStrategy.newLegacyInstance(ctx, "bbox");
- }
+ this.strategy = BBoxStrategy.newInstance(ctx, "bbox");
}
// OLD STATIC TESTS (worthless?)
@@ -239,16 +223,9 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
FieldType fieldType;
// random legacy or not legacy
String FIELD_PREFIX = "bbox";
+ fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
if (random().nextBoolean()) {
- fieldType = new FieldType(BBoxStrategy.DEFAULT_FIELDTYPE);
- if (random().nextBoolean()) {
- fieldType.setDimensions(0, 0);
- }
- } else {
- fieldType = new FieldType(BBoxStrategy.LEGACY_FIELDTYPE);
- if (random().nextBoolean()) {
- fieldType.setIndexOptions(IndexOptions.NONE);
- }
+ fieldType.setDimensions(0, 0);
}
strategy = new BBoxStrategy(ctx, FIELD_PREFIX, fieldType);
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
index 77c25298b67..54296da040c 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
@@ -27,7 +27,7 @@ import org.junit.Before;
import org.junit.Test;
import org.locationtech.spatial4j.shape.Shape;
-import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
+import static com.carrotsearch.randomizedtesting.RandomizedTest.randomInt;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@@ -42,17 +42,8 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
public void setUp() throws Exception {
super.setUp();
tree = DateRangePrefixTree.INSTANCE;
- if (randomBoolean()) {
- strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
- } else {
- //Test the format that existed <= Lucene 5.0
- strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange") {
- @Override
- protected CellToBytesRefIterator newCellToBytesRefIterator() {
- return new CellToBytesRefIterator50();
- }
- };
- }
+ strategy = new NumberRangePrefixTreeStrategy(tree, "dateRange");
+ ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(randomInt() % 5 == 0);
Calendar tmpCal = tree.newCal();
int randomCalWindowField = randomIntBetween(Calendar.YEAR, Calendar.MILLISECOND);
tmpCal.add(randomCalWindowField, 2_000);
@@ -79,15 +70,16 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@Test
public void testWithinSame() throws IOException {
- final Calendar cal = tree.newCal();
+ Shape shape = randomIndexedShape();
testOperation(
- tree.toShape(cal),
+ shape,
SpatialOperation.IsWithin,
- tree.toShape(cal), true);//is within itself
+ shape, true);//is within itself
}
@Test
public void testWorld() throws IOException {
+ ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
testOperation(
tree.toShape(tree.newCal()),//world matches everything
SpatialOperation.Contains,
@@ -96,6 +88,7 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@Test
public void testBugInitIterOptimization() throws Exception {
+ ((NumberRangePrefixTreeStrategy)strategy).setPointsOnly(false);
//bug due to fast path initIter() optimization
testOperation(
tree.parseShape("[2014-03-27T23 TO 2014-04-01T01]"),
@@ -114,6 +107,21 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@Override
protected Shape randomIndexedShape() {
+ if (((NumberRangePrefixTreeStrategy)strategy).isPointsOnly()) {
+ Calendar cal = tree.newCal();
+ cal.setTimeInMillis(random().nextLong());
+ return tree.toShape(cal);
+ } else {
+ return randomShape();
+ }
+ }
+
+ @Override
+ protected Shape randomQueryShape() {
+ return randomShape();
+ }
+
+ private Shape randomShape() {
Calendar cal1 = randomCalendar();
UnitNRShape s1 = tree.toShape(cal1);
if (rarely()) {
@@ -144,9 +152,4 @@ public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
}
return cal;
}
-
- @Override
- protected Shape randomQueryShape() {
- return randomIndexedShape();
- }
}
diff --git a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java
index ac5ab953455..901594ef749 100644
--- a/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java
+++ b/lucene/spatial-extras/src/test/org/apache/lucene/spatial/vector/TestPointVectorStrategy.java
@@ -63,12 +63,7 @@ public class TestPointVectorStrategy extends StrategyTestCase {
@Test
public void testCitiesIntersectsBBox() throws IOException {
// note: does not require docValues
- if (random().nextBoolean()) {
- this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
- } else {
- // switch to legacy instance sometimes, which has no docValues
- this.strategy = PointVectorStrategy.newLegacyInstance(ctx, getClass().getSimpleName());
- }
+ this.strategy = PointVectorStrategy.newInstance(ctx, getClass().getSimpleName());
getAddAndVerifyIndexedDocuments(DATA_WORLD_CITIES_POINTS);
executeQueries(SpatialMatchConcern.FILTER, QTEST_Cities_Intersects_BBox);
}
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
index 3e222bc2d52..166d80dec12 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
@@ -60,7 +60,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
*
*
wordset - This is the default format, which supports one word per
* line (including any intra-word whitespace) and allows whole line comments
- * begining with the "#" character. Blank lines are ignored. See
+ * beginning with the "#" character. Blank lines are ignored. See
* {@link WordlistLoader#getLines WordlistLoader.getLines} for details.
*
*
snowball - This format allows for multiple words specified on each
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5b92e3c4996..f57cac583d7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -194,6 +194,13 @@ Other Changes
* SOLR-10700: Deprecated and converted the PostingsSolrHighlighter to extend UnifiedSolrHighlighter and thus no
longer use the PostingsHighlighter. It should behave mostly the same. (David Smiley)
+* SOLR-10710: Fix LTR failing tests. (Diego Ceccarelli via Tomás Fernández Löbbe)
+
+* SOLR-10755: delete/refactor many solrj deprecations (hossman)
+
+* SOLR-10752: replicationFactor (nrtReplicas) default is 0 if tlogReplicas is specified when creating a collection
+ (Tomás Fernández Löbbe)
+
================== 6.7.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
@@ -217,6 +224,8 @@ Upgrade Notes
passwords via the env variables SOLR_SSL_KEY_STORE_PASSWORD and SOLR_SSL_TRUST_STORE_PASSWORD rather
than system properties.
+* SOLR-10379: ManagedSynonymFilterFactory has been deprecated in favor of ManagedSynonymGraphFilterFactory.
+
New Features
----------------------
@@ -224,12 +233,20 @@ New Features
* SOLR-10721: Provide a way to know when Core Discovery is finished and when all async cores are done loading
(Erick Erickson)
+
+* SOLR-10379: Add ManagedSynonymGraphFilterFactory, deprecate ManagedSynonymFilterFactory. (Steve Rowe)
+
+* SOLR-10479: Adds support for HttpShardHandlerFactory.loadBalancerRequests(MinimumAbsolute|MaximumFraction)
+ configuration. (Ramsey Haddad, Daniel Collins, Christine Poerschke)
Bug Fixes
----------------------
* SOLR-10723 JSON Facet API: resize() implemented incorrectly for CountSlotAcc, HllAgg.NumericAcc
resulting in exceptions when using a hashing faceting method and sorting by hll(numeric_field).
(yonik)
+
+* SOLR-10719: Creating a core.properties fails if the parent of core.properties is a symlinked dierctory
+ (Erick Erickson)
Optimizations
----------------------
@@ -238,7 +255,6 @@ Optimizations
so that the second phase which would normally involve calculating the domain for the bucket
can be skipped entirely, leading to large performance improvements. (yonik)
-
Other Changes
----------------------
@@ -250,6 +266,15 @@ Other Changes
* SOLR-10438: Assign explicit useDocValuesAsStored values to all points field types in
schema-point.xml/TestPointFields. (hossman, Steve Rowe)
+
+* LUCENE-7705: Allow CharTokenizer-derived tokenizers and KeywordTokenizer to configure the max token length.
+ (Amrit Sarkar via Erick Erickson)
+
+* SOLR-10659: Remove ResponseBuilder.getSortSpec use in SearchGroupShardResponseProcessor.
+ (Judith Silverman via Christine Poerschke)
+
+* SOLR-10741: Factor out createSliceShardsStr method from HttpShardHandler.prepDistributed.
+ (Domenico Fabio Marino via Christine Poerschke)
================== 6.6.0 ==================
@@ -458,6 +483,14 @@ Bug Fixes
"lucene"/standard query parser, should require " TO " in range queries,
and accept "TO" as endpoints in range queries. (hossman, Steve Rowe)
+* SOLR-10735: Windows script (solr.cmd) didn't work properly with directory containing spaces. Adding quotations
+ to fix (Uwe Schindler, janhoy, Tomas Fernandez-Lobbe, Ishan Chattopadhyaya)
+
+Ref Guide
+----------------------
+
+* SOLR-10758: Modernize the Solr ref guide's Chinese language analysis coverage. (Steve Rowe)
+
Other Changes
----------------------
diff --git a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java
index aadb9e2d4ce..dd64c3ff96a 100644
--- a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java
+++ b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/AnalyticsParsers.java
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
-import org.apache.lucene.legacy.LegacyNumericUtils;
+import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.FieldType;
diff --git a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java
index d13795d8e29..803d8e0eacf 100644
--- a/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java
+++ b/solr/contrib/analytics/src/java/org/apache/solr/analytics/util/valuesource/DateFieldSource.java
@@ -24,7 +24,7 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
-import org.apache.lucene.legacy.LegacyNumericUtils;
+import org.apache.solr.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.LongDocValues;
import org.apache.lucene.queries.function.valuesource.LongFieldSource;
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java
index 4c17affe5bc..00159b927a4 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldLengthFeature.java
@@ -76,8 +76,7 @@ public class FieldLengthFeature extends Feature {
static {
NORM_TABLE[0] = 0;
for (int i = 1; i < 256; i++) {
- float norm = SmallFloat.byte315ToFloat((byte) i);
- NORM_TABLE[i] = 1.0f / (norm * norm);
+ NORM_TABLE[i] = SmallFloat.byte4ToInt((byte) i);
}
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
index d4457a0a7a2..decb1c0888b 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestLTRQParserPlugin.java
@@ -88,15 +88,16 @@ public class TestLTRQParserPlugin extends TestRerankBase {
query.add("rows", "4");
query.add("fv", "true");
- String nonRerankedScore = "0.09271725";
+ // FIXME: design better way to test this, we cannot check an absolute score
+ // String nonRerankedScore = "0.09271725";
// Normal solr order
assertJQ("/query" + query.toQueryString(),
"/response/docs/[0]/id=='9'",
"/response/docs/[1]/id=='8'",
"/response/docs/[2]/id=='7'",
- "/response/docs/[3]/id=='6'",
- "/response/docs/[3]/score=="+nonRerankedScore
+ "/response/docs/[3]/id=='6'"
+ // "/response/docs/[3]/score=="+nonRerankedScore
);
query.add("rq", "{!ltr model=6029760550880411648 reRankDocs=3}");
@@ -106,8 +107,8 @@ public class TestLTRQParserPlugin extends TestRerankBase {
"/response/docs/[0]/id=='7'",
"/response/docs/[1]/id=='8'",
"/response/docs/[2]/id=='9'",
- "/response/docs/[3]/id=='6'",
- "/response/docs/[3]/score=="+nonRerankedScore
+ "/response/docs/[3]/id=='6'"
+ // "/response/docs/[3]/score=="+nonRerankedScore
);
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java
index 630a68cf87f..46330c9f26d 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestParallelWeightCreation.java
@@ -42,8 +42,9 @@ public class TestParallelWeightCreation extends TestRerankBase{
query.add("rows", "4");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
+ // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
aftertest();
}
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
index 7bf8373a56a..cbd0e2389da 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/TestSelectiveWeightCreation.java
@@ -210,14 +210,14 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
@Test
public void testSelectiveWeightsRequestFeaturesFromDifferentStore() throws Exception {
- final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
- "matchedTitle","1.0", "titlePhraseMatch","0.6103343");
- final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
- "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
- final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
- "popularity","3.0", "originalScore","1.0");
-
- final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);
+// final String docs0fv_sparse = FeatureLoggerTestUtils.toFeatureVector(
+// "matchedTitle","1.0", "titlePhraseMatch","0.6103343");
+// final String docs0fv_dense = FeatureLoggerTestUtils.toFeatureVector(
+// "matchedTitle","1.0", "titlePhraseMatch","0.6103343", "titlePhrasesMatch","0.0");
+// final String docs0fv_fstore4= FeatureLoggerTestUtils.toFeatureVector(
+// "popularity","3.0", "originalScore","1.0");
+//
+// final String docs0fv = chooseDefaultFeatureVector(docs0fv_dense, docs0fv_sparse);
// extract all features in externalmodel's store (default store)
// rerank using externalmodel (default store)
@@ -227,11 +227,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
query.add("rows", "5");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3 efi.userTitlePhrase1=w2 efi.userTitlePhrase2=w1}");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
+ // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
+ // FIXME design better way to test this, we can't rely on absolute scores
+ // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv+"'");
// extract all features from fstore4
// rerank using externalmodel (default store)
@@ -240,11 +241,12 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
query.add("fl", "*,score,fv:[fv store=fstore4 efi.myPop=3]");
query.add("rq", "{!ltr reRankDocs=10 model=externalmodel efi.user_query=w3}");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='3'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==0.33873552");
+ // SOLR-10710, feature based on query with term w3 now scores higher on doc 4, updated
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='4'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='3'");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='1'");
+ // FIXME design better way to test this, we can't rely on absolute scores
+ // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
// extract all features from fstore4
// rerank using externalmodel2 (fstore2)
@@ -255,9 +257,9 @@ public class TestSelectiveWeightCreation extends TestRerankBase {
assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='5'");
assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='4'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
- assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/score==2.5");
+ assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='3'");
+ // FIXME design better way to test this, we can't rely on absolute scores
+ // assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/fv=='"+docs0fv_fstore4+"'");
}
}
diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
index 3d1a54e45e3..02bb0180a28 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
@@ -95,9 +95,9 @@ public class CreateCollectionCmd implements Cmd {
// look at the replication factor and see if it matches reality
// if it does not, find best nodes to create more cores
- int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, 1));
- int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
int numTlogReplicas = message.getInt(TLOG_REPLICAS, 0);
+ int numNrtReplicas = message.getInt(NRT_REPLICAS, message.getInt(REPLICATION_FACTOR, numTlogReplicas>0?0:1));
+ int numPullReplicas = message.getInt(PULL_REPLICAS, 0);
ShardHandler shardHandler = ocmh.shardHandlerFactory.getShardHandler();
final String async = message.getStr(ASYNC);
diff --git a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java
index e942c9b90ee..99c101bfac0 100644
--- a/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java
+++ b/solr/core/src/java/org/apache/solr/core/CorePropertiesLocator.java
@@ -39,6 +39,7 @@ import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import org.apache.solr.common.SolrException;
+import org.apache.solr.util.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -85,13 +86,15 @@ public class CorePropertiesLocator implements CoresLocator {
private void writePropertiesFile(CoreDescriptor cd, Path propfile) {
Properties p = buildCoreProperties(cd);
try {
- Files.createDirectories(propfile.getParent());
+ FileUtils.createDirectories(propfile.getParent()); // Handling for symlinks.
try (Writer os = new OutputStreamWriter(Files.newOutputStream(propfile), StandardCharsets.UTF_8)) {
p.store(os, "Written by CorePropertiesLocator");
}
}
catch (IOException e) {
logger.error("Couldn't persist core properties to {}: {}", propfile, e.getMessage());
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Couldn't persist core properties to " + propfile.toAbsolutePath().toString() + " : " + e.getMessage());
}
}
diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
index c045f206508..f79f86ee05e 100644
--- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
@@ -45,6 +45,9 @@ import org.apache.solr.client.solrj.io.stream.expr.Explanation;
import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
import org.apache.solr.client.solrj.io.stream.expr.Expressible;
import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionParser;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import org.apache.solr.client.solrj.io.stream.metrics.CountMetric;
import org.apache.solr.client.solrj.io.stream.metrics.MaxMetric;
@@ -185,6 +188,12 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("percentile", PercentileEvaluator.class)
.withFunctionName("empiricalDistribution", EmpiricalDistributionEvaluator.class)
.withFunctionName("describe", DescribeEvaluator.class)
+ .withFunctionName("finddelay", FindDelayEvaluator.class)
+ .withFunctionName("sequence", SequenceEvaluator.class)
+ .withFunctionName("array", ArrayEvaluator.class)
+ .withFunctionName("hist", HistogramEvaluator.class)
+ .withFunctionName("anova", AnovaEvaluator.class)
+ .withFunctionName("movingAvg", MovingAverageEvaluator.class)
// metrics
.withFunctionName("min", MinMetric.class)
@@ -296,7 +305,14 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
TupleStream tupleStream;
try {
- tupleStream = this.streamFactory.constructStream(params.get("expr"));
+ StreamExpression streamExpression = StreamExpressionParser.parse(params.get("expr"));
+ if(this.streamFactory.isEvaluator(streamExpression)) {
+ StreamExpression tupleExpression = new StreamExpression("tuple");
+ tupleExpression.addParameter(new StreamExpressionNamedParameter("return-value", streamExpression));
+ tupleStream = this.streamFactory.constructStream(tupleExpression);
+ } else {
+ tupleStream = this.streamFactory.constructStream(streamExpression);
+ }
} catch (Exception e) {
//Catch exceptions that occur while the stream is being created. This will include streaming expression parse rules.
SolrException.log(logger, e);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
index 4ec3b7924f4..bc620b61418 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
@@ -449,17 +449,7 @@ public class HttpShardHandler extends ShardHandler {
}
}
// And now recreate the | delimited list of equivalent servers
- final StringBuilder sliceShardsStr = new StringBuilder();
- boolean first = true;
- for (String shardUrl : shardUrls) {
- if (first) {
- first = false;
- } else {
- sliceShardsStr.append('|');
- }
- sliceShardsStr.append(shardUrl);
- }
- rb.shards[i] = sliceShardsStr.toString();
+ rb.shards[i] = createSliceShardsStr(shardUrls);
}
}
String shards_rows = params.get(ShardParams.SHARDS_ROWS);
@@ -472,6 +462,20 @@ public class HttpShardHandler extends ShardHandler {
}
}
+ private static String createSliceShardsStr(final List shardUrls) {
+ final StringBuilder sliceShardsStr = new StringBuilder();
+ boolean first = true;
+ for (String shardUrl : shardUrls) {
+ if (first) {
+ first = false;
+ } else {
+ sliceShardsStr.append('|');
+ }
+ sliceShardsStr.append(shardUrl);
+ }
+ return sliceShardsStr.toString();
+ }
+
private void addSlices(Map target, ClusterState state, SolrParams params, String collectionName, String shardKeys, boolean multiCollection) {
DocCollection coll = state.getCollection(collectionName);
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
index e3787cdf1ca..73d97078a09 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
@@ -97,6 +97,8 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
int maximumPoolSize = Integer.MAX_VALUE;
int keepAliveTime = 5;
int queueSize = -1;
+ int permittedLoadBalancerRequestsMinimumAbsolute = 0;
+ float permittedLoadBalancerRequestsMaximumFraction = 1.0f;
boolean accessPolicy = false;
private String scheme = null;
@@ -122,6 +124,12 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
// If the threadpool uses a backing queue, what is its maximum size (-1) to use direct handoff
static final String INIT_SIZE_OF_QUEUE = "sizeOfQueue";
+ // The minimum number of replicas that may be used
+ static final String LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE = "loadBalancerRequestsMinimumAbsolute";
+
+ // The maximum proportion of replicas to be used
+ static final String LOAD_BALANCER_REQUESTS_MAX_FRACTION = "loadBalancerRequestsMaximumFraction";
+
// Configure if the threadpool favours fairness over throughput
static final String INIT_FAIRNESS_POLICY = "fairnessPolicy";
@@ -164,6 +172,16 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
this.maximumPoolSize = getParameter(args, INIT_MAX_POOL_SIZE, maximumPoolSize,sb);
this.keepAliveTime = getParameter(args, MAX_THREAD_IDLE_TIME, keepAliveTime,sb);
this.queueSize = getParameter(args, INIT_SIZE_OF_QUEUE, queueSize,sb);
+ this.permittedLoadBalancerRequestsMinimumAbsolute = getParameter(
+ args,
+ LOAD_BALANCER_REQUESTS_MIN_ABSOLUTE,
+ permittedLoadBalancerRequestsMinimumAbsolute,
+ sb);
+ this.permittedLoadBalancerRequestsMaximumFraction = getParameter(
+ args,
+ LOAD_BALANCER_REQUESTS_MAX_FRACTION,
+ permittedLoadBalancerRequestsMaximumFraction,
+ sb);
this.accessPolicy = getParameter(args, INIT_FAIRNESS_POLICY, accessPolicy,sb);
log.debug("created with {}",sb);
@@ -252,7 +270,15 @@ public class HttpShardHandlerFactory extends ShardHandlerFactory implements org.
*/
public LBHttpSolrClient.Rsp makeLoadBalancedRequest(final QueryRequest req, List urls)
throws SolrServerException, IOException {
- return loadbalancer.request(new LBHttpSolrClient.Req(req, urls));
+ return loadbalancer.request(newLBHttpSolrClientReq(req, urls));
+ }
+
+ protected LBHttpSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List urls) {
+ int numServersToTry = (int)Math.floor(urls.size() * this.permittedLoadBalancerRequestsMaximumFraction);
+ if (numServersToTry < this.permittedLoadBalancerRequestsMinimumAbsolute) {
+ numServersToTry = this.permittedLoadBalancerRequestsMinimumAbsolute;
+ }
+ return new LBHttpSolrClient.Req(req, urls, numServersToTry);
}
/**
diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
index 7e56ee44e58..18d9b446121 100644
--- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@@ -84,6 +84,12 @@ import org.slf4j.LoggerFactory;
public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized
{
+ /**
+ * This constant was formerly part of HighlightParams. After deprecation it was removed so clients
+ * would no longer use it, but we still support it server side.
+ */
+ private static final String USE_FVH = HighlightParams.HIGHLIGHT + ".useFastVectorHighlighter";
+
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected final SolrCore solrCore;
@@ -492,7 +498,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
boolean methodFvh =
HighlightComponent.HighlightMethod.FAST_VECTOR.getMethodName().equals(
params.getFieldParam(schemaField.getName(), HighlightParams.METHOD))
- || params.getFieldBool(schemaField.getName(), HighlightParams.USE_FVH, false);
+ || params.getFieldBool(schemaField.getName(), USE_FVH, false);
if (!methodFvh) return false;
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
if (!termPosOff) {
diff --git a/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java b/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java
new file mode 100644
index 00000000000..c919eb8297d
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/legacy/BBoxStrategy.java
@@ -0,0 +1,706 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.legacy;
+
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.bbox.BBoxOverlapRatioValueSource;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
+import org.apache.lucene.spatial.util.DistanceToShapeValueSource;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.NumericUtils;
+import org.locationtech.spatial4j.context.SpatialContext;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Rectangle;
+import org.locationtech.spatial4j.shape.Shape;
+
+
+/**
+ * A SpatialStrategy for indexing and searching Rectangles by storing its
+ * coordinates in numeric fields. It supports all {@link SpatialOperation}s and
+ * has a custom overlap relevancy. It is based on GeoPortal's SpatialClauseAdapter.
+ *
+ * Characteristics:
+ *
+ *
+ *
Only indexes Rectangles; just one per field value. Other shapes can be provided
+ * and the bounding box will be used.
+ *
Can query only by a Rectangle. Providing other shapes is an error.
+ *
Supports most {@link SpatialOperation}s but not Overlaps.
+ *
Uses the DocValues API for any sorting / relevancy.
+ *
+ *
+ * Implementation:
+ *
+ * This uses 4 double fields for minX, maxX, minY, maxY
+ * and a boolean to mark a dateline cross. Depending on the particular {@link
+ * SpatialOperation}s, there are a variety of range queries on {@link DoublePoint}s to be
+ * done.
+ * The {@link #makeOverlapRatioValueSource(org.locationtech.spatial4j.shape.Rectangle, double)}
+ * works by calculating the query bbox overlap percentage against the indexed
+ * shape overlap percentage. The indexed shape's coordinates are retrieved from
+ * {@link org.apache.lucene.index.LeafReader#getNumericDocValues}.
+ *
+ * @lucene.experimental
+ */
+public class BBoxStrategy extends SpatialStrategy {
+
+ // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we
+ // create more than one Field.
+
+ /**
+ * pointValues, docValues, and nothing else.
+ */
+ public static FieldType DEFAULT_FIELDTYPE;
+
+ @Deprecated
+ public static LegacyFieldType LEGACY_FIELDTYPE;
+ static {
+ // Default: pointValues + docValues
+ FieldType type = new FieldType();
+ type.setDimensions(1, Double.BYTES);//pointValues (assume Double)
+ type.setDocValuesType(DocValuesType.NUMERIC);//docValues
+ type.setStored(false);
+ type.freeze();
+ DEFAULT_FIELDTYPE = type;
+ // Legacy default: legacyNumerics + docValues
+ LegacyFieldType legacyType = new LegacyFieldType();
+ legacyType.setIndexOptions(IndexOptions.DOCS);
+ legacyType.setNumericType(LegacyNumericType.DOUBLE);
+ legacyType.setNumericPrecisionStep(8);// same as solr default
+ legacyType.setDocValuesType(DocValuesType.NUMERIC);//docValues
+ legacyType.setStored(false);
+ legacyType.freeze();
+ LEGACY_FIELDTYPE = legacyType;
+ }
+
+ public static final String SUFFIX_MINX = "__minX";
+ public static final String SUFFIX_MAXX = "__maxX";
+ public static final String SUFFIX_MINY = "__minY";
+ public static final String SUFFIX_MAXY = "__maxY";
+ public static final String SUFFIX_XDL = "__xdl";
+
+ /*
+ * The Bounding Box gets stored as four fields for x/y min/max and a flag
+ * that says if the box crosses the dateline (xdl).
+ */
+ final String field_bbox;
+ final String field_minX;
+ final String field_minY;
+ final String field_maxX;
+ final String field_maxY;
+ final String field_xdl; // crosses dateline
+
+ private final FieldType optionsFieldType;//from constructor; aggregate field type used to express all options
+ private final int fieldsLen;
+ private final boolean hasStored;
+ private final boolean hasDocVals;
+ private final boolean hasPointVals;
+ // equiv to "hasLegacyNumerics":
+ private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
+ private final FieldType xdlFieldType;
+
+ /**
+ * Creates a new {@link BBoxStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
+ */
+ public static BBoxStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
+ return new BBoxStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
+ }
+
+ /**
+ * Creates a new {@link BBoxStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility
+ * @deprecated LegacyNumerics will be removed
+ */
+ @Deprecated
+ public static BBoxStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
+ return new BBoxStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
+ }
+
+ /**
+ * Creates this strategy.
+ * {@code fieldType} is used to customize the indexing options of the 4 number fields, and to a lesser degree the XDL
+ * field too. Search requires pointValues (or legacy numerics), and relevancy requires docValues. If these features
+ * aren't needed then disable them.
+ */
+ public BBoxStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
+ super(ctx, fieldNamePrefix);
+ field_bbox = fieldNamePrefix;
+ field_minX = fieldNamePrefix + SUFFIX_MINX;
+ field_maxX = fieldNamePrefix + SUFFIX_MAXX;
+ field_minY = fieldNamePrefix + SUFFIX_MINY;
+ field_maxY = fieldNamePrefix + SUFFIX_MAXY;
+ field_xdl = fieldNamePrefix + SUFFIX_XDL;
+
+ fieldType.freeze();
+ this.optionsFieldType = fieldType;
+
+ int numQuads = 0;
+ if ((this.hasStored = fieldType.stored())) {
+ numQuads++;
+ }
+ if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
+ numQuads++;
+ }
+ if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
+ numQuads++;
+ }
+ if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
+ if (hasPointVals) {
+ throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
+ }
+ final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
+ if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
+ throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
+ }
+ numQuads++;
+ legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
+ legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
+ legacyNumericFieldType.freeze();
+ } else {
+ legacyNumericFieldType = null;
+ }
+
+ if (hasPointVals || legacyNumericFieldType != null) { // if we have an index...
+ xdlFieldType = new FieldType(StringField.TYPE_NOT_STORED);
+ xdlFieldType.setIndexOptions(IndexOptions.DOCS);
+ xdlFieldType.freeze();
+ } else {
+ xdlFieldType = null;
+ }
+
+ this.fieldsLen = numQuads * 4 + (xdlFieldType != null ? 1 : 0);
+ }
+
+ /** Returns a field type representing the set of field options. This is identical to what was passed into the
+ * constructor. It's frozen. */
+ public FieldType getFieldType() {
+ return optionsFieldType;
+ }
+
+ //---------------------------------
+ // Indexing
+ //---------------------------------
+
+ @Override
+ public Field[] createIndexableFields(Shape shape) {
+ return createIndexableFields(shape.getBoundingBox());
+ }
+
+ private Field[] createIndexableFields(Rectangle bbox) {
+ Field[] fields = new Field[fieldsLen];
+ int idx = -1;
+ if (hasStored) {
+ fields[++idx] = new StoredField(field_minX, bbox.getMinX());
+ fields[++idx] = new StoredField(field_minY, bbox.getMinY());
+ fields[++idx] = new StoredField(field_maxX, bbox.getMaxX());
+ fields[++idx] = new StoredField(field_maxY, bbox.getMaxY());
+ }
+ if (hasDocVals) {
+ fields[++idx] = new DoubleDocValuesField(field_minX, bbox.getMinX());
+ fields[++idx] = new DoubleDocValuesField(field_minY, bbox.getMinY());
+ fields[++idx] = new DoubleDocValuesField(field_maxX, bbox.getMaxX());
+ fields[++idx] = new DoubleDocValuesField(field_maxY, bbox.getMaxY());
+ }
+ if (hasPointVals) {
+ fields[++idx] = new DoublePoint(field_minX, bbox.getMinX());
+ fields[++idx] = new DoublePoint(field_minY, bbox.getMinY());
+ fields[++idx] = new DoublePoint(field_maxX, bbox.getMaxX());
+ fields[++idx] = new DoublePoint(field_maxY, bbox.getMaxY());
+ }
+ if (legacyNumericFieldType != null) {
+ fields[++idx] = new LegacyDoubleField(field_minX, bbox.getMinX(), legacyNumericFieldType);
+ fields[++idx] = new LegacyDoubleField(field_minY, bbox.getMinY(), legacyNumericFieldType);
+ fields[++idx] = new LegacyDoubleField(field_maxX, bbox.getMaxX(), legacyNumericFieldType);
+ fields[++idx] = new LegacyDoubleField(field_maxY, bbox.getMaxY(), legacyNumericFieldType);
+ }
+ if (xdlFieldType != null) {
+ fields[++idx] = new Field(field_xdl, bbox.getCrossesDateLine()?"T":"F", xdlFieldType);
+ }
+ assert idx == fields.length - 1;
+ return fields;
+ }
+
+
+ //---------------------------------
+ // Value Source / Relevancy
+ //---------------------------------
+
+ /**
+ * Provides access to each rectangle per document as a ValueSource in which
+ * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)} returns a {@link
+ * Shape}.
+ */ //TODO raise to SpatialStrategy
+ public ValueSource makeShapeValueSource() {
+ return new BBoxValueSource(this);
+ }
+
+ @Override
+ public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
+ //TODO if makeShapeValueSource gets lifted to the top; this could become a generic impl.
+ return new DistanceToShapeValueSource(makeShapeValueSource(), queryPoint, multiplier, ctx);
+ }
+
+ /** Returns a similarity based on {@link BBoxOverlapRatioValueSource}. This is just a
+ * convenience method. */
+ public ValueSource makeOverlapRatioValueSource(Rectangle queryBox, double queryTargetProportion) {
+ return new BBoxOverlapRatioValueSource(
+ makeShapeValueSource(), ctx.isGeo(), queryBox, queryTargetProportion, 0.0);
+ }
+
+ //---------------------------------
+ // Query Building
+ //---------------------------------
+
+ // Utility on SpatialStrategy?
+// public Query makeQueryWithValueSource(SpatialArgs args, ValueSource valueSource) {
+// return new CustomScoreQuery(makeQuery(args), new FunctionQuery(valueSource));
+ //or...
+// return new BooleanQuery.Builder()
+// .add(new FunctionQuery(valueSource), BooleanClause.Occur.MUST)//matches everything and provides score
+// .add(filterQuery, BooleanClause.Occur.FILTER)//filters (score isn't used)
+// .build();
+// }
+
+ @Override
+ public Query makeQuery(SpatialArgs args) {
+ Shape shape = args.getShape();
+ if (!(shape instanceof Rectangle))
+ throw new UnsupportedOperationException("Can only query by Rectangle, not " + shape);
+
+ Rectangle bbox = (Rectangle) shape;
+ Query spatial;
+
+ // Useful for understanding Relations:
+ // http://edndoc.esri.com/arcsde/9.1/general_topics/understand_spatial_relations.htm
+ SpatialOperation op = args.getOperation();
+ if( op == SpatialOperation.BBoxIntersects ) spatial = makeIntersects(bbox);
+ else if( op == SpatialOperation.BBoxWithin ) spatial = makeWithin(bbox);
+ else if( op == SpatialOperation.Contains ) spatial = makeContains(bbox);
+ else if( op == SpatialOperation.Intersects ) spatial = makeIntersects(bbox);
+ else if( op == SpatialOperation.IsEqualTo ) spatial = makeEquals(bbox);
+ else if( op == SpatialOperation.IsDisjointTo ) spatial = makeDisjoint(bbox);
+ else if( op == SpatialOperation.IsWithin ) spatial = makeWithin(bbox);
+ else { //no Overlaps support yet
+ throw new UnsupportedSpatialOperation(op);
+ }
+ return new ConstantScoreQuery(spatial);
+ }
+
+ /**
+ * Constructs a query to retrieve documents that fully contain the input envelope.
+ *
+ * @return the spatial query
+ */
+ Query makeContains(Rectangle bbox) {
+
+ // general case
+ // docMinX <= queryExtent.getMinX() AND docMinY <= queryExtent.getMinY() AND docMaxX >= queryExtent.getMaxX() AND docMaxY >= queryExtent.getMaxY()
+
+ // Y conditions
+ // docMinY <= queryExtent.getMinY() AND docMaxY >= queryExtent.getMaxY()
+ Query qMinY = this.makeNumericRangeQuery(field_minY, null, bbox.getMinY(), false, true);
+ Query qMaxY = this.makeNumericRangeQuery(field_maxY, bbox.getMaxY(), null, true, false);
+ Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);
+
+ // X conditions
+ Query xConditions;
+
+ // queries that do not cross the date line
+ if (!bbox.getCrossesDateLine()) {
+
+ // X Conditions for documents that do not cross the date line,
+ // documents that contain the min X and max X of the query envelope,
+ // docMinX <= queryExtent.getMinX() AND docMaxX >= queryExtent.getMaxX()
+ Query qMinX = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
+ Query qMaxX = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
+ Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);
+ Query qNonXDL = this.makeXDL(false, qMinMax);
+
+ if (!ctx.isGeo()) {
+ xConditions = qNonXDL;
+ } else {
+ // X Conditions for documents that cross the date line,
+ // the left portion of the document contains the min X of the query
+ // OR the right portion of the document contains the max X of the query,
+ // docMinXLeft <= queryExtent.getMinX() OR docMaxXRight >= queryExtent.getMaxX()
+ Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
+ Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
+ Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qXDLLeft, qXDLRight);
+ Query qXDL = this.makeXDL(true, qXDLLeftRight);
+
+ Query qEdgeDL = null;
+ if (bbox.getMinX() == bbox.getMaxX() && Math.abs(bbox.getMinX()) == 180) {
+ double edge = bbox.getMinX() * -1;//opposite dateline edge
+ qEdgeDL = makeQuery(BooleanClause.Occur.SHOULD,
+ makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
+ }
+
+ // apply the non-XDL and XDL conditions
+ xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL, qEdgeDL);
+ }
+ } else {
+ // queries that cross the date line
+
+ // No need to search for documents that do not cross the date line
+
+ // X Conditions for documents that cross the date line,
+ // the left portion of the document contains the min X of the query
+ // AND the right portion of the document contains the max X of the query,
+ // docMinXLeft <= queryExtent.getMinX() AND docMaxXRight >= queryExtent.getMaxX()
+ Query qXDLLeft = this.makeNumericRangeQuery(field_minX, null, bbox.getMinX(), false, true);
+ Query qXDLRight = this.makeNumericRangeQuery(field_maxX, bbox.getMaxX(), null, true, false);
+ Query qXDLLeftRight = this.makeXDL(true, this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight));
+
+ Query qWorld = makeQuery(BooleanClause.Occur.MUST,
+ makeNumberTermQuery(field_minX, -180), makeNumberTermQuery(field_maxX, 180));
+
+ xConditions = makeQuery(BooleanClause.Occur.SHOULD, qXDLLeftRight, qWorld);
+ }
+
+ // both X and Y conditions must occur
+ return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
+ }
+
+ /**
+ * Constructs a query to retrieve documents that are disjoint to the input envelope.
+ *
+ * @return the spatial query
+ */
+ Query makeDisjoint(Rectangle bbox) {
+
+ // general case
+ // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX() OR docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()
+
+ // Y conditions
+ // docMinY > queryExtent.getMaxY() OR docMaxY < queryExtent.getMinY()
+ Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMaxY(), null, false, false);
+ Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMinY(), false, false);
+ Query yConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qMinY, qMaxY);
+
+ // X conditions
+ Query xConditions;
+
+ // queries that do not cross the date line
+ if (!bbox.getCrossesDateLine()) {
+
+ // X Conditions for documents that do not cross the date line,
+ // docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()
+ Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
+ if (bbox.getMinX() == -180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ bq.add(qMinX, BooleanClause.Occur.MUST);
+ bq.add(makeNumberTermQuery(field_maxX, 180.0), BooleanClause.Occur.MUST_NOT);
+ qMinX = bq.build();
+ }
+ Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
+
+ if (bbox.getMaxX() == 180.0 && ctx.isGeo()) {//touches dateline; -180 == 180
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ bq.add(qMaxX, BooleanClause.Occur.MUST);
+ bq.add(makeNumberTermQuery(field_minX, -180.0), BooleanClause.Occur.MUST_NOT);
+ qMaxX = bq.build();
+ }
+ Query qMinMax = this.makeQuery(BooleanClause.Occur.SHOULD, qMinX, qMaxX);
+ Query qNonXDL = this.makeXDL(false, qMinMax);
+
+ if (!ctx.isGeo()) {
+ xConditions = qNonXDL;
+ } else {
+ // X Conditions for documents that cross the date line,
+
+ // both the left and right portions of the document must be disjoint to the query
+ // (docMinXLeft > queryExtent.getMaxX() OR docMaxXLeft < queryExtent.getMinX()) AND
+ // (docMinXRight > queryExtent.getMaxX() OR docMaxXRight < queryExtent.getMinX())
+ // where: docMaxXLeft = 180.0, docMinXRight = -180.0
+ // (docMaxXLeft < queryExtent.getMinX()) equates to (180.0 < queryExtent.getMinX()) and is ignored
+ // (docMinXRight > queryExtent.getMaxX()) equates to (-180.0 > queryExtent.getMaxX()) and is ignored
+ Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
+ Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
+ Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXRight);
+ Query qXDL = this.makeXDL(true, qLeftRight);
+
+ // apply the non-XDL and XDL conditions
+ xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
+ }
+ // queries that cross the date line
+ } else {
+
+ // X Conditions for documents that do not cross the date line,
+ // the document must be disjoint to both the left and right query portions
+ // (docMinX > queryExtent.getMaxX()Left OR docMaxX < queryExtent.getMinX()) AND (docMinX > queryExtent.getMaxX() OR docMaxX < queryExtent.getMinX()Left)
+ // where: queryExtent.getMaxX()Left = 180.0, queryExtent.getMinX()Left = -180.0
+ Query qMinXLeft = this.makeNumericRangeQuery(field_minX, 180.0, null, false, false);
+ Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, bbox.getMinX(), false, false);
+ Query qMinXRight = this.makeNumericRangeQuery(field_minX, bbox.getMaxX(), null, false, false);
+ Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, -180.0, false, false);
+ Query qLeft = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXLeft, qMaxXLeft);
+ Query qRight = this.makeQuery(BooleanClause.Occur.SHOULD, qMinXRight, qMaxXRight);
+ Query qLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qLeft, qRight);
+
+ // No need to search for documents that do not cross the date line
+
+ xConditions = this.makeXDL(false, qLeftRight);
+ }
+
+ // either X or Y conditions should occur
+ return this.makeQuery(BooleanClause.Occur.SHOULD, xConditions, yConditions);
+ }
+
+ /**
+ * Constructs a query to retrieve documents that equal the input envelope.
+ *
+ * @return the spatial query
+ */
+ Query makeEquals(Rectangle bbox) {
+
+ // docMinX = queryExtent.getMinX() AND docMinY = queryExtent.getMinY() AND docMaxX = queryExtent.getMaxX() AND docMaxY = queryExtent.getMaxY()
+ Query qMinX = makeNumberTermQuery(field_minX, bbox.getMinX());
+ Query qMinY = makeNumberTermQuery(field_minY, bbox.getMinY());
+ Query qMaxX = makeNumberTermQuery(field_maxX, bbox.getMaxX());
+ Query qMaxY = makeNumberTermQuery(field_maxY, bbox.getMaxY());
+ return makeQuery(BooleanClause.Occur.MUST, qMinX, qMinY, qMaxX, qMaxY);
+ }
+
+ /**
+ * Constructs a query to retrieve documents that intersect the input envelope.
+ *
+ * @return the spatial query
+ */
+ Query makeIntersects(Rectangle bbox) {
+
+ // the original intersects query does not work for envelopes that cross the date line,
+ // switch to a NOT Disjoint query
+
+ // MUST_NOT causes a problem when it's the only clause type within a BooleanQuery,
+ // to get around it we add all documents as a SHOULD
+
+ // there must be an envelope, it must not be disjoint
+ Query qHasEnv;
+ if (ctx.isGeo()) {
+ Query qIsNonXDL = this.makeXDL(false);
+ Query qIsXDL = ctx.isGeo() ? this.makeXDL(true) : null;
+ qHasEnv = this.makeQuery(BooleanClause.Occur.SHOULD, qIsNonXDL, qIsXDL);
+ } else {
+ qHasEnv = this.makeXDL(false);
+ }
+
+ BooleanQuery.Builder qNotDisjoint = new BooleanQuery.Builder();
+ qNotDisjoint.add(qHasEnv, BooleanClause.Occur.MUST);
+ Query qDisjoint = makeDisjoint(bbox);
+ qNotDisjoint.add(qDisjoint, BooleanClause.Occur.MUST_NOT);
+
+ //Query qDisjoint = makeDisjoint();
+ //BooleanQuery qNotDisjoint = new BooleanQuery();
+ //qNotDisjoint.add(new MatchAllDocsQuery(),BooleanClause.Occur.SHOULD);
+ //qNotDisjoint.add(qDisjoint,BooleanClause.Occur.MUST_NOT);
+ return qNotDisjoint.build();
+ }
+
+ /**
+ * Makes a boolean query based upon a collection of queries and a logical operator.
+ *
+ * @param occur the logical operator
+ * @param queries the query collection
+ * @return the query
+ */
+ BooleanQuery makeQuery(BooleanClause.Occur occur, Query... queries) {
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ for (Query query : queries) {
+ if (query != null)
+ bq.add(query, occur);
+ }
+ return bq.build();
+ }
+
+ /**
+ * Constructs a query to retrieve documents are fully within the input envelope.
+ *
+ * @return the spatial query
+ */
+ Query makeWithin(Rectangle bbox) {
+
+ // general case
+ // docMinX >= queryExtent.getMinX() AND docMinY >= queryExtent.getMinY() AND docMaxX <= queryExtent.getMaxX() AND docMaxY <= queryExtent.getMaxY()
+
+ // Y conditions
+ // docMinY >= queryExtent.getMinY() AND docMaxY <= queryExtent.getMaxY()
+ Query qMinY = this.makeNumericRangeQuery(field_minY, bbox.getMinY(), null, true, false);
+ Query qMaxY = this.makeNumericRangeQuery(field_maxY, null, bbox.getMaxY(), false, true);
+ Query yConditions = this.makeQuery(BooleanClause.Occur.MUST, qMinY, qMaxY);
+
+ // X conditions
+ Query xConditions;
+
+ if (ctx.isGeo() && bbox.getMinX() == -180.0 && bbox.getMaxX() == 180.0) {
+ //if query world-wraps, only the y condition matters
+ return yConditions;
+
+ } else if (!bbox.getCrossesDateLine()) {
+ // queries that do not cross the date line
+
+ // docMinX >= queryExtent.getMinX() AND docMaxX <= queryExtent.getMaxX()
+ Query qMinX = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
+ Query qMaxX = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
+ Query qMinMax = this.makeQuery(BooleanClause.Occur.MUST, qMinX, qMaxX);
+
+ double edge = 0;//none, otherwise opposite dateline of query
+ if (bbox.getMinX() == -180.0)
+ edge = 180;
+ else if (bbox.getMaxX() == 180.0)
+ edge = -180;
+ if (edge != 0 && ctx.isGeo()) {
+ Query edgeQ = makeQuery(BooleanClause.Occur.MUST,
+ makeNumberTermQuery(field_minX, edge), makeNumberTermQuery(field_maxX, edge));
+ qMinMax = makeQuery(BooleanClause.Occur.SHOULD, qMinMax, edgeQ);
+ }
+
+ xConditions = this.makeXDL(false, qMinMax);
+
+ // queries that cross the date line
+ } else {
+
+ // X Conditions for documents that do not cross the date line
+
+ // the document should be within the left portion of the query
+ // docMinX >= queryExtent.getMinX() AND docMaxX <= 180.0
+ Query qMinXLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
+ Query qMaxXLeft = this.makeNumericRangeQuery(field_maxX, null, 180.0, false, true);
+ Query qLeft = this.makeQuery(BooleanClause.Occur.MUST, qMinXLeft, qMaxXLeft);
+
+ // the document should be within the right portion of the query
+ // docMinX >= -180.0 AND docMaxX <= queryExtent.getMaxX()
+ Query qMinXRight = this.makeNumericRangeQuery(field_minX, -180.0, null, true, false);
+ Query qMaxXRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
+ Query qRight = this.makeQuery(BooleanClause.Occur.MUST, qMinXRight, qMaxXRight);
+
+ // either left or right conditions should occur,
+ // apply the left and right conditions to documents that do not cross the date line
+ Query qLeftRight = this.makeQuery(BooleanClause.Occur.SHOULD, qLeft, qRight);
+ Query qNonXDL = this.makeXDL(false, qLeftRight);
+
+ // X Conditions for documents that cross the date line,
+ // the left portion of the document must be within the left portion of the query,
+ // AND the right portion of the document must be within the right portion of the query
+ // docMinXLeft >= queryExtent.getMinX() AND docMaxXLeft <= 180.0
+ // AND docMinXRight >= -180.0 AND docMaxXRight <= queryExtent.getMaxX()
+ Query qXDLLeft = this.makeNumericRangeQuery(field_minX, bbox.getMinX(), null, true, false);
+ Query qXDLRight = this.makeNumericRangeQuery(field_maxX, null, bbox.getMaxX(), false, true);
+ Query qXDLLeftRight = this.makeQuery(BooleanClause.Occur.MUST, qXDLLeft, qXDLRight);
+ Query qXDL = this.makeXDL(true, qXDLLeftRight);
+
+ // apply the non-XDL and XDL conditions
+ xConditions = this.makeQuery(BooleanClause.Occur.SHOULD, qNonXDL, qXDL);
+ }
+
+ // both X and Y conditions must occur
+ return this.makeQuery(BooleanClause.Occur.MUST, xConditions, yConditions);
+ }
+
+ /**
+ * Constructs a query to retrieve documents that do or do not cross the date line.
+ *
+ * @param crossedDateLine true for documents that cross the date line
+ * @return the query
+ */
+ private Query makeXDL(boolean crossedDateLine) {
+ // The 'T' and 'F' values match solr fields
+ return new TermQuery(new Term(field_xdl, crossedDateLine ? "T" : "F"));
+ }
+
+ /**
+ * Constructs a query to retrieve documents that do or do not cross the date line
+ * and match the supplied spatial query.
+ *
+ * @param crossedDateLine true for documents that cross the date line
+ * @param query the spatial query
+ * @return the query
+ */
+ private Query makeXDL(boolean crossedDateLine, Query query) {
+ if (!ctx.isGeo()) {
+ assert !crossedDateLine;
+ return query;
+ }
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ bq.add(this.makeXDL(crossedDateLine), BooleanClause.Occur.MUST);
+ bq.add(query, BooleanClause.Occur.MUST);
+ return bq.build();
+ }
+
+ private Query makeNumberTermQuery(String field, double number) {
+ if (hasPointVals) {
+ return DoublePoint.newExactQuery(field, number);
+ } else if (legacyNumericFieldType != null) {
+ BytesRefBuilder bytes = new BytesRefBuilder();
+ LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number), 0, bytes);
+ return new TermQuery(new Term(field, bytes.get()));
+ }
+ throw new UnsupportedOperationException("An index is required for this operation.");
+ }
+
+ /**
+ * Returns a numeric range query based on FieldType
+ * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
+ * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
+ *
+ * @param fieldname field name. must not be null.
+ * @param min minimum value of the range.
+ * @param max maximum value of the range.
+ * @param minInclusive include the minimum value if true.
+ * @param maxInclusive include the maximum value if true
+ */
+ private Query makeNumericRangeQuery(String fieldname, Double min, Double max, boolean minInclusive, boolean maxInclusive) {
+ if (hasPointVals) {
+ if (min == null) {
+ min = Double.NEGATIVE_INFINITY;
+ }
+
+ if (max == null) {
+ max = Double.POSITIVE_INFINITY;
+ }
+
+ if (minInclusive == false) {
+ min = Math.nextUp(min);
+ }
+
+ if (maxInclusive == false) {
+ max = Math.nextDown(max);
+ }
+
+ return DoublePoint.newRangeQuery(fieldname, min, max);
+ } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
+ return LegacyNumericRangeQuery.newDoubleRange(fieldname, legacyNumericFieldType.numericPrecisionStep(), min, max, minInclusive, maxInclusive);
+ }
+ throw new UnsupportedOperationException("An index is required for this operation.");
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java b/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java
new file mode 100644
index 00000000000..cd577c71a75
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/legacy/BBoxValueSource.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.legacy;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Explanation;
+import org.locationtech.spatial4j.shape.Rectangle;
+
+/**
+ * A ValueSource in which the indexed Rectangle is returned from
+ * {@link org.apache.lucene.queries.function.FunctionValues#objectVal(int)}.
+ *
+ * @lucene.internal
+ */
+class BBoxValueSource extends ValueSource {
+
+ private final BBoxStrategy strategy;
+
+ public BBoxValueSource(BBoxStrategy strategy) {
+ this.strategy = strategy;
+ }
+
+ @Override
+ public String description() {
+ return "bboxShape(" + strategy.getFieldName() + ")";
+ }
+
+ @Override
+ public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
+ LeafReader reader = readerContext.reader();
+ final NumericDocValues minX = DocValues.getNumeric(reader, strategy.field_minX);
+ final NumericDocValues minY = DocValues.getNumeric(reader, strategy.field_minY);
+ final NumericDocValues maxX = DocValues.getNumeric(reader, strategy.field_maxX);
+ final NumericDocValues maxY = DocValues.getNumeric(reader, strategy.field_maxY);
+
+ //reused
+ final Rectangle rect = strategy.getSpatialContext().makeRectangle(0,0,0,0);
+
+ return new FunctionValues() {
+ private int lastDocID = -1;
+
+ private double getDocValue(NumericDocValues values, int doc) throws IOException {
+ int curDocID = values.docID();
+ if (doc > curDocID) {
+ curDocID = values.advance(doc);
+ }
+ if (doc == curDocID) {
+ return Double.longBitsToDouble(values.longValue());
+ } else {
+ return 0.0;
+ }
+ }
+
+ @Override
+ public Object objectVal(int doc) throws IOException {
+ if (doc < lastDocID) {
+ throw new AssertionError("docs were sent out-of-order: lastDocID=" + lastDocID + " vs doc=" + doc);
+ }
+ lastDocID = doc;
+
+ double minXValue = getDocValue(minX, doc);
+ if (minX.docID() != doc) {
+ return null;
+ } else {
+ double minYValue = getDocValue(minY, doc);
+ double maxXValue = getDocValue(maxX, doc);
+ double maxYValue = getDocValue(maxY, doc);
+ rect.reset(minXValue, maxXValue, minYValue, maxYValue);
+ return rect;
+ }
+ }
+
+ @Override
+ public String strVal(int doc) throws IOException {//TODO support WKT output once Spatial4j does
+ Object v = objectVal(doc);
+ return v == null ? null : v.toString();
+ }
+
+ @Override
+ public boolean exists(int doc) throws IOException {
+ getDocValue(minX, doc);
+ return minX.docID() == doc;
+ }
+
+ @Override
+ public Explanation explain(int doc) throws IOException {
+ return Explanation.match(Float.NaN, toString(doc));
+ }
+
+ @Override
+ public String toString(int doc) throws IOException {
+ return description() + '=' + strVal(doc);
+ }
+ };
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ BBoxValueSource that = (BBoxValueSource) o;
+
+ if (!strategy.equals(that.strategy)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return strategy.hashCode();
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java b/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java
new file mode 100644
index 00000000000..8685d8824a6
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/legacy/DistanceValueSource.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.legacy;
+
+import org.apache.lucene.index.NumericDocValues;
+import org.locationtech.spatial4j.distance.DistanceCalculator;
+import org.locationtech.spatial4j.shape.Point;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * An implementation of the Lucene ValueSource model that returns the distance
+ * for a {@link PointVectorStrategy}.
+ *
+ * @lucene.internal
+ */
+public class DistanceValueSource extends ValueSource {
+
+ private PointVectorStrategy strategy;
+ private final Point from;
+ private final double multiplier;
+
+ /**
+ * Constructor.
+ */
+ public DistanceValueSource(PointVectorStrategy strategy, Point from, double multiplier) {
+ this.strategy = strategy;
+ this.from = from;
+ this.multiplier = multiplier;
+ }
+
+ /**
+ * Returns the ValueSource description.
+ */
+ @Override
+ public String description() {
+ return "DistanceValueSource("+strategy+", "+from+")";
+ }
+
+ /**
+ * Returns the FunctionValues used by the function query.
+ */
+ @Override
+ public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
+ LeafReader reader = readerContext.reader();
+
+ final NumericDocValues ptX = DocValues.getNumeric(reader, strategy.getFieldNameX());
+ final NumericDocValues ptY = DocValues.getNumeric(reader, strategy.getFieldNameY());
+
+ return new FunctionValues() {
+
+ private int lastDocID = -1;
+
+ private final Point from = DistanceValueSource.this.from;
+ private final DistanceCalculator calculator = strategy.getSpatialContext().getDistCalc();
+ private final double nullValue =
+ (strategy.getSpatialContext().isGeo() ? 180 * multiplier : Double.MAX_VALUE);
+
+ private double getDocValue(NumericDocValues values, int doc) throws IOException {
+ int curDocID = values.docID();
+ if (doc > curDocID) {
+ curDocID = values.advance(doc);
+ }
+ if (doc == curDocID) {
+ return Double.longBitsToDouble(values.longValue());
+ } else {
+ return 0.0;
+ }
+ }
+
+ @Override
+ public float floatVal(int doc) throws IOException {
+ return (float) doubleVal(doc);
+ }
+
+ @Override
+ public double doubleVal(int doc) throws IOException {
+ // make sure it has minX and area
+ double x = getDocValue(ptX, doc);
+ if (ptX.docID() == doc) {
+ double y = getDocValue(ptY, doc);
+ assert ptY.docID() == doc;
+ return calculator.distance(from, x, y) * multiplier;
+ }
+ return nullValue;
+ }
+
+ @Override
+ public String toString(int doc) throws IOException {
+ return description() + "=" + floatVal(doc);
+ }
+ };
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ DistanceValueSource that = (DistanceValueSource) o;
+
+ if (!from.equals(that.from)) return false;
+ if (!strategy.equals(that.strategy)) return false;
+ if (multiplier != that.multiplier) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return from.hashCode();
+ }
+}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java
similarity index 92%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java
index e98a4f0f567..b6a2897f5c9 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyDoubleField.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyDoubleField.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@@ -51,7 +51,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFloatField}.
*
*
To perform range querying or filtering against a
- * LegacyDoubleField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * LegacyDoubleField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* LegacyDoubleField, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#DOUBLE}. LegacyDoubleField
@@ -85,7 +85,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is < 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@@ -94,8 +94,8 @@ import org.apache.lucene.index.IndexOptions;
*
For more information on the internals of numeric trie
* indexing, including the precisionStep
- * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
- * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
+ * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
+ * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
*
If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@@ -103,7 +103,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed.
*
*
More advanced users can instead use {@link
- * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
+ * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
@@ -144,7 +144,7 @@ public final class LegacyDoubleField extends LegacyField {
/** Creates a stored or un-stored LegacyDoubleField with the provided value
* and default precisionStep {@link
- * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
+ * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* @param name field name
* @param value 64-bit double value
* @param stored Store.YES if the content should also be stored
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyField.java
similarity index 98%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyField.java
index 87ac0e566cf..7a6bde026a1 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyField.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyField.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java b/solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java
similarity index 95%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java
index 1f4b0af4768..a18a00a34a6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFieldType.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyFieldType.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
@@ -60,7 +60,7 @@ public final class LegacyFieldType extends FieldType {
/**
* LegacyNumericType: if non-null then the field's value will be indexed
- * numerically so that {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} can be used at
+ * numerically so that {@link org.apache.solr.legacy.LegacyNumericRangeQuery} can be used at
* search time.
*
* The default is null (no numeric type)
@@ -97,7 +97,7 @@ public final class LegacyFieldType extends FieldType {
*
* This has no effect if {@link #numericType()} returns null.
*
- * The default is {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
+ * The default is {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT}
* @see #setNumericPrecisionStep(int)
*
* @deprecated Please switch to {@link org.apache.lucene.index.PointValues} instead
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java
similarity index 92%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java
index ea3b84ab65f..79ec0bd441a 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyFloatField.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyFloatField.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FloatPoint;
@@ -49,7 +49,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyDoubleField}.
*
*
To perform range querying or filtering against a
- * LegacyFloatField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * LegacyFloatField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* LegacyFloatField, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#FLOAT}. LegacyFloatField
@@ -83,7 +83,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is < 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@@ -92,8 +92,8 @@ import org.apache.lucene.index.IndexOptions;
*
For more information on the internals of numeric trie
* indexing, including the precisionStep
- * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
- * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
+ * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
+ * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
*
If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@@ -101,7 +101,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed.
*
*
More advanced users can instead use {@link
- * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
+ * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
@@ -144,7 +144,7 @@ public final class LegacyFloatField extends LegacyField {
/** Creates a stored or un-stored LegacyFloatField with the provided value
* and default precisionStep {@link
- * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
+ * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* @param name field name
* @param value 32-bit double value
* @param stored Store.YES if the content should also be stored
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java
similarity index 92%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java
index e3ae9658b1d..838ad4ecce0 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyIntField.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyIntField.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@@ -50,7 +50,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyDoubleField}.
*
*
To perform range querying or filtering against a
- * LegacyIntField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * LegacyIntField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* LegacyIntField, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#INT}. LegacyIntField
@@ -84,7 +84,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is < 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@@ -93,8 +93,8 @@ import org.apache.lucene.index.IndexOptions;
*
For more information on the internals of numeric trie
* indexing, including the precisionStep
- * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
- * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
+ * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
+ * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
*
If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@@ -102,7 +102,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed.
*
*
More advanced users can instead use {@link
- * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
+ * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
@@ -145,7 +145,7 @@ public final class LegacyIntField extends LegacyField {
/** Creates a stored or un-stored LegacyIntField with the provided value
* and default precisionStep {@link
- * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
+ * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* @param name field name
* @param value 32-bit integer value
* @param stored Store.YES if the content should also be stored
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java b/solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java
similarity index 93%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java
index 3e20b448b96..fb4843733ec 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyLongField.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyLongField.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import org.apache.lucene.document.Document;
@@ -61,7 +61,7 @@ import org.apache.lucene.index.IndexOptions;
* long value.
*
*
To perform range querying or filtering against a
- * LegacyLongField, use {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * LegacyLongField, use {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
* To sort according to a
* LegacyLongField, use the normal numeric sort types, eg
* {@link org.apache.lucene.search.SortField.Type#LONG}. LegacyLongField
@@ -95,7 +95,7 @@ import org.apache.lucene.index.IndexOptions;
* LegacyFieldType#setNumericPrecisionStep} method if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is < 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
@@ -104,8 +104,8 @@ import org.apache.lucene.index.IndexOptions;
*
For more information on the internals of numeric trie
* indexing, including the precisionStep
- * configuration, see {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}. The format of
- * indexed values is described in {@link org.apache.lucene.legacy.LegacyNumericUtils}.
+ * configuration, see {@link org.apache.solr.legacy.LegacyNumericRangeQuery}. The format of
+ * indexed values is described in {@link org.apache.solr.legacy.LegacyNumericUtils}.
*
*
If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
@@ -113,7 +113,7 @@ import org.apache.lucene.index.IndexOptions;
* This will minimize disk space consumed.
*
*
More advanced users can instead use {@link
- * org.apache.lucene.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
+ * org.apache.solr.legacy.LegacyNumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.
*
@@ -154,7 +154,7 @@ public final class LegacyLongField extends LegacyField {
/** Creates a stored or un-stored LegacyLongField with the provided value
* and default precisionStep {@link
- * org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
+ * org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* @param name field name
* @param value 64-bit long value
* @param stored Store.YES if the content should also be stored
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java
similarity index 95%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java
index f172a200779..d07e497da27 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericRangeQuery.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import java.io.IOException;
@@ -41,9 +41,9 @@ import org.apache.lucene.index.Term; // for javadocs
/**
*
A {@link Query} that matches numeric values within a
* specified range. To use this, you must first index the
- * numeric values using {@link org.apache.lucene.legacy.LegacyIntField}, {@link
- * org.apache.lucene.legacy.LegacyFloatField}, {@link org.apache.lucene.legacy.LegacyLongField} or {@link org.apache.lucene.legacy.LegacyDoubleField} (expert: {@link
- * org.apache.lucene.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
+ * numeric values using {@link org.apache.solr.legacy.LegacyIntField}, {@link
+ * org.apache.solr.legacy.LegacyFloatField}, {@link org.apache.solr.legacy.LegacyLongField} or {@link org.apache.solr.legacy.LegacyDoubleField} (expert: {@link
+ * org.apache.solr.legacy.LegacyNumericTokenStream}). If your terms are instead textual,
* you should use {@link TermRangeQuery}.
*
*
You create a new LegacyNumericRangeQuery with the static
@@ -97,7 +97,7 @@ import org.apache.lucene.index.Term; // for javadocs
* (all numerical values like doubles, longs, floats, and ints are converted to
* lexicographic sortable string representations and stored with different precisions
* (for a more detailed description of how the values are stored,
- * see {@link org.apache.lucene.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
+ * see {@link org.apache.solr.legacy.LegacyNumericUtils}). A range is then divided recursively into multiple intervals for searching:
* The center of the range is searched only with the lowest possible precision in the trie,
* while the boundaries are matched more exactly. This reduces the number of terms dramatically.
You can choose any precisionStep when encoding values.
* Lower step values mean more precisions and so more terms in index (and index gets larger). The number
- * of indexed terms per value is (those are generated by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}):
+ * of indexed terms per value is (those are generated by {@link org.apache.solr.legacy.LegacyNumericTokenStream}):
*
@@ -149,8 +149,8 @@ import org.apache.lucene.index.Term; // for javadocs
*
Steps ≥64 for long/double and ≥32 for int/float produces one token
* per value in the index and querying is as slow as a conventional {@link TermRangeQuery}. But it can be used
* to produce fields, that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as
- * precisionStep). Using {@link org.apache.lucene.legacy.LegacyIntField},
- * {@link org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} for sorting
+ * precisionStep). Using {@link org.apache.solr.legacy.LegacyIntField},
+ * {@link org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} for sorting
* is ideal, because building the field cache is much faster than with text-only numbers.
* These fields have one term per value and therefore also work with term enumeration for building distinct lists
* (e.g. facets / preselected values to search for).
@@ -205,7 +205,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu
/**
* Factory that creates a LegacyNumericRangeQuery, that queries a long
- * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
+ * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
* by setting the min or max value to null. By setting inclusive to false, it will
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
@@ -231,7 +231,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu
/**
* Factory that creates a LegacyNumericRangeQuery, that queries a int
- * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
+ * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
* by setting the min or max value to null. By setting inclusive to false, it will
* match all documents excluding the bounds, with inclusive on, the boundaries are hits, too.
@@ -259,7 +259,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu
/**
* Factory that creates a LegacyNumericRangeQuery, that queries a double
- * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
+ * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16).
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
* by setting the min or max value to null.
* {@link Double#NaN} will never match a half-open range, to hit {@code NaN} use a query
@@ -289,7 +289,7 @@ public final class LegacyNumericRangeQuery extends MultiTermQu
/**
* Factory that creates a LegacyNumericRangeQuery, that queries a float
- * range using the default precisionStep {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
+ * range using the default precisionStep {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT_32} (8).
* You can have half-open ranges (which are in fact </≤ or >/≥ queries)
* by setting the min or max value to null.
* {@link Float#NaN} will never match a half-open range, to hit {@code NaN} use a query
@@ -390,8 +390,8 @@ public final class LegacyNumericRangeQuery extends MultiTermQu
*
* WARNING: This term enumeration is not guaranteed to be always ordered by
* {@link Term#compareTo}.
- * The ordering depends on how {@link org.apache.lucene.legacy.LegacyNumericUtils#splitLongRange} and
- * {@link org.apache.lucene.legacy.LegacyNumericUtils#splitIntRange} generates the sub-ranges. For
+ * The ordering depends on how {@link org.apache.solr.legacy.LegacyNumericUtils#splitLongRange} and
+ * {@link org.apache.solr.legacy.LegacyNumericUtils#splitIntRange} generates the sub-ranges. For
* {@link MultiTermQuery} ordering is not relevant.
*/
private final class NumericRangeTermsEnum extends FilteredTermsEnum {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
similarity index 95%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
index a2aba19e2ac..c18cd595278 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericTokenStream.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericTokenStream.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import java.util.Objects;
@@ -35,10 +35,10 @@ import org.apache.lucene.util.NumericUtils;
/**
* Expert: This class provides a {@link TokenStream}
* for indexing numeric values that can be used by {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
*
- *
Note that for simple usage, {@link org.apache.lucene.legacy.LegacyIntField}, {@link
- * org.apache.lucene.legacy.LegacyLongField}, {@link org.apache.lucene.legacy.LegacyFloatField} or {@link org.apache.lucene.legacy.LegacyDoubleField} is
+ *
Note that for simple usage, {@link org.apache.solr.legacy.LegacyIntField}, {@link
+ * org.apache.solr.legacy.LegacyLongField}, {@link org.apache.solr.legacy.LegacyFloatField} or {@link org.apache.solr.legacy.LegacyDoubleField} is
* recommended. These fields disable norms and
* term freqs, as they are not usually needed during
* searching. If you need to change these settings, you
@@ -81,7 +81,7 @@ import org.apache.lucene.util.NumericUtils;
* than one numeric field, use a separate LegacyNumericTokenStream
* instance for each.
*
- *
See {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} for more details on the
+ *
See {@link org.apache.solr.legacy.LegacyNumericRangeQuery} for more details on the
* precisionStep
* parameter as well as how numeric fields work under the hood.
@@ -140,7 +140,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
}
}
- /** Implementation of {@link org.apache.lucene.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
+ /** Implementation of {@link org.apache.solr.legacy.LegacyNumericTokenStream.LegacyNumericTermAttribute}.
* @lucene.internal
* @since 4.0
*/
@@ -240,7 +240,7 @@ public final class LegacyNumericTokenStream extends TokenStream {
/**
* Creates a token stream for numeric values using the default precisionStep
- * {@link org.apache.lucene.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
+ * {@link org.apache.solr.legacy.LegacyNumericUtils#PRECISION_STEP_DEFAULT} (16). The stream is not yet initialized,
* before using set a value using the various set???Value() methods.
*/
public LegacyNumericTokenStream() {
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
similarity index 97%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
index 345b4974b02..8cc3fcc7ed2 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericType.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericType.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
/** Data type of the numeric value
* @since 3.2
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
similarity index 94%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java
rename to solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
index e6659d7e102..52fae9c8171 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/LegacyNumericUtils.java
+++ b/solr/core/src/java/org/apache/solr/legacy/LegacyNumericUtils.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
import java.io.IOException;
@@ -43,9 +43,9 @@ import org.apache.lucene.util.BytesRefBuilder;
* during encoding.
*
*
For easy usage, the trie algorithm is implemented for indexing inside
- * {@link org.apache.lucene.legacy.LegacyNumericTokenStream} that can index int, long,
+ * {@link org.apache.solr.legacy.LegacyNumericTokenStream} that can index int, long,
* float, and double. For querying,
- * {@link org.apache.lucene.legacy.LegacyNumericRangeQuery} implements the query part
+ * {@link org.apache.solr.legacy.LegacyNumericRangeQuery} implements the query part
* for the same data types.
*
* @lucene.internal
@@ -61,15 +61,15 @@ public final class LegacyNumericUtils {
private LegacyNumericUtils() {} // no instance!
/**
- * The default precision step used by {@link org.apache.lucene.legacy.LegacyLongField},
- * {@link org.apache.lucene.legacy.LegacyDoubleField}, {@link org.apache.lucene.legacy.LegacyNumericTokenStream}, {@link
- * org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ * The default precision step used by {@link org.apache.solr.legacy.LegacyLongField},
+ * {@link org.apache.solr.legacy.LegacyDoubleField}, {@link org.apache.solr.legacy.LegacyNumericTokenStream}, {@link
+ * org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static final int PRECISION_STEP_DEFAULT = 16;
/**
- * The default precision step used by {@link org.apache.lucene.legacy.LegacyIntField} and
- * {@link org.apache.lucene.legacy.LegacyFloatField}.
+ * The default precision step used by {@link org.apache.solr.legacy.LegacyIntField} and
+ * {@link org.apache.solr.legacy.LegacyFloatField}.
*/
public static final int PRECISION_STEP_DEFAULT_32 = 8;
@@ -101,7 +101,7 @@ public final class LegacyNumericUtils {
/**
* Returns prefix coded bits after reducing the precision by shift bits.
- * This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
+ * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* After encoding, {@code bytes.offset} will always be 0.
* @param val the numeric value
* @param shift how many bits to strip from the right
@@ -128,7 +128,7 @@ public final class LegacyNumericUtils {
/**
* Returns prefix coded bits after reducing the precision by shift bits.
- * This is method is used by {@link org.apache.lucene.legacy.LegacyNumericTokenStream}.
+ * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
* After encoding, {@code bytes.offset} will always be 0.
* @param val the numeric value
* @param shift how many bits to strip from the right
@@ -232,7 +232,7 @@ public final class LegacyNumericUtils {
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
* {@link LongRangeBuilder#addRange(BytesRef,BytesRef)}
* method.
- *
This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ *
This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static void splitLongRange(final LongRangeBuilder builder,
final int precisionStep, final long minBound, final long maxBound
@@ -246,7 +246,7 @@ public final class LegacyNumericUtils {
* {@link org.apache.lucene.search.BooleanQuery} for each call to its
* {@link IntRangeBuilder#addRange(BytesRef,BytesRef)}
* method.
- *
This method is used by {@link org.apache.lucene.legacy.LegacyNumericRangeQuery}.
+ *
This method is used by {@link org.apache.solr.legacy.LegacyNumericRangeQuery}.
*/
public static void splitIntRange(final IntRangeBuilder builder,
final int precisionStep, final int minBound, final int maxBound
diff --git a/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java b/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java
new file mode 100644
index 00000000000..3b29a61eea8
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/legacy/PointVectorStrategy.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.legacy;
+
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.solr.legacy.LegacyDoubleField;
+import org.apache.solr.legacy.LegacyFieldType;
+import org.apache.solr.legacy.LegacyNumericRangeQuery;
+import org.apache.solr.legacy.LegacyNumericType;
+import org.apache.lucene.queries.function.FunctionRangeQuery;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.spatial.SpatialStrategy;
+import org.apache.lucene.spatial.query.SpatialArgs;
+import org.apache.lucene.spatial.query.SpatialOperation;
+import org.apache.lucene.spatial.query.UnsupportedSpatialOperation;
+import org.locationtech.spatial4j.context.SpatialContext;
+import org.locationtech.spatial4j.shape.Circle;
+import org.locationtech.spatial4j.shape.Point;
+import org.locationtech.spatial4j.shape.Rectangle;
+import org.locationtech.spatial4j.shape.Shape;
+
+/**
+ * Simple {@link SpatialStrategy} which represents Points in two numeric fields.
+ * The Strategy's best feature is decent distance sort.
+ *
+ *
+ * Characteristics:
+ *
+ *
+ *
Only indexes points; just one per field value.
+ *
Can query by a rectangle or circle.
+ *
{@link
+ * org.apache.lucene.spatial.query.SpatialOperation#Intersects} and {@link
+ * SpatialOperation#IsWithin} is supported.
+ *
Requires DocValues for
+ * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)} and for
+ * searching with a Circle.
+ *
+ *
+ *
+ * Implementation:
+ *
+ * This is a simple Strategy. Search works with a pair of range queries on two {@link DoublePoint}s representing
+ * x & y fields. A Circle query does the same bbox query but adds a
+ * ValueSource filter on
+ * {@link #makeDistanceValueSource(org.locationtech.spatial4j.shape.Point)}.
+ *
+ * One performance shortcoming with this strategy is that a scenario involving
+ * both a search using a Circle and sort will result in calculations for the
+ * spatial distance being done twice -- once for the filter and second for the
+ * sort.
+ *
+ * @lucene.experimental
+ */
+public class PointVectorStrategy extends SpatialStrategy {
+
+ // note: we use a FieldType to articulate the options we want on the field. We don't use it as-is with a Field, we
+ // create more than one Field.
+
+ /**
+ * pointValues, docValues, and nothing else.
+ */
+ public static FieldType DEFAULT_FIELDTYPE;
+
+ @Deprecated
+ public static LegacyFieldType LEGACY_FIELDTYPE;
+ static {
+ // Default: pointValues + docValues
+ FieldType type = new FieldType();
+ type.setDimensions(1, Double.BYTES);//pointValues (assume Double)
+ type.setDocValuesType(DocValuesType.NUMERIC);//docValues
+ type.setStored(false);
+ type.freeze();
+ DEFAULT_FIELDTYPE = type;
+ // Legacy default: legacyNumerics
+ LegacyFieldType legacyType = new LegacyFieldType();
+ legacyType.setIndexOptions(IndexOptions.DOCS);
+ legacyType.setNumericType(LegacyNumericType.DOUBLE);
+ legacyType.setNumericPrecisionStep(8);// same as solr default
+ legacyType.setDocValuesType(DocValuesType.NONE);//no docValues!
+ legacyType.setStored(false);
+ legacyType.freeze();
+ LEGACY_FIELDTYPE = legacyType;
+ }
+
+ public static final String SUFFIX_X = "__x";
+ public static final String SUFFIX_Y = "__y";
+
+ private final String fieldNameX;
+ private final String fieldNameY;
+
+ private final int fieldsLen;
+ private final boolean hasStored;
+ private final boolean hasDocVals;
+ private final boolean hasPointVals;
+ // equiv to "hasLegacyNumerics":
+ private final LegacyFieldType legacyNumericFieldType; // not stored; holds precision step.
+
+ /**
+ * Create a new {@link PointVectorStrategy} instance that uses {@link DoublePoint} and {@link DoublePoint#newRangeQuery}
+ */
+ public static PointVectorStrategy newInstance(SpatialContext ctx, String fieldNamePrefix) {
+ return new PointVectorStrategy(ctx, fieldNamePrefix, DEFAULT_FIELDTYPE);
+ }
+
+ /**
+ * Create a new {@link PointVectorStrategy} instance that uses {@link LegacyDoubleField} for backwards compatibility.
+ * However, back-compat is limited; we don't support circle queries or {@link #makeDistanceValueSource(Point, double)}
+ * since that requires docValues (the legacy config didn't have that).
+ *
+ * @deprecated LegacyNumerics will be removed
+ */
+ @Deprecated
+ public static PointVectorStrategy newLegacyInstance(SpatialContext ctx, String fieldNamePrefix) {
+ return new PointVectorStrategy(ctx, fieldNamePrefix, LEGACY_FIELDTYPE);
+ }
+
+ /**
+ * Create a new instance configured with the provided FieldType options. See {@link #DEFAULT_FIELDTYPE}.
+ * a field type is used to articulate the desired options (namely pointValues, docValues, stored). Legacy numerics
+ * is configurable this way too.
+ */
+ public PointVectorStrategy(SpatialContext ctx, String fieldNamePrefix, FieldType fieldType) {
+ super(ctx, fieldNamePrefix);
+ this.fieldNameX = fieldNamePrefix+SUFFIX_X;
+ this.fieldNameY = fieldNamePrefix+SUFFIX_Y;
+
+ int numPairs = 0;
+ if ((this.hasStored = fieldType.stored())) {
+ numPairs++;
+ }
+ if ((this.hasDocVals = fieldType.docValuesType() != DocValuesType.NONE)) {
+ numPairs++;
+ }
+ if ((this.hasPointVals = fieldType.pointDimensionCount() > 0)) {
+ numPairs++;
+ }
+ if (fieldType.indexOptions() != IndexOptions.NONE && fieldType instanceof LegacyFieldType && ((LegacyFieldType)fieldType).numericType() != null) {
+ if (hasPointVals) {
+ throw new IllegalArgumentException("pointValues and LegacyNumericType are mutually exclusive");
+ }
+ final LegacyFieldType legacyType = (LegacyFieldType) fieldType;
+ if (legacyType.numericType() != LegacyNumericType.DOUBLE) {
+ throw new IllegalArgumentException(getClass() + " does not support " + legacyType.numericType());
+ }
+ numPairs++;
+ legacyNumericFieldType = new LegacyFieldType(LegacyDoubleField.TYPE_NOT_STORED);
+ legacyNumericFieldType.setNumericPrecisionStep(legacyType.numericPrecisionStep());
+ legacyNumericFieldType.freeze();
+ } else {
+ legacyNumericFieldType = null;
+ }
+ this.fieldsLen = numPairs * 2;
+ }
+
+
+ String getFieldNameX() {
+ return fieldNameX;
+ }
+
+ String getFieldNameY() {
+ return fieldNameY;
+ }
+
+ @Override
+ public Field[] createIndexableFields(Shape shape) {
+ if (shape instanceof Point)
+ return createIndexableFields((Point) shape);
+ throw new UnsupportedOperationException("Can only index Point, not " + shape);
+ }
+
+ /** @see #createIndexableFields(org.locationtech.spatial4j.shape.Shape) */
+ public Field[] createIndexableFields(Point point) {
+ Field[] fields = new Field[fieldsLen];
+ int idx = -1;
+ if (hasStored) {
+ fields[++idx] = new StoredField(fieldNameX, point.getX());
+ fields[++idx] = new StoredField(fieldNameY, point.getY());
+ }
+ if (hasDocVals) {
+ fields[++idx] = new DoubleDocValuesField(fieldNameX, point.getX());
+ fields[++idx] = new DoubleDocValuesField(fieldNameY, point.getY());
+ }
+ if (hasPointVals) {
+ fields[++idx] = new DoublePoint(fieldNameX, point.getX());
+ fields[++idx] = new DoublePoint(fieldNameY, point.getY());
+ }
+ if (legacyNumericFieldType != null) {
+ fields[++idx] = new LegacyDoubleField(fieldNameX, point.getX(), legacyNumericFieldType);
+ fields[++idx] = new LegacyDoubleField(fieldNameY, point.getY(), legacyNumericFieldType);
+ }
+ assert idx == fields.length - 1;
+ return fields;
+ }
+
+ @Override
+ public ValueSource makeDistanceValueSource(Point queryPoint, double multiplier) {
+ return new DistanceValueSource(this, queryPoint, multiplier);
+ }
+
+ @Override
+ public ConstantScoreQuery makeQuery(SpatialArgs args) {
+ if(! SpatialOperation.is( args.getOperation(),
+ SpatialOperation.Intersects,
+ SpatialOperation.IsWithin ))
+ throw new UnsupportedSpatialOperation(args.getOperation());
+ Shape shape = args.getShape();
+ if (shape instanceof Rectangle) {
+ Rectangle bbox = (Rectangle) shape;
+ return new ConstantScoreQuery(makeWithin(bbox));
+ } else if (shape instanceof Circle) {
+ Circle circle = (Circle)shape;
+ Rectangle bbox = circle.getBoundingBox();
+ Query approxQuery = makeWithin(bbox);
+ BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ FunctionRangeQuery vsRangeQuery =
+ new FunctionRangeQuery(makeDistanceValueSource(circle.getCenter()), 0.0, circle.getRadius(), true, true);
+ bqBuilder.add(approxQuery, BooleanClause.Occur.FILTER);//should have lowest "cost" value; will drive iteration
+ bqBuilder.add(vsRangeQuery, BooleanClause.Occur.FILTER);
+ return new ConstantScoreQuery(bqBuilder.build());
+ } else {
+ throw new UnsupportedOperationException("Only Rectangles and Circles are currently supported, " +
+ "found [" + shape.getClass() + "]");//TODO
+ }
+ }
+
+ /**
+ * Constructs a query to retrieve documents that fully contain the input envelope.
+ */
+ private Query makeWithin(Rectangle bbox) {
+ BooleanQuery.Builder bq = new BooleanQuery.Builder();
+ BooleanClause.Occur MUST = BooleanClause.Occur.MUST;
+ if (bbox.getCrossesDateLine()) {
+ //use null as performance trick since no data will be beyond the world bounds
+ bq.add(rangeQuery(fieldNameX, null/*-180*/, bbox.getMaxX()), BooleanClause.Occur.SHOULD );
+ bq.add(rangeQuery(fieldNameX, bbox.getMinX(), null/*+180*/), BooleanClause.Occur.SHOULD );
+ bq.setMinimumNumberShouldMatch(1);//must match at least one of the SHOULD
+ } else {
+ bq.add(rangeQuery(fieldNameX, bbox.getMinX(), bbox.getMaxX()), MUST);
+ }
+ bq.add(rangeQuery(fieldNameY, bbox.getMinY(), bbox.getMaxY()), MUST);
+ return bq.build();
+ }
+
+ /**
+ * Returns a numeric range query based on FieldType
+ * {@link LegacyNumericRangeQuery} is used for indexes created using {@code FieldType.LegacyNumericType}
+ * {@link DoublePoint#newRangeQuery} is used for indexes created using {@link DoublePoint} fields
+ */
+ private Query rangeQuery(String fieldName, Double min, Double max) {
+ if (hasPointVals) {
+ if (min == null) {
+ min = Double.NEGATIVE_INFINITY;
+ }
+
+ if (max == null) {
+ max = Double.POSITIVE_INFINITY;
+ }
+
+ return DoublePoint.newRangeQuery(fieldName, min, max);
+
+ } else if (legacyNumericFieldType != null) {// todo remove legacy numeric support in 7.0
+ return LegacyNumericRangeQuery.newDoubleRange(fieldName, legacyNumericFieldType.numericPrecisionStep(), min, max, true, true);//inclusive
+ }
+ //TODO try doc-value range query?
+ throw new UnsupportedOperationException("An index is required for this operation.");
+ }
+}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-1.png b/solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-1.png
similarity index 100%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-1.png
rename to solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-1.png
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-2.png b/solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-2.png
similarity index 100%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/doc-files/nrq-formula-2.png
rename to solr/core/src/java/org/apache/solr/legacy/doc-files/nrq-formula-2.png
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java b/solr/core/src/java/org/apache/solr/legacy/package-info.java
similarity index 96%
rename from lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java
rename to solr/core/src/java/org/apache/solr/legacy/package-info.java
index d0167f80023..df981d0157b 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/legacy/package-info.java
+++ b/solr/core/src/java/org/apache/solr/legacy/package-info.java
@@ -18,4 +18,4 @@
/**
* Deprecated stuff!
*/
-package org.apache.lucene.legacy;
+package org.apache.solr.legacy;
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
index ffbbb36dbd7..63b47479905 100644
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymFilterFactory.java
@@ -28,6 +28,7 @@ import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.FlattenGraphFilterFactory; // javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.synonym.SynonymMap;
@@ -50,7 +51,11 @@ import org.slf4j.LoggerFactory;
/**
* TokenFilterFactory and ManagedResource implementation for
* doing CRUD on synonyms using the REST API.
+ *
+ * @deprecated Use {@link ManagedSynonymGraphFilterFactory} instead, but be sure to also
+ * use {@link FlattenGraphFilterFactory} at index time (not at search time) as well.
*/
+@Deprecated
public class ManagedSynonymFilterFactory extends BaseManagedTokenFilterFactory {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java
new file mode 100644
index 00000000000..1f4a9707139
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedSynonymGraphFilterFactory.java
@@ -0,0 +1,437 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.rest.schema.analysis;
+import java.io.IOException;
+import java.io.Reader;
+import java.lang.invoke.MethodHandles;
+import java.text.ParseException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory;
+import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.BaseSolrResource;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
+import org.restlet.data.Status;
+import org.restlet.resource.ResourceException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * TokenFilterFactory and ManagedResource implementation for
+ * doing CRUD on synonyms using the REST API.
+ */
+public class ManagedSynonymGraphFilterFactory extends BaseManagedTokenFilterFactory {
+
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public static final String SYNONYM_MAPPINGS = "synonymMappings";
+ public static final String IGNORE_CASE_INIT_ARG = "ignoreCase";
+
+ /**
+ * Used internally to preserve the case of synonym mappings regardless
+ * of the ignoreCase setting.
+ */
+ private static class CasePreservedSynonymMappings {
+ Map> mappings = new TreeMap<>();
+
+ /**
+ * Provides a view of the mappings for a given term; specifically, if
+ * ignoreCase is true, then the returned "view" contains the mappings
+ * for all known cases of the term, if it is false, then only the
+ * mappings for the specific case is returned.
+ */
+ Set getMappings(boolean ignoreCase, String key) {
+ Set synMappings = null;
+ if (ignoreCase) {
+ // TODO: should we return the mapped values in all lower-case here?
+ if (mappings.size() == 1) {
+ // if only one in the map (which is common) just return it directly
+ return mappings.values().iterator().next();
+ }
+
+ synMappings = new TreeSet<>();
+ for (Set next : mappings.values())
+ synMappings.addAll(next);
+ } else {
+ synMappings = mappings.get(key);
+ }
+ return synMappings;
+ }
+
+ public String toString() {
+ return mappings.toString();
+ }
+ }
+
+ /**
+ * ManagedResource implementation for synonyms, which are so specialized that
+ * it makes sense to implement this class as an inner class as it has little
+ * application outside the SynonymFilterFactory use cases.
+ */
+ public static class SynonymManager extends ManagedResource
+ implements ManagedResource.ChildResourceSupport
+ {
+ protected Map synonymMappings;
+
+ public SynonymManager(String resourceId, SolrResourceLoader loader, StorageIO storageIO)
+ throws SolrException {
+ super(resourceId, loader, storageIO);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected void onManagedDataLoadedFromStorage(NamedList> managedInitArgs, Object managedData)
+ throws SolrException
+ {
+ NamedList