diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3d29039ecec..82e81d1e352 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -5,10 +5,6 @@ http://s.apache.org/luceneversions
======================= Lucene 6.2.0 =======================
-Bug Fixes
-
-* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
-
New Features
* LUCENE-6968: LSH Filter (Tommaso Teofili, Andy Hind, Cao Manh Dat)
@@ -25,6 +21,13 @@ New Features
analyzer for the Ukrainian language (Andriy Rysin via Mike
McCandless)
+Bug Fixes
+
+* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
+
+* LUCENE-7340: MemoryIndex.toString() could throw an NPE; fixed, and renamed to toStringDebug().
+ (Daniel Collins, David Smiley)
+
Improvements
* LUCENE-7323: Compound file writing now verifies the incoming
@@ -62,6 +65,10 @@ Improvements
ScandinavianNormalizationFilterFactory now implement MultiTermAwareComponent.
(Adrien Grand)
+* LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to
+ control whether to split on whitespace prior to text analysis. Default
+ behavior remains unchanged: split-on-whitespace=true. (Steve Rowe)
+
Optimizations
* LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
index f8874eb13e4..d826a60d677 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
+import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
@@ -75,6 +76,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
MockGraphTokenFilter.class,
MockHoleInjectingTokenFilter.class,
MockRandomLookaheadTokenFilter.class,
+ MockSynonymFilter.class,
MockTokenFilter.class,
MockVariableLengthPayloadFilter.class,
ValidatingTokenFilter.class,
diff --git a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt
new file mode 100644
index 00000000000..114260476e4
--- /dev/null
+++ b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt
@@ -0,0 +1,19 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This map normalizes some characters used in Ukrainian text
+"\u2019" => "'"
+"\u02BC" => "'"
+
+# Remove accent
+"\u0301" => ""
diff --git a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict
index 679e39251c3..246897061aa 100644
Binary files a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict and b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict differ
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
index 87d3be53432..a38fc63e873 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
@@ -37,22 +37,29 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
- assertAnalyzesTo(a, "Ця п'єса у свою чергу рухається по колу.",
- new String[] { "п'єса", "черга", "рухатися", "кола", "коло", "коло", "кіл", "кіл" });
+ assertAnalyzesTo(a, "Ця п'єса, у свою чергу, рухається по емоційно-напруженому колу за ритм-енд-блюзом.",
+ new String[] { "п'єса", "черга", "рухатися", "емоційно", "напружений", "кола", "коло", "кіл", "ритм", "енд", "блюз" });
a.close();
}
public void testSpecialCharsTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
- assertAnalyzesTo(a, "Ця пʼєса, у сво́ю чергу, рухається по колу.",
- new String[] { "п'єса", "черга", "рухатися", "кола", "коло", "коло", "кіл", "кіл" });
+ assertAnalyzesTo(a, "Ця пʼєса, у сво́ю чергу рухається.",
+ new String[] { "п'єса", "черга", "рухатися" });
a.close();
}
public void testCapsTokenStream() throws Exception {
Analyzer a = new UkrainianMorfologikAnalyzer();
- assertAnalyzesTo(a, "Цей Чайковський.",
- new String[] { "чайковський" });
+ assertAnalyzesTo(a, "Цей Чайковський і Ґете.",
+ new String[] { "чайковський", "ґете" });
+ a.close();
+ }
+
+ public void testSampleSentence() throws Exception {
+ Analyzer a = new UkrainianMorfologikAnalyzer();
+ assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
+ new String[] { "проект", "генерування", "словник", "тег", "частина", "мова", "українська", "український", "мова" });
a.close();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
index a33d64072c9..2429c330f28 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -148,6 +148,10 @@ final class DocumentsWriter implements Closeable, Accountable {
return seqNo;
}
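+  /** Explicitly records the last sequence number handed out; called by
+   *  IndexWriter#deleteAll() so later sequence numbers stay monotonic. */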
+ synchronized void setLastSeqNo(long seqNo) {
+ lastSeqNo = seqNo;
+ }
+
// TODO: we could check w/ FreqProxTermsWriter: if the
// term doesn't exist, don't bother buffering into the
// per-DWPT map (but still must go into the global map)
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index f0d756bdfb5..fd2553671ce 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -765,8 +765,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
*
*
 * NOTE: after this writer is created, the given configuration instance
- * cannot be passed to another writer. If you intend to do so, you should
- * {@link IndexWriterConfig#clone() clone} it beforehand.
+ * cannot be passed to another writer.
*
* @param d
* the index directory. The index is either created or appended
@@ -2348,7 +2347,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
globalFieldNumberMap.clear();
success = true;
- return docWriter.deleteQueue.getNextSequenceNumber();
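+      // Keep DocumentsWriter's lastSeqNo in sync with the sequence number
+      // returned to the caller (see DocumentsWriter.setLastSeqNo above).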
+ long seqNo = docWriter.deleteQueue.getNextSequenceNumber();
+ docWriter.setLastSeqNo(seqNo);
+ return seqNo;
} finally {
docWriter.unlockAllAfterAbortAll(this);
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
index c2246823dcf..3e26965deee 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -137,40 +136,4 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
System.out.println(s);
}
}
-
- // stupid filter that inserts synonym of 'hte' for 'the'
- private class MockSynonymFilter extends TokenFilter {
- State bufferedState;
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (bufferedState != null) {
- restoreState(bufferedState);
- posIncAtt.setPositionIncrement(0);
- termAtt.setEmpty().append("hte");
- bufferedState = null;
- return true;
- } else if (input.incrementToken()) {
- if (termAtt.toString().equals("the")) {
- bufferedState = captureState();
- }
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- bufferedState = null;
- }
- }
-
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
index 779c1f21a1f..a1b2a5c2917 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
@@ -534,4 +534,19 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearc
iw.close();
dir.close();
}
+
+ public void testDeleteAll() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ SearcherManager mgr = new SearcherManager(w, new SearcherFactory());
+ nrtDeletesThread = new ControlledRealTimeReopenThread<>(w, mgr, 0.1, 0.01);
+ nrtDeletesThread.setName("NRTDeletes Reopen Thread");
+ nrtDeletesThread.setDaemon(true);
+ nrtDeletesThread.start();
+
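+    // Exercises the DocumentsWriter.setLastSeqNo() change: waitForGeneration()
+    // must observe the sequence number returned by deleteAll(). gen1 is unused;
+    // only the generation from deleteAll() matters here.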
+ long gen1 = w.addDocument(new Document());
+ long gen2 = w.deleteAll();
+ nrtDeletesThread.waitForGeneration(gen2);
+ IOUtils.close(nrtDeletesThread, nrtDeletes, w, dir);
+ }
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
index 205fbab0981..d3019e3d077 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -121,7 +122,7 @@ public class TestQueryBuilder extends LuceneTestCase {
assertNull(builder.createBooleanQuery("field", ""));
}
- /** adds synonym of "dog" for "dogs". */
+ /** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
static class MockSynonymAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
@@ -130,37 +131,6 @@ public class TestQueryBuilder extends LuceneTestCase {
}
}
- /**
- * adds synonym of "dog" for "dogs".
- */
- protected static class MockSynonymFilter extends TokenFilter {
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- boolean addSynonym = false;
-
- public MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (addSynonym) { // inject our synonym
- clearAttributes();
- termAtt.setEmpty().append("dog");
- posIncAtt.setPositionIncrement(0);
- addSynonym = false;
- return true;
- }
-
- if (input.incrementToken()) {
- addSynonym = termAtt.toString().equals("dogs");
- return true;
- } else {
- return false;
- }
- }
- }
-
/** simple synonyms test */
public void testSynonyms() throws Exception {
SynonymQuery expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -180,6 +150,15 @@ public class TestQueryBuilder extends LuceneTestCase {
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old dogs"));
}
+ /** forms multiphrase query */
+ public void testMultiWordSynonymsPhrase() throws Exception {
+ MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
+ expectedBuilder.add(new Term[] { new Term("field", "guinea"), new Term("field", "cavy") });
+ expectedBuilder.add(new Term("field", "pig"));
+ QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
+ assertEquals(expectedBuilder.build(), queryBuilder.createPhraseQuery("field", "guinea pig"));
+ }
+
protected static class SimpleCJKTokenizer extends Tokenizer {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index cde20e57670..cdd53ed9e2f 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -43,10 +43,21 @@ import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.*;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefArray;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.IntBlockPool.SliceReader;
import org.apache.lucene.util.IntBlockPool.SliceWriter;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
+import org.apache.lucene.util.RecyclingIntBlockAllocator;
+import org.apache.lucene.util.StringHelper;
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
@@ -746,13 +757,14 @@ public class MemoryIndex {
* Returns a String representation of the index data for debugging purposes.
*
* @return the string representation
+ * @lucene.experimental
*/
- @Override
- public String toString() {
+ public String toStringDebug() {
StringBuilder result = new StringBuilder(256);
int sumPositions = 0;
int sumTerms = 0;
final BytesRef spare = new BytesRef();
+ final BytesRefBuilder payloadBuilder = storePayloads ? new BytesRefBuilder() : null;
    for (Map.Entry<String, Info> entry : fields.entrySet()) {
String fieldName = entry.getKey();
Info info = entry.getValue();
@@ -778,9 +790,16 @@ public class MemoryIndex {
result.append(", ");
}
}
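+      // When payloads are stored, each position is followed by an int offset
+      // into payloadsBytesRefs; -1 marks a position without a payload.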
+ if (storePayloads) {
+ int payloadIndex = postingsReader.readInt();
+ if (payloadIndex != -1) {
+ result.append(", " + payloadsBytesRefs.get(payloadBuilder, payloadIndex));
+ }
+ }
result.append(")");
+
if (!postingsReader.endOfSlice()) {
- result.append(",");
+ result.append(", ");
}
}
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 57514578b16..2f95a4e5cca 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -464,4 +464,26 @@ public class TestMemoryIndex extends LuceneTestCase {
assertEquals("term", leafReader.getBinaryDocValues("field").get(0).utf8ToString());
}
+ public void testToStringDebug() {
+ MemoryIndex mi = new MemoryIndex(true, true);
+ Analyzer analyzer = new MockPayloadAnalyzer();
+
+ mi.addField("analyzedField", "aa bb aa", analyzer);
+
+ FieldType type = new FieldType();
+ type.setDimensions(1, 4);
+ type.setDocValuesType(DocValuesType.BINARY);
+ type.freeze();
+ mi.addField(new BinaryPoint("pointAndDvField", "term".getBytes(StandardCharsets.UTF_8), type), analyzer);
+
+ assertEquals("analyzedField:\n" +
+ "\t'[61 61]':2: [(0, 0, 2, [70 6f 73 3a 20 30]), (1, 6, 8, [70 6f 73 3a 20 32])]\n" +
+ "\t'[62 62]':1: [(1, 3, 5, [70 6f 73 3a 20 31])]\n" +
+ "\tterms=2, positions=3\n" +
+ "pointAndDvField:\n" +
+ "\tterms=0, positions=0\n" +
+ "\n" +
+ "fields=2, terms=2, positions=3", mi.toStringDebug());
+ }
+
}
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
index 85b14614435..2c5fcbabde5 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
@@ -112,4 +112,4 @@ interface CharStream {
void Done();
}
-/* JavaCC - OriginalChecksum=c847dd1920bf7901125a7244125682ad (do not edit this line) */
+/* JavaCC - OriginalChecksum=30b94cad7b10d0d81e3a59a1083939d0 (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
index b9963ec1bd5..69a7559b71a 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
@@ -27,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
/**
* A QueryParser which constructs queries to search multiple fields.
@@ -148,18 +149,54 @@ public class MultiFieldQueryParser extends QueryParser
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
if (field == null) {
      List<Query> clauses = new ArrayList<>();
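+      // First pass: collect each field's query, tracking the largest number of
+      // top-level clauses any field produced, so the queries can be recombined
+      // term-by-term across fields below.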
+ Query[] fieldQueries = new Query[fields.length];
+ int maxTerms = 0;
for (int i = 0; i < fields.length; i++) {
Query q = super.getFieldQuery(fields[i], queryText, quoted);
if (q != null) {
- //If the user passes a map of boosts
- if (boosts != null) {
- //Get the boost from the map and apply them
- Float boost = boosts.get(fields[i]);
- if (boost != null) {
- q = new BoostQuery(q, boost.floatValue());
+ if (q instanceof TermQuery) {
+ maxTerms = Math.max(1, maxTerms);
+ } else if (q instanceof BooleanQuery) {
+ maxTerms = Math.max(maxTerms, ((BooleanQuery)q).clauses().size());
+ }
+ fieldQueries[i] = q;
+ }
+ }
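+      // Second pass: for each term position, gather that position's clause
+      // from every field and OR them together, so per-field boosts stay
+      // attached to the right clause.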
+ for (int termNum = 0; termNum < maxTerms; termNum++) {
+        List<Query> termClauses = new ArrayList<>();
+ for (int i = 0; i < fields.length; i++) {
+ if (fieldQueries[i] != null) {
+ Query q = null;
+ if (fieldQueries[i] instanceof BooleanQuery) {
+            List<BooleanClause> nestedClauses = ((BooleanQuery)fieldQueries[i]).clauses();
+ if (termNum < nestedClauses.size()) {
+ q = nestedClauses.get(termNum).getQuery();
+ }
+ } else if (termNum == 0) { // e.g. TermQuery-s
+ q = fieldQueries[i];
+ }
+ if (q != null) {
+ if (boosts != null) {
+ //Get the boost from the map and apply them
+ Float boost = boosts.get(fields[i]);
+ if (boost != null) {
+ q = new BoostQuery(q, boost);
+ }
+ }
+ termClauses.add(q);
}
}
- clauses.add(q);
+ }
+ if (maxTerms > 1) {
+ if (termClauses.size() > 0) {
+ BooleanQuery.Builder builder = newBooleanQuery();
+ for (Query termClause : termClauses) {
+ builder.add(termClause, BooleanClause.Occur.SHOULD);
+ }
+ clauses.add(builder.build());
+ }
+ } else {
+ clauses.addAll(termClauses);
}
}
if (clauses.size() == 0) // happens for stopwords
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
index a0ddab2d363..3c02be3f004 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
@@ -184,4 +184,4 @@ public class ParseException extends Exception {
}
}
-/* JavaCC - OriginalChecksum=61602edcb3a15810cbc58f5593eba40d (do not edit this line) */
+/* JavaCC - OriginalChecksum=b187d97d5bb75c3fc63d642c1c26ac6e (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
index 08a477e79b4..c137d3043b7 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
@@ -3,8 +3,11 @@ package org.apache.lucene.queryparser.classic;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
import java.util.List;
import java.util.Locale;
+import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
@@ -81,6 +84,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
*/
static public enum Operator { OR, AND }
+ /** default split on whitespace behavior */
+ public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
+
/** Create a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.
@@ -90,6 +96,28 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
init(f, a);
}
+ /**
+ * @see #setSplitOnWhitespace(boolean)
+ */
+ public boolean getSplitOnWhitespace() {
+ return splitOnWhitespace;
+ }
+
+ /**
+ * Whether query text should be split on whitespace prior to analysis.
+ * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}.
+ */
+ public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+ this.splitOnWhitespace = splitOnWhitespace;
+ }
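+  // Usage sketch (analyzer stands in for whatever Analyzer the parser is
+  // built with):
+  //   QueryParser qp = new QueryParser("field", analyzer);
+  //   qp.setSplitOnWhitespace(false); // analyze "guinea pig" as one chunk
+  //   Query q = qp.parse("guinea pig");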
+
+ private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+  private static Set<Integer> disallowedPostMultiTerm
+      = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+ private static boolean allowedPostMultiTerm(int tokenKind) {
+ return disallowedPostMultiTerm.contains(tokenKind) == false;
+ }
+
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
final public int Conjunction() throws ParseException {
@@ -129,15 +157,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
jj_consume_token(PLUS);
- ret = MOD_REQ;
+ ret = MOD_REQ;
break;
case MINUS:
jj_consume_token(MINUS);
- ret = MOD_NOT;
+ ret = MOD_NOT;
break;
case NOT:
jj_consume_token(NOT);
- ret = MOD_NOT;
+ ret = MOD_NOT;
break;
default:
jj_la1[2] = jj_gen;
@@ -166,11 +194,37 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int conj, mods;
- mods = Modifiers();
- q = Clause(field);
- addClause(clauses, CONJ_NONE, mods, q);
- if (mods == MOD_NONE)
- firstQuery=q;
+ if (jj_2_1(2)) {
+ firstQuery = MultiTerm(field, clauses);
+ } else {
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case NOT:
+ case PLUS:
+ case MINUS:
+ case BAREOPER:
+ case LPAREN:
+ case STAR:
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case REGEXPTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ mods = Modifiers();
+ q = Clause(field);
+ addClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE) {
+ firstQuery = q;
+ }
+ break;
+ default:
+ jj_la1[4] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ }
label_1:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -193,39 +247,66 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
;
break;
default:
- jj_la1[4] = jj_gen;
+ jj_la1[5] = jj_gen;
break label_1;
}
- conj = Conjunction();
- mods = Modifiers();
- q = Clause(field);
- addClause(clauses, conj, mods, q);
- }
- if (clauses.size() == 1 && firstQuery != null)
- {if (true) return firstQuery;}
- else {
- {if (true) return getBooleanQuery(clauses);}
+ if (jj_2_2(2)) {
+ MultiTerm(field, clauses);
+ } else {
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case AND:
+ case OR:
+ case NOT:
+ case PLUS:
+ case MINUS:
+ case BAREOPER:
+ case LPAREN:
+ case STAR:
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case REGEXPTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ conj = Conjunction();
+ mods = Modifiers();
+ q = Clause(field);
+ addClause(clauses, conj, mods, q);
+ break;
+ default:
+ jj_la1[6] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
}
+ }
+ if (clauses.size() == 1 && firstQuery != null) {
+ {if (true) return firstQuery;}
+ } else {
+ {if (true) return getBooleanQuery(clauses);}
+ }
throw new Error("Missing return statement in function");
}
final public Query Clause(String field) throws ParseException {
Query q;
Token fieldToken=null, boost=null;
- if (jj_2_1(2)) {
+ if (jj_2_3(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON);
- field=discardEscapeChar(fieldToken.image);
+ field=discardEscapeChar(fieldToken.image);
break;
case STAR:
jj_consume_token(STAR);
jj_consume_token(COLON);
- field="*";
+ field="*";
break;
default:
- jj_la1[5] = jj_gen;
+ jj_la1[7] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -255,16 +336,16 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
- jj_la1[6] = jj_gen;
+ jj_la1[8] = jj_gen;
;
}
break;
default:
- jj_la1[7] = jj_gen;
+ jj_la1[9] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
- {if (true) return handleBoost(q, boost);}
+ {if (true) return handleBoost(q, boost);}
throw new Error("Missing return statement in function");
}
@@ -291,73 +372,86 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
break;
case STAR:
term = jj_consume_token(STAR);
- wildcard=true;
+ wildcard=true;
break;
case PREFIXTERM:
term = jj_consume_token(PREFIXTERM);
- prefix=true;
+ prefix=true;
break;
case WILDTERM:
term = jj_consume_token(WILDTERM);
- wildcard=true;
+ wildcard=true;
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
- regexp=true;
+ regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
case BAREOPER:
term = jj_consume_token(BAREOPER);
- term.image = term.image.substring(0,1);
+ term.image = term.image.substring(0,1);
break;
default:
- jj_la1[8] = jj_gen;
+ jj_la1[10] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY_SLOP:
- fuzzySlop = jj_consume_token(FUZZY_SLOP);
- fuzzy=true;
- break;
- default:
- jj_la1[9] = jj_gen;
- ;
- }
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
- jj_consume_token(CARAT);
- boost = jj_consume_token(NUMBER);
+ case FUZZY_SLOP:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[11] = jj_gen;
+ ;
+ }
+ break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
- fuzzy=true;
+ fuzzy=true;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[12] = jj_gen;
+ ;
+ }
break;
default:
- jj_la1[10] = jj_gen;
- ;
+ jj_la1[13] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
}
break;
default:
- jj_la1[11] = jj_gen;
+ jj_la1[14] = jj_gen;
;
}
- q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
+ q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
break;
case RANGEIN_START:
case RANGEEX_START:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_START:
jj_consume_token(RANGEIN_START);
- startInc=true;
+ startInc = true;
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
break;
default:
- jj_la1[12] = jj_gen;
+ jj_la1[15] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -369,7 +463,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop1 = jj_consume_token(RANGE_QUOTED);
break;
default:
- jj_la1[13] = jj_gen;
+ jj_la1[16] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -378,7 +472,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_consume_token(RANGE_TO);
break;
default:
- jj_la1[14] = jj_gen;
+ jj_la1[17] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -389,20 +483,20 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop2 = jj_consume_token(RANGE_QUOTED);
break;
default:
- jj_la1[15] = jj_gen;
+ jj_la1[18] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_END:
jj_consume_token(RANGEIN_END);
- endInc=true;
+ endInc = true;
break;
case RANGEEX_END:
jj_consume_token(RANGEEX_END);
break;
default:
- jj_la1[16] = jj_gen;
+ jj_la1[19] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -412,46 +506,69 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
- jj_la1[17] = jj_gen;
+ jj_la1[20] = jj_gen;
;
}
- boolean startOpen=false;
- boolean endOpen=false;
- if (goop1.kind == RANGE_QUOTED) {
- goop1.image = goop1.image.substring(1, goop1.image.length()-1);
- } else if ("*".equals(goop1.image)) {
- startOpen=true;
- }
- if (goop2.kind == RANGE_QUOTED) {
- goop2.image = goop2.image.substring(1, goop2.image.length()-1);
- } else if ("*".equals(goop2.image)) {
- endOpen=true;
- }
- q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+ boolean startOpen=false;
+ boolean endOpen=false;
+ if (goop1.kind == RANGE_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else if ("*".equals(goop1.image)) {
+ startOpen=true;
+ }
+ if (goop2.kind == RANGE_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else if ("*".equals(goop2.image)) {
+ endOpen=true;
+ }
+ q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case FUZZY_SLOP:
- fuzzySlop = jj_consume_token(FUZZY_SLOP);
- break;
- default:
- jj_la1[18] = jj_gen;
- ;
- }
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
- jj_consume_token(CARAT);
- boost = jj_consume_token(NUMBER);
+ case FUZZY_SLOP:
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[21] = jj_gen;
+ ;
+ }
+ break;
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[22] = jj_gen;
+ ;
+ }
+ break;
+ default:
+ jj_la1[23] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
break;
default:
- jj_la1[19] = jj_gen;
+ jj_la1[24] = jj_gen;
;
}
- q = handleQuotedTerm(field, term, fuzzySlop);
+ q = handleQuotedTerm(field, term, fuzzySlop);
break;
default:
- jj_la1[20] = jj_gen;
+ jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -459,6 +576,44 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
throw new Error("Missing return statement in function");
}
+/** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */
+  final public Query MultiTerm(String field, List<BooleanClause> clauses) throws ParseException {
+ Token text, whitespace, followingText;
+ Query firstQuery = null;
+ text = jj_consume_token(TERM);
+ if (splitOnWhitespace) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+ }
+ if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+
+ } else {
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ label_2:
+ while (true) {
+ followingText = jj_consume_token(TERM);
+ if (splitOnWhitespace) {
+ Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q);
+ } else { // build up the text to send to analysis
+ text.image += " " + followingText.image;
+ }
+ if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+ ;
+ } else {
+ break label_2;
+ }
+ }
+ if (splitOnWhitespace == false) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addMultiTermClauses(clauses, firstQuery);
+ }
+ {if (true) return firstQuery;}
+ throw new Error("Missing return statement in function");
+ }
+
private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); }
@@ -466,23 +621,71 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
finally { jj_save(0, xla); }
}
- private boolean jj_3R_2() {
+ private boolean jj_2_2(int xla) {
+ jj_la = xla; jj_lastpos = jj_scanpos = token;
+ try { return !jj_3_2(); }
+ catch(LookaheadSuccess ls) { return true; }
+ finally { jj_save(1, xla); }
+ }
+
+ private boolean jj_2_3(int xla) {
+ jj_la = xla; jj_lastpos = jj_scanpos = token;
+ try { return !jj_3_3(); }
+ catch(LookaheadSuccess ls) { return true; }
+ finally { jj_save(2, xla); }
+ }
+
+ private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
+ private boolean jj_3_2() {
+ if (jj_3R_3()) return true;
+ return false;
+ }
+
private boolean jj_3_1() {
+ if (jj_3R_3()) return true;
+ return false;
+ }
+
+ private boolean jj_3R_7() {
+ if (jj_scan_token(TERM)) return true;
+ return false;
+ }
+
+ private boolean jj_3_3() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_2()) {
+ if (jj_3R_4()) {
jj_scanpos = xsp;
- if (jj_3R_3()) return true;
+ if (jj_3R_5()) return true;
}
return false;
}
private boolean jj_3R_3() {
+ if (jj_scan_token(TERM)) return true;
+ jj_lookingAhead = true;
+ jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
+ jj_lookingAhead = false;
+ if (!jj_semLA || jj_3R_6()) return true;
+ Token xsp;
+ if (jj_3R_7()) return true;
+ while (true) {
+ xsp = jj_scanpos;
+ if (jj_3R_7()) { jj_scanpos = xsp; break; }
+ }
+ return false;
+ }
+
+ private boolean jj_3R_6() {
+ return false;
+ }
+
+ private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
@@ -497,8 +700,11 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
private int jj_ntk;
private Token jj_scanpos, jj_lastpos;
private int jj_la;
+ /** Whether we are looking ahead. */
+ private boolean jj_lookingAhead = false;
+ private boolean jj_semLA;
private int jj_gen;
- final private int[] jj_la1 = new int[21];
+ final private int[] jj_la1 = new int[26];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
@@ -506,12 +712,12 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la1_init_1();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x200000,0x40000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0xfda2000,};
+ jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7c00,0xfda7f00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x40000,0x240000,0x240000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0x240000,0x240000,0xfda2000,};
}
private static void jj_la1_init_1() {
- jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,};
+ jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,};
}
- final private JJCalls[] jj_2_rtns = new JJCalls[1];
+ final private JJCalls[] jj_2_rtns = new JJCalls[3];
private boolean jj_rescan = false;
private int jj_gc = 0;
@@ -521,7 +727,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -530,8 +736,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
+ jj_lookingAhead = false;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -541,7 +748,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -551,7 +758,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -614,7 +821,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
/** Get the specific Token. */
final public Token getToken(int index) {
- Token t = token;
+ Token t = jj_lookingAhead ? jj_scanpos : token;
for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken();
@@ -668,7 +875,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
la1tokens[jj_kind] = true;
jj_kind = -1;
}
- for (int i = 0; i < 21; i++) {
+ for (int i = 0; i < 26; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
          if ((jj_la1_0[i] & (1<<j)) != 0) {
            la1tokens[j] = true;
          }
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+  /**
+   * @see #setSplitOnWhitespace(boolean)
+   */
+  public boolean getSplitOnWhitespace() {
+    return splitOnWhitespace;
+  }
+
+  /**
+   * Whether query text should be split on whitespace prior to analysis.
+   * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}.
+ */
+ public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+ this.splitOnWhitespace = splitOnWhitespace;
+ }
+
+ private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+  private static Set<Integer> disallowedPostMultiTerm
+      = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+ private static boolean allowedPostMultiTerm(int tokenKind) {
+ return disallowedPostMultiTerm.contains(tokenKind) == false;
+ }
}
PARSER_END(QueryParser)
@@ -123,15 +150,14 @@ PARSER_END(QueryParser)
/* ***************** */
<*> TOKEN : {
- <#_NUM_CHAR: ["0"-"9"] >
-// every character that follows a backslash is considered as an escaped character
-| <#_ESCAPED_CHAR: "\\" ~[] >
+ <#_NUM_CHAR: ["0"-"9"] >
+| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
- | <_ESCAPED_CHAR> ) >
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
-| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+ | <_ESCAPED_CHAR> ) >
+| <#_TERM_CHAR: ( <_TERM_START_CHAR> | "-" | "+" ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}
SKIP : {
@@ -139,37 +165,37 @@ PARSER_END(QueryParser)
}
<DEFAULT, Range> TOKEN : {
-  <AND:       ("AND" | "&&") >
-| <OR:        ("OR" | "||") >
-| <NOT:       ("NOT" | "!") >
-| <PLUS:      "+" >
-| <MINUS:     "-" >
-| <BAREOPER:    ("+"|"-"|"!") <_WHITESPACE> >
-| <LPAREN:    "(" >
-| <RPAREN:    ")" >
-| <COLON:     ":" >
-| <STAR:      "*" >
-| <CARAT:     "^" > : Boost
-| <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
-| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
-| <FUZZY_SLOP:     "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
-| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
-| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
+  <AND:           ("AND" | "&&") >
+| <OR:            ("OR" | "||") >
+| <NOT:           ("NOT" | "!") >
+| <PLUS:          "+" >
+| <MINUS:         "-" >
+| <BAREOPER:      ("+"|"-"|"!") <_WHITESPACE> >
+| <LPAREN:        "(" >
+| <RPAREN:        ")" >
+| <COLON:         ":" >
+| <STAR:          "*" >
+| <CARAT:         "^" > : Boost
+| <QUOTED:        "\"" (<_QUOTED_CHAR>)* "\"">
+| <TERM:          <_TERM_START_CHAR> (<_TERM_CHAR>)* >
+| <FUZZY_SLOP:    "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
+| <PREFIXTERM:    ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
+| <WILDTERM:      (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <REGEXPTERM:    "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
}
<Boost> TOKEN : {
-<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+ <NUMBER:       (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
<Range> TOKEN : {
-<RANGE_TO: "TO">
-| <RANGEIN_END: "]"> : DEFAULT
-| <RANGEEX_END: "}"> : DEFAULT
+ <RANGE_TO:     "TO">
+| <RANGEIN_END:  "]"> : DEFAULT
+| <RANGEEX_END:  "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
-| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
+| <RANGE_GOOP:   (~[ " ", "]", "}" ])+ >
}
// * Query ::= ( Clause )*
@@ -191,23 +217,20 @@ int Modifiers() : {
}
{
[
-     <PLUS> { ret = MOD_REQ; }
-     | <MINUS> { ret = MOD_NOT; }
-     | <NOT> { ret = MOD_NOT; }
+    <PLUS>  { ret = MOD_REQ; }
+  | <MINUS> { ret = MOD_NOT; }
+  | <NOT>   { ret = MOD_NOT; }
]
{ return ret; }
}
// This makes sure that there is no garbage after the query string
-Query TopLevelQuery(String field) :
-{
+Query TopLevelQuery(String field) : {
Query q;
}
{
  q=Query(field) <EOF>
- {
- return q;
- }
+ { return q; }
}
Query Query(String field) :
@@ -217,23 +240,30 @@ Query Query(String field) :
int conj, mods;
}
{
- mods=Modifiers() q=Clause(field)
- {
- addClause(clauses, CONJ_NONE, mods, q);
- if (mods == MOD_NONE)
- firstQuery=q;
- }
(
- conj=Conjunction() mods=Modifiers() q=Clause(field)
- { addClause(clauses, conj, mods, q); }
- )*
- {
- if (clauses.size() == 1 && firstQuery != null)
- return firstQuery;
- else {
- return getBooleanQuery(clauses);
+ LOOKAHEAD(2)
+ firstQuery=MultiTerm(field, clauses)
+ | mods=Modifiers() q=Clause(field)
+ {
+ addClause(clauses, CONJ_NONE, mods, q);
+ if (mods == MOD_NONE) {
+ firstQuery = q;
+ }
}
+ )
+ (
+ LOOKAHEAD(2)
+ MultiTerm(field, clauses)
+ | conj=Conjunction() mods=Modifiers() q=Clause(field)
+ { addClause(clauses, conj, mods, q); }
+ )*
+ {
+ if (clauses.size() == 1 && firstQuery != null) {
+ return firstQuery;
+ } else {
+ return getBooleanQuery(clauses);
}
+ }
}
Query Clause(String field) : {
@@ -244,20 +274,17 @@ Query Clause(String field) : {
[
LOOKAHEAD(2)
(
-      fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
-    | <STAR> <COLON> {field="*";}
+    fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
+  | <STAR> <COLON>            {field="*";}
)
]
-
(
- q=Term(field)
-    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
-
+ q=Term(field)
+  | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
)
- { return handleBoost(q, boost); }
+ { return handleBoost(q, boost); }
}
-
Query Term(String field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
@@ -270,45 +297,85 @@ Query Term(String field) : {
}
{
(
- (
-      term=<TERM>
-      | term=<STAR> { wildcard=true; }
-      | term=<PREFIXTERM> { prefix=true; }
-      | term=<WILDTERM> { wildcard=true; }
-      | term=<REGEXPTERM> { regexp=true; }
-      | term=<NUMBER>
-      | term=<BAREOPER> { term.image = term.image.substring(0,1); }
-    )
-    [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
-    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
- {
- q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
- }
-  | ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
-    ( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
-    [ <RANGE_TO> ]
-    ( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
-    ( <RANGEIN_END> {endInc=true;} | <RANGEEX_END> ))
-    [ <CARAT> boost=<NUMBER> ]
- {
- boolean startOpen=false;
- boolean endOpen=false;
- if (goop1.kind == RANGE_QUOTED) {
- goop1.image = goop1.image.substring(1, goop1.image.length()-1);
- } else if ("*".equals(goop1.image)) {
- startOpen=true;
- }
- if (goop2.kind == RANGE_QUOTED) {
- goop2.image = goop2.image.substring(1, goop2.image.length()-1);
- } else if ("*".equals(goop2.image)) {
- endOpen=true;
- }
- q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
- }
-  | term=<QUOTED>
-    [ fuzzySlop=<FUZZY_SLOP> ]
-    [ <CARAT> boost=<NUMBER> ]
- { q = handleQuotedTerm(field, term, fuzzySlop); }
+ (
+      term=<TERM>
+    | term=<STAR> { wildcard=true; }
+    | term=<PREFIXTERM> { prefix=true; }
+    | term=<WILDTERM> { wildcard=true; }
+    | term=<REGEXPTERM> { regexp=true; }
+    | term=<NUMBER>
+    | term=<BAREOPER> { term.image = term.image.substring(0,1); }
+    )
+    [
+      <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+      | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+ ]
+ { q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
+
+  | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
+    ( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
+    [ <RANGE_TO> ]
+    ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
+    ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
+    [ <CARAT> boost=<NUMBER> ]
+ {
+ boolean startOpen=false;
+ boolean endOpen=false;
+ if (goop1.kind == RANGE_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else if ("*".equals(goop1.image)) {
+ startOpen=true;
+ }
+ if (goop2.kind == RANGE_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else if ("*".equals(goop2.image)) {
+ endOpen=true;
+ }
+ q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+ }
+
+  | term=<QUOTED>
+ [
+      <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+      | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+ ]
+ { q = handleQuotedTerm(field, term, fuzzySlop); }
)
{ return handleBoost(q, boost); }
}
+
+/** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */
+Query MultiTerm(String field, List<BooleanClause> clauses) : {
+ Token text, whitespace, followingText;
+ Query firstQuery = null;
+}
+{
+  text=<TERM>
+ {
+ if (splitOnWhitespace) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+ }
+ }
+ // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
+ LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+ (
+ LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+    followingText=<TERM>
+ {
+ if (splitOnWhitespace) {
+ Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q);
+ } else { // build up the text to send to analysis
+ text.image += " " + followingText.image;
+ }
+ }
+ )+
+ {
+ if (splitOnWhitespace == false) {
+ firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+ addMultiTermClauses(clauses, firstQuery);
+ }
+ return firstQuery;
+ }
+}
\ No newline at end of file
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index c00d88eecff..cdfa4776175 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -464,6 +464,45 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
throw new RuntimeException("Clause cannot be both required and prohibited");
}
+ /**
+ * Adds clauses generated from analysis over text containing whitespace.
+ * There are no operators, so the query's clauses can either be MUST (if the
+ * default operator is AND) or SHOULD (default OR).
+   * <p>
+ * If all of the clauses in the given Query are TermQuery-s, this method flattens the result
+ * by adding the TermQuery-s individually to the output clause list; otherwise, the given Query
+ * is added as a single clause including its nested clauses.
+ */
+  protected void addMultiTermClauses(List<BooleanClause> clauses, Query q) {
+ // We might have been passed a null query; the term might have been
+ // filtered away by the analyzer.
+ if (q == null) {
+ return;
+ }
+ boolean allNestedTermQueries = false;
+ if (q instanceof BooleanQuery) {
+ allNestedTermQueries = true;
+ for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+ if ( ! (clause.getQuery() instanceof TermQuery)) {
+ allNestedTermQueries = false;
+ break;
+ }
+ }
+ }
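+    // If analysis produced only TermQuery clauses, add them (with their
+    // original occurs) unchanged; otherwise rebuild clauses around the
+    // default operator.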
+ if (allNestedTermQueries) {
+ clauses.addAll(((BooleanQuery)q).clauses());
+ } else {
+ BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
+ if (q instanceof BooleanQuery) {
+ for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+ clauses.add(newBooleanClause(clause.getQuery(), occur));
+ }
+ } else {
+ clauses.add(newBooleanClause(q, occur));
+ }
+ }
+ }
+
/**
* @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
*/
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
index 8c8951e1b83..065ff8b4411 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
@@ -285,7 +285,7 @@ private int jjMoveNfa_2(int startState, int curPos)
jjCheckNAddTwoStates(33, 34);
}
else if (curChar == 92)
- jjCheckNAddTwoStates(35, 35);
+ jjCheckNAdd(35);
break;
case 0:
if ((0x97ffffff87ffffffL & l) != 0L)
@@ -384,7 +384,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 26:
if (curChar == 92)
- jjAddStates(27, 28);
+ jjstateSet[jjnewStateCnt++] = 27;
break;
case 27:
if (kind > 21)
@@ -400,7 +400,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 29:
if (curChar == 92)
- jjAddStates(29, 30);
+ jjstateSet[jjnewStateCnt++] = 30;
break;
case 30:
if (kind > 21)
@@ -423,7 +423,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 34:
if (curChar == 92)
- jjCheckNAddTwoStates(35, 35);
+ jjCheckNAdd(35);
break;
case 35:
if (kind > 23)
@@ -453,7 +453,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 43:
if (curChar == 92)
- jjCheckNAddTwoStates(44, 44);
+ jjCheckNAdd(44);
break;
case 44:
if (kind > 20)
@@ -466,7 +466,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 46:
if (curChar == 92)
- jjCheckNAddTwoStates(47, 47);
+ jjCheckNAdd(47);
break;
case 47:
jjCheckNAddStates(18, 20);
@@ -645,7 +645,7 @@ private int jjMoveNfa_0(int startState, int curPos)
break;
if (kind > 27)
kind = 27;
- jjAddStates(31, 32);
+ jjAddStates(27, 28);
break;
case 1:
if (curChar == 46)
@@ -799,11 +799,11 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
- jjCheckNAddStates(33, 35);
+ jjCheckNAddStates(29, 31);
break;
case 3:
if (curChar == 34)
- jjCheckNAddStates(33, 35);
+ jjCheckNAddStates(29, 31);
break;
case 5:
if (curChar == 34 && kind > 31)
@@ -836,7 +836,7 @@ private int jjMoveNfa_1(int startState, int curPos)
jjCheckNAdd(6);
break;
case 2:
- jjAddStates(33, 35);
+ jjAddStates(29, 31);
break;
case 4:
if (curChar == 92)
@@ -872,7 +872,7 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
- jjAddStates(33, 35);
+ jjAddStates(29, 31);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -899,9 +899,8 @@ private int jjMoveNfa_1(int startState, int curPos)
}
}
static final int[] jjnextStates = {
- 37, 39, 40, 17, 18, 20, 42, 45, 31, 46, 43, 22, 23, 25, 26, 24,
- 25, 26, 45, 31, 46, 44, 47, 35, 22, 28, 29, 27, 27, 30, 30, 0,
- 1, 2, 4, 5,
+ 37, 39, 40, 17, 18, 20, 42, 43, 45, 46, 31, 22, 23, 25, 26, 24,
+ 25, 26, 45, 46, 31, 44, 47, 35, 22, 28, 29, 0, 1, 2, 4, 5,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
index aa57487f4c2..0e52ec21969 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
@@ -128,4 +128,4 @@ public class Token implements java.io.Serializable {
}
}
-/* JavaCC - OriginalChecksum=c1e1418b35aa9e47ef8dc98b87423d70 (do not edit this line) */
+/* JavaCC - OriginalChecksum=405bb5d2fcd84e94ac1c8f0b12c1f914 (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
index 7101f098f6e..ad111d0cd26 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
@@ -144,4 +144,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=0c275864a1972d9a01601ab81426872d (do not edit this line) */
+/* JavaCC - OriginalChecksum=f433e1a52b8eadbf12f3fbbbf87fd140 (do not edit this line) */
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
index 5b4eba87994..c3d7b37f5c1 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;
@@ -44,7 +46,9 @@ import java.io.IOException;
* Tests QueryParser.
*/
public class TestQueryParser extends QueryParserTestBase {
-
+
+ protected boolean splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
+
public static class QPTestParser extends QueryParser {
public QPTestParser(String f, Analyzer a) {
super(f, a);
@@ -67,6 +71,7 @@ public class TestQueryParser extends QueryParserTestBase {
if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(getDefaultField(), a);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+ qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
@@ -310,18 +315,7 @@ public class TestQueryParser extends QueryParserTestBase {
Query unexpanded = new TermQuery(new Term("field", "dogs"));
assertEquals(unexpanded, smart.parse("\"dogs\""));
}
-
- // TODO: fold these into QueryParserTestBase
-
- /** adds synonym of "dog" for "dogs". */
- static class MockSynonymAnalyzer extends Analyzer {
- @Override
- protected TokenStreamComponents createComponents(String fieldName) {
- MockTokenizer tokenizer = new MockTokenizer();
- return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
- }
- }
-
+
/** simple synonyms test */
public void testSynonyms() throws Exception {
Query expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -483,4 +477,229 @@ public class TestQueryParser extends QueryParserTestBase {
qp.parse("a*aaaaaaa");
});
}
-}
+
+ // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
+ @Override
+ public void testQPA() throws Exception {
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = false;
+
+ assertQueryEquals("term phrase term", qpAnalyzer, "term phrase1 phrase2 term");
+
+ CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
+ setDefaultOperatorAND(cqpc);
+ assertQueryEquals(cqpc, "field", "term phrase term", "+term +phrase1 +phrase2 +term");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
+ public void testMultiWordSynonyms() throws Exception {
+ QueryParser dumb = new QueryParser("field", new Analyzer1());
+ dumb.setSplitOnWhitespace(false);
+
+ // A multi-word synonym source will form a synonym query for the same-starting-position tokens
+ BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder();
+ Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy"));
+ multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD);
+ multiWordExpandedBqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
+ Query multiWordExpandedBq = multiWordExpandedBqBuilder.build();
+ assertEquals(multiWordExpandedBq, dumb.parse("guinea pig"));
+
+ // With the phrase operator, a multi-word synonym source will form a multiphrase query.
+ // When the number of expanded terms differs from the number of original terms, this is problematic.
+ MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder();
+ multiWordExpandedMpqBuilder.add(new Term[]{new Term("field", "guinea"), new Term("field", "cavy")});
+ multiWordExpandedMpqBuilder.add(new Term("field", "pig"));
+ Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build();
+ assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\""));
+
+ // Custom behavior: the synonyms are expanded, unless the quote operator is used
+ QueryParser smart = new SmartQueryParser();
+ smart.setSplitOnWhitespace(false);
+ assertEquals(multiWordExpandedBq, smart.parse("guinea pig"));
+
+ PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder();
+ multiWordUnexpandedPqBuilder.add(new Term("field", "guinea"));
+ multiWordUnexpandedPqBuilder.add(new Term("field", "pig"));
+ Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build();
+ assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\""));
+ }
+
+ // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
+ public void testOperatorsAndMultiWordSynonyms() throws Exception {
+ Analyzer a = new MockSynonymAnalyzer();
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = false;
+
+ // Operators should interrupt multiword analysis of adjacent words if they associate
+ assertQueryEquals("+guinea pig", a, "+guinea pig");
+ assertQueryEquals("-guinea pig", a, "-guinea pig");
+ assertQueryEquals("!guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea* pig", a, "guinea* pig");
+ assertQueryEquals("guinea? pig", a, "guinea? pig");
+ assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
+ assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
+
+ assertQueryEquals("guinea +pig", a, "guinea +pig");
+ assertQueryEquals("guinea -pig", a, "guinea -pig");
+ assertQueryEquals("guinea !pig", a, "guinea -pig");
+ assertQueryEquals("guinea pig*", a, "guinea pig*");
+ assertQueryEquals("guinea pig?", a, "guinea pig?");
+ assertQueryEquals("guinea pig~2", a, "guinea pig~2");
+ assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
+
+ assertQueryEquals("field:guinea pig", a, "guinea pig");
+ assertQueryEquals("guinea field:pig", a, "guinea pig");
+
+ assertQueryEquals("NOT guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea NOT pig", a, "guinea -pig");
+
+ assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+ assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+
+ assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
+ assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
+
+ assertQueryEquals("\"guinea\" pig", a, "guinea pig");
+ assertQueryEquals("guinea \"pig\"", a, "guinea pig");
+
+ assertQueryEquals("(guinea) pig", a, "guinea pig");
+ assertQueryEquals("guinea (pig)", a, "guinea pig");
+
+ assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
+ assertQueryEquals("guinea /pig/", a, "guinea /pig/");
+
+ // Operators should not interrupt multiword analysis if they don't associate
+ assertQueryEquals("(guinea pig)", a, "Synonym(cavy guinea) pig");
+ assertQueryEquals("+(guinea pig)", a, "+(Synonym(cavy guinea) pig)");
+ assertQueryEquals("-(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("!(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("NOT (guinea pig)", a, "-(Synonym(cavy guinea) pig)");
+ assertQueryEquals("(guinea pig)^2", a, "(Synonym(cavy guinea) pig)^2.0");
+
+ assertQueryEquals("field:(guinea pig)", a, "Synonym(cavy guinea) pig");
+
+ assertQueryEquals("+small guinea pig", a, "+small Synonym(cavy guinea) pig");
+ assertQueryEquals("-small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("!small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("NOT small guinea pig", a, "-small Synonym(cavy guinea) pig");
+ assertQueryEquals("small* guinea pig", a, "small* Synonym(cavy guinea) pig");
+ assertQueryEquals("small? guinea pig", a, "small? Synonym(cavy guinea) pig");
+ assertQueryEquals("\"small\" guinea pig", a, "small Synonym(cavy guinea) pig");
+
+ assertQueryEquals("guinea pig +running", a, "Synonym(cavy guinea) pig +running");
+ assertQueryEquals("guinea pig -running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig !running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig NOT running", a, "Synonym(cavy guinea) pig -running");
+ assertQueryEquals("guinea pig running*", a, "Synonym(cavy guinea) pig running*");
+ assertQueryEquals("guinea pig running?", a, "Synonym(cavy guinea) pig running?");
+ assertQueryEquals("guinea pig \"running\"", a, "Synonym(cavy guinea) pig running");
+
+ assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
+
+ assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ public void testOperatorsAndMultiWordSynonymsSplitOnWhitespace() throws Exception {
+ Analyzer a = new MockSynonymAnalyzer();
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = true;
+
+ assertQueryEquals("+guinea pig", a, "+guinea pig");
+ assertQueryEquals("-guinea pig", a, "-guinea pig");
+ assertQueryEquals("!guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea* pig", a, "guinea* pig");
+ assertQueryEquals("guinea? pig", a, "guinea? pig");
+ assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
+ assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
+
+ assertQueryEquals("guinea +pig", a, "guinea +pig");
+ assertQueryEquals("guinea -pig", a, "guinea -pig");
+ assertQueryEquals("guinea !pig", a, "guinea -pig");
+ assertQueryEquals("guinea pig*", a, "guinea pig*");
+ assertQueryEquals("guinea pig?", a, "guinea pig?");
+ assertQueryEquals("guinea pig~2", a, "guinea pig~2");
+ assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
+
+ assertQueryEquals("field:guinea pig", a, "guinea pig");
+ assertQueryEquals("guinea field:pig", a, "guinea pig");
+
+ assertQueryEquals("NOT guinea pig", a, "-guinea pig");
+ assertQueryEquals("guinea NOT pig", a, "guinea -pig");
+
+ assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+ assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
+ assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
+
+ assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
+ assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
+ assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
+
+ assertQueryEquals("\"guinea\" pig", a, "guinea pig");
+ assertQueryEquals("guinea \"pig\"", a, "guinea pig");
+
+ assertQueryEquals("(guinea) pig", a, "guinea pig");
+ assertQueryEquals("guinea (pig)", a, "guinea pig");
+
+ assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
+ assertQueryEquals("guinea /pig/", a, "guinea /pig/");
+
+ assertQueryEquals("(guinea pig)", a, "guinea pig");
+ assertQueryEquals("+(guinea pig)", a, "+(guinea pig)");
+ assertQueryEquals("-(guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("!(guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("NOT (guinea pig)", a, "-(guinea pig)");
+ assertQueryEquals("(guinea pig)^2", a, "(guinea pig)^2.0");
+
+ assertQueryEquals("field:(guinea pig)", a, "guinea pig");
+
+ assertQueryEquals("+small guinea pig", a, "+small guinea pig");
+ assertQueryEquals("-small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("!small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("NOT small guinea pig", a, "-small guinea pig");
+ assertQueryEquals("small* guinea pig", a, "small* guinea pig");
+ assertQueryEquals("small? guinea pig", a, "small? guinea pig");
+ assertQueryEquals("\"small\" guinea pig", a, "small guinea pig");
+
+ assertQueryEquals("guinea pig +running", a, "guinea pig +running");
+ assertQueryEquals("guinea pig -running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig !running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig NOT running", a, "guinea pig -running");
+ assertQueryEquals("guinea pig running*", a, "guinea pig running*");
+ assertQueryEquals("guinea pig running?", a, "guinea pig running?");
+ assertQueryEquals("guinea pig \"running\"", a, "guinea pig running");
+
+ assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
+
+ assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
+
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+
+ public void testDefaultSplitOnWhitespace() throws Exception {
+ QueryParser parser = new QueryParser("field", new Analyzer1());
+
+ assertTrue(parser.getSplitOnWhitespace()); // default is true
+
+ BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
+ bqBuilder.add(new TermQuery(new Term("field", "guinea")), BooleanClause.Occur.SHOULD);
+ bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
+ assertEquals(bqBuilder.build(), parser.parse("guinea pig"));
+
+ boolean oldSplitOnWhitespace = splitOnWhitespace;
+ splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
+ assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig");
+ splitOnWhitespace = oldSplitOnWhitespace;
+ }
+}
\ No newline at end of file
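The tests above exercise the new LUCENE-2605 switch end to end. As a standalone illustration, here is a minimal sketch of the same behavior outside the test framework; it assumes the Lucene 6.2 classic QueryParser plus the MockSynonymAnalyzer this patch adds to the test framework, and the query strings in the comments are indicative only.

```java
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class SplitOnWhitespaceDemo {
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer());

    // Default (split-on-whitespace=true): each whitespace-separated token is
    // analyzed on its own, so the multi-word synonym "guinea pig" -> "cavy" can't fire.
    Query split = qp.parse("guinea pig");     // e.g. field:guinea field:pig

    // With splitting disabled, adjacent tokens reach the analyzer together and
    // the multi-word synonym is expanded at the shared starting position.
    qp.setSplitOnWhitespace(false);
    Query unsplit = qp.parse("guinea pig");   // e.g. Synonym(field:cavy field:guinea) field:pig

    System.out.println(split);
    System.out.println(unsplit);
  }
}
```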
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
index 785dd1c23dc..934a4dac254 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java
@@ -50,6 +50,7 @@ public class TestExtendableQueryParser extends TestQueryParser {
getDefaultField(), a) : new ExtendableQueryParser(
getDefaultField(), a, extensions);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+ qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
index 25c737f214c..78d2bfda628 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java
@@ -203,4 +203,15 @@ public class TestStandardQP extends QueryParserTestBase {
//TODO test something like "SmartQueryParser()"
}
+ // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
+ @Override
+ public void testQPA() throws Exception {
+ super.testQPA();
+
+ assertQueryEquals("term phrase term", qpAnalyzer, "term (phrase1 phrase2) term");
+
+ CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
+ setDefaultOperatorAND(cqpc);
+ assertQueryEquals(cqpc, "field", "term phrase term", "+term +(+phrase1 +phrase2) +term");
+ }
}
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
index 70dc15a7cfe..f1eccf467ce 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java
@@ -27,7 +27,6 @@ import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -535,8 +534,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
assertQueryEquals("term -(stop) term", qpAnalyzer, "term term");
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
- assertQueryEquals("term phrase term", qpAnalyzer,
- "term (phrase1 phrase2) term");
+
+// TODO: Re-enable once flexible standard parser gets multi-word synonym support
+// assertQueryEquals("term phrase term", qpAnalyzer,
+// "term phrase1 phrase2 term");
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
"+term -(phrase1 phrase2) term");
assertQueryEquals("stop^3", qpAnalyzer, "");
@@ -552,8 +553,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
setDefaultOperatorAND(cqpc);
- assertQueryEquals(cqpc, "field", "term phrase term",
- "+term +(+phrase1 +phrase2) +term");
+// TODO: Re-enable once flexible standard parser gets multi-word synonym support
+// assertQueryEquals(cqpc, "field", "term phrase term",
+// "+term +phrase1 +phrase2 +term");
assertQueryEquals(cqpc, "field", "phrase",
"+phrase1 +phrase2");
}
@@ -1101,37 +1103,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
dir.close();
}
- /**
- * adds synonym of "dog" for "dogs".
- */
- protected static class MockSynonymFilter extends TokenFilter {
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
- boolean addSynonym = false;
-
- public MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (addSynonym) { // inject our synonym
- clearAttributes();
- termAtt.setEmpty().append("dog");
- posIncAtt.setPositionIncrement(0);
- addSynonym = false;
- return true;
- }
-
- if (input.incrementToken()) {
- addSynonym = termAtt.toString().equals("dogs");
- return true;
- } else {
- return false;
- }
- }
- }
-
/** whitespace+lowercase analyzer with synonyms */
protected class Analyzer1 extends Analyzer {
public Analyzer1(){
@@ -1251,10 +1222,8 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
CharacterRunAutomaton stopStopList =
new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
- CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
-
- qp = getParserConfig(
- new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
+ CommonQueryParserConfiguration qp
+ = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
qp.setEnablePositionIncrements(true);
PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java
new file mode 100644
index 00000000000..a2ce33e74e2
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java
@@ -0,0 +1,28 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
+public class MockSynonymAnalyzer extends Analyzer {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ MockTokenizer tokenizer = new MockTokenizer();
+ return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
+ }
+}
+
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java
new file mode 100644
index 00000000000..b50be0735dd
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java
@@ -0,0 +1,97 @@
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
+public class MockSynonymFilter extends TokenFilter {
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
+ List<AttributeSource> tokenQueue = new ArrayList<>();
+ boolean endOfInput = false;
+
+ public MockSynonymFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ tokenQueue.clear();
+ endOfInput = false;
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (tokenQueue.size() > 0) {
+ tokenQueue.remove(0).copyTo(this);
+ return true;
+ }
+ if (endOfInput == false && input.incrementToken()) {
+ if (termAtt.toString().equals("dogs")) {
+ addSynonymAndRestoreOrigToken("dog", 1, offsetAtt.endOffset());
+ } else if (termAtt.toString().equals("guinea")) {
+ AttributeSource firstSavedToken = cloneAttributes();
+ if (input.incrementToken()) {
+ if (termAtt.toString().equals("pig")) {
+ AttributeSource secondSavedToken = cloneAttributes();
+ int secondEndOffset = offsetAtt.endOffset();
+ firstSavedToken.copyTo(this);
+ addSynonym("cavy", 2, secondEndOffset);
+ tokenQueue.add(secondSavedToken);
+ } else if (termAtt.toString().equals("dogs")) {
+ tokenQueue.add(cloneAttributes());
+ addSynonym("dog", 1, offsetAtt.endOffset());
+ }
+ } else {
+ endOfInput = true;
+ }
+ firstSavedToken.copyTo(this);
+ }
+ return true;
+ } else {
+ endOfInput = true;
+ return false;
+ }
+ }
+ private void addSynonym(String synonymText, int posLen, int endOffset) {
+ termAtt.setEmpty().append(synonymText);
+ posIncAtt.setPositionIncrement(0);
+ posLenAtt.setPositionLength(posLen);
+ offsetAtt.setOffset(offsetAtt.startOffset(), endOffset);
+ tokenQueue.add(cloneAttributes());
+ }
+ private void addSynonymAndRestoreOrigToken(String synonymText, int posLen, int endOffset) {
+ AttributeSource origToken = cloneAttributes();
+ addSynonym(synonymText, posLen, endOffset);
+ origToken.copyTo(this);
+ }
+}
+
+
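MockSynonymFilter buffers pending tokens in tokenQueue and uses cloneAttributes()/copyTo() to look one token ahead, which is what lets it emit "cavy" spanning both source tokens with positionLength=2. A small sketch of how to observe the resulting token graph, using standard TokenStream consumption (the expected values in the comment mirror the test file that follows):

```java
import java.io.StringReader;

import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;

public class PrintTokenGraph {
  public static void main(String[] args) throws Exception {
    MockTokenizer tokenizer = new MockTokenizer();
    tokenizer.setReader(new StringReader("guinea pig"));
    TokenStream ts = new MockSynonymFilter(tokenizer);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLen = ts.addAttribute(PositionLengthAttribute.class);

    ts.reset();
    while (ts.incrementToken()) {
      // Expected: guinea (posInc=1, posLen=1), cavy (posInc=0, posLen=2), pig (posInc=1, posLen=1)
      System.out.println(term + " posInc=" + posInc.getPositionIncrement()
          + " posLen=" + posLen.getPositionLength());
    }
    ts.end();
    ts.close();
  }
}
```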
diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java
new file mode 100644
index 00000000000..fb0d0657744
--- /dev/null
+++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java
@@ -0,0 +1,151 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+
+/** test the mock synonym filter */
+public class TestMockSynonymFilter extends BaseTokenStreamTestCase {
+
+ /** test the mock synonym filter */
+ public void test() throws IOException {
+ Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ MockTokenizer tokenizer = new MockTokenizer();
+ return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
+ }
+ };
+
+ assertAnalyzesTo(analyzer, "dogs",
+ new String[]{"dogs", "dog"},
+ new int[]{0, 0}, // start offset
+ new int[]{4, 4}, // end offset
+ null,
+ new int[]{1, 0}, // position increment
+ new int[]{1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "small dogs",
+ new String[]{"small", "dogs", "dog"},
+ new int[]{0, 6, 6}, // start offset
+ new int[]{5, 10, 10}, // end offset
+ null,
+ new int[]{1, 1, 0}, // position increment
+ new int[]{1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "dogs running",
+ new String[]{"dogs", "dog", "running"},
+ new int[]{0, 0, 5}, // start offset
+ new int[]{4, 4, 12}, // end offset
+ null,
+ new int[]{1, 0, 1}, // position increment
+ new int[]{1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "small dogs running",
+ new String[]{"small", "dogs", "dog", "running"},
+ new int[]{0, 6, 6, 11}, // start offset
+ new int[]{5, 10, 10, 18}, // end offset
+ null,
+ new int[]{1, 1, 0, 1}, // position increment
+ new int[]{1, 1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "guinea",
+ new String[]{"guinea"},
+ new int[]{0}, // start offset
+ new int[]{6}, // end offset
+ null,
+ new int[]{1}, // position increment
+ new int[]{1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "pig",
+ new String[]{"pig"},
+ new int[]{0}, // start offset
+ new int[]{3}, // end offset
+ null,
+ new int[]{1}, // position increment
+ new int[]{1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "guinea pig",
+ new String[]{"guinea", "cavy", "pig"},
+ new int[]{0, 0, 7}, // start offset
+ new int[]{6, 10, 10}, // end offset
+ null,
+ new int[]{1, 0, 1}, // position increment
+ new int[]{1, 2, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "guinea dogs",
+ new String[]{"guinea", "dogs", "dog"},
+ new int[]{0, 7, 7}, // start offset
+ new int[]{6, 11, 11}, // end offset
+ null,
+ new int[]{1, 1, 0}, // position increment
+ new int[]{1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "dogs guinea",
+ new String[]{"dogs", "dog", "guinea"},
+ new int[]{0, 0, 5}, // start offset
+ new int[]{4, 4, 11}, // end offset
+ null,
+ new int[]{1, 0, 1}, // position increment
+ new int[]{1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "dogs guinea pig",
+ new String[]{"dogs", "dog", "guinea", "cavy", "pig"},
+ new int[]{0, 0, 5, 5, 12}, // start offset
+ new int[]{4, 4, 11, 15, 15}, // end offset
+ null,
+ new int[]{1, 0, 1, 0, 1}, // position increment
+ new int[]{1, 1, 1, 2, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "guinea pig dogs",
+ new String[]{"guinea", "cavy", "pig", "dogs", "dog"},
+ new int[]{0, 0, 7, 11, 11}, // start offset
+ new int[]{6, 10, 10, 15, 15}, // end offset
+ null,
+ new int[]{1, 0, 1, 1, 0}, // position increment
+ new int[]{1, 2, 1, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "small dogs and guinea pig running",
+ new String[]{"small", "dogs", "dog", "and", "guinea", "cavy", "pig", "running"},
+ new int[]{0, 6, 6, 11, 15, 15, 22, 26}, // start offset
+ new int[]{5, 10, 10, 14, 21, 25, 25, 33}, // end offset
+ null,
+ new int[]{1, 1, 0, 1, 1, 0, 1, 1}, // position increment
+ new int[]{1, 1, 1, 1, 1, 2, 1, 1}, // position length
+ true); // check that offsets are correct
+
+ assertAnalyzesTo(analyzer, "small guinea pig and dogs running",
+ new String[]{"small", "guinea", "cavy", "pig", "and", "dogs", "dog", "running"},
+ new int[]{0, 6, 6, 13, 17, 21, 21, 26}, // start offset
+ new int[]{5, 12, 16, 16, 20, 25, 25, 33}, // end offset
+ null,
+ new int[]{1, 1, 0, 1, 1, 1, 0, 1}, // position increment
+ new int[]{1, 1, 2, 1, 1, 1, 1, 1}, // position length
+ true); // check that offsets are correct
+ }
+}
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 272f35d730f..497d1396f72 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -46,6 +46,14 @@ New Features
* SOLR-9194: Enhance the bin/solr script to perform file operations to/from Zookeeper (Erick Erickson, janhoy)
+* SOLR-9242: Collection Backup/Restore now supports specifying the directory implementation to use
+ via the "repository" parameter. (Hrishikesh Gadre, Varun Thacker)
+
+* SOLR-9193: Add scoreNodes Streaming Expression (Joel Bernstein)
+
+* SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms
+ (Joel Bernstein)
+
Bug Fixes
----------------------
@@ -78,6 +86,14 @@ Bug Fixes
* SOLR-9181: Fix some races in CollectionStateWatcher API (Alan Woodward, Scott
Blum)
+* SOLR-9235: Fixed NPE when using non-numeric range query in deleteByQuery (hossman)
+
+* SOLR-9088: Fixed TestManagedSchemaAPI failures which exposed race conditions in the schema API (Varun Thacker, noble)
+
+* SOLR-9207: PeerSync recovery fails if the number of updates requested is high. A new useRangeVersions config option
+ is introduced (defaults to true) to send version ranges instead of individual versions for peer sync.
+ (Pushkar Raste, shalin)
+
Optimizations
----------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
index 25d5cebe4fb..2c09ebed669 100644
--- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
+++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml
@@ -24,15 +24,19 @@
-->
-
-
-
-
-
-
-
-
-
-
+
+
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml
index 25d5cebe4fb..2c09ebed669 100644
--- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml
+++ b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml
@@ -24,15 +24,19 @@
-->
-
-
-
-
-
-
-
-
-
-
+
+
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml
index 25d5cebe4fb..2c09ebed669 100644
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml
+++ b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml
@@ -24,15 +24,19 @@
-->
-
-
-
-
-
-
-
-
-
-
+
+
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
index 25d5cebe4fb..2c09ebed669 100644
--- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
+++ b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml
@@ -24,15 +24,19 @@
-->
-
-
-
-
-
-
-
-
-
-
+
+
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
index a0ac7322175..27a2824336e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java
@@ -17,13 +17,8 @@
package org.apache.solr.cloud;
import java.io.IOException;
-import java.io.Reader;
-import java.io.Writer;
import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
+import java.net.URI;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
@@ -36,6 +31,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Optional;
import java.util.Properties;
import java.util.Random;
import java.util.Set;
@@ -84,6 +80,9 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.backup.BackupManager;
+import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.handler.component.ShardHandlerFactory;
import org.apache.solr.handler.component.ShardRequest;
@@ -2215,21 +2214,28 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
private void processBackupAction(ZkNodeProps message, NamedList results) throws IOException, KeeperException, InterruptedException {
String collectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME);
- String location = message.getStr(ZkStateReader.BACKUP_LOCATION);
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
+ String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
+ String location = message.getStr(CoreAdminParams.BACKUP_LOCATION);
+
Map<String, String> requestMap = new HashMap<>();
Instant startTime = Instant.now();
- // note: we assume a shared files system to backup a collection, since a collection is distributed
- Path backupPath = Paths.get(location).resolve(backupName).toAbsolutePath();
+ CoreContainer cc = this.overseer.getZkController().getCoreContainer();
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
+ BackupManager backupMgr = new BackupManager(repository, zkStateReader, collectionName);
+
+ // Backup location
+ URI backupPath = repository.createURI(location, backupName);
//Validating if the directory already exists.
- if (Files.exists(backupPath)) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- "Backup directory already exists: " + backupPath);
+ if (repository.exists(backupPath)) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "The backup directory already exists: " + backupPath);
}
- Files.createDirectory(backupPath); // create now
+
+ // Create a directory to store backup details.
+ repository.createDirectory(backupPath);
log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName,
backupPath);
@@ -2242,7 +2248,8 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.BACKUPCORE.toString());
params.set(NAME, slice.getName());
- params.set("location", backupPath.toString()); // note: index dir will be here then the "snapshot." + slice name
+ params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
+ params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.getPath()); // note: index dir will be here then the "snapshot." + slice name
params.set(CORE_NAME_PROP, coreName);
sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap);
@@ -2256,29 +2263,24 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
//Download the configs
String configName = zkStateReader.readConfigName(collectionName);
- Path zkBackup = backupPath.resolve("zk_backup");
- zkStateReader.getConfigManager().downloadConfigDir(configName, zkBackup.resolve("configs").resolve(configName));
+ backupMgr.downloadConfigDir(location, backupName, configName);
//Save the collection's state. Can be part of the monolithic clusterstate.json or an individual state.json
//Since we don't want to distinguish we extract the state and back it up as a separate json
- DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
- Files.write(zkBackup.resolve("collection_state.json"),
- Utils.toJSON(Collections.singletonMap(collectionName, collection)));
+ DocCollection collectionState = zkStateReader.getClusterState().getCollection(collectionName);
+ backupMgr.writeCollectionState(location, backupName, collectionName, collectionState);
- Path propertiesPath = backupPath.resolve("backup.properties");
Properties properties = new Properties();
- properties.put("backupName", backupName);
- properties.put("collection", collectionName);
- properties.put("collection.configName", configName);
- properties.put("startTime", startTime.toString());
+ properties.put(BackupManager.BACKUP_NAME_PROP, backupName);
+ properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName);
+ properties.put(COLL_CONF, configName);
+ properties.put(BackupManager.START_TIME_PROP, startTime.toString());
//TODO: Add MD5 of the configset. If during restore the same name configset exists then we can compare checksums to see if they are the same.
//if they are not the same then we can throw an error or have an 'overwriteConfig' flag
//TODO save numDocs for the shardLeader. We can use it to sanity check the restore.
- try (Writer os = Files.newBufferedWriter(propertiesPath, StandardCharsets.UTF_8)) {
- properties.store(os, "Snapshot properties file");
- }
+ backupMgr.writeBackupProperties(location, backupName, properties);
log.info("Completed backing up ZK data for backupName={}", backupName);
}
@@ -2287,26 +2289,21 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
// TODO maybe we can inherit createCollection's options/code
String restoreCollectionName = message.getStr(COLLECTION_PROP);
String backupName = message.getStr(NAME); // of backup
- String location = message.getStr(ZkStateReader.BACKUP_LOCATION);
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
String asyncId = message.getStr(ASYNC);
+ String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
+ String location = message.getStr(CoreAdminParams.BACKUP_LOCATION);
Map<String, String> requestMap = new HashMap<>();
- Path backupPath = Paths.get(location).resolve(backupName).toAbsolutePath();
- if (!Files.exists(backupPath)) {
- throw new SolrException(ErrorCode.SERVER_ERROR, "Couldn't restore since doesn't exist: " + backupPath);
- }
- Path backupZkPath = backupPath.resolve("zk_backup");
+ CoreContainer cc = this.overseer.getZkController().getCoreContainer();
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
- Properties properties = new Properties();
- try (Reader in = Files.newBufferedReader(backupPath.resolve("backup.properties"), StandardCharsets.UTF_8)) {
- properties.load(in);
- }
+ URI backupPath = repository.createURI(location, backupName);
+ BackupManager backupMgr = new BackupManager(repository, zkStateReader, restoreCollectionName);
- String backupCollection = (String) properties.get("collection");
- byte[] data = Files.readAllBytes(backupZkPath.resolve("collection_state.json"));
- ClusterState backupClusterState = ClusterState.load(-1, data, Collections.emptySet());
- DocCollection backupCollectionState = backupClusterState.getCollection(backupCollection);
+ Properties properties = backupMgr.readBackupProperties(location, backupName);
+ String backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP);
+ DocCollection backupCollectionState = backupMgr.readCollectionState(location, backupName, backupCollection);
//Upload the configs
String configName = (String) properties.get(COLL_CONF);
@@ -2316,11 +2313,11 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
//TODO add overwrite option?
} else {
log.info("Uploading config {}", restoreConfigName);
- zkStateReader.getConfigManager().uploadConfigDir(backupZkPath.resolve("configs").resolve(configName), restoreConfigName);
+ backupMgr.uploadConfigDir(location, backupName, configName, restoreConfigName);
}
log.info("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, backupName,
- backupPath);
+ location);
//Create core-less collection
{
@@ -2410,7 +2407,9 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.RESTORECORE.toString());
params.set(NAME, "snapshot." + slice.getName());
- params.set("location", backupPath.toString());
+ params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.getPath());
+ params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
+
sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap);
}
processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap);
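The hunks above replace direct java.nio path handling with the pluggable BackupRepository abstraction. A minimal sketch of the intended flow, under the assumption that it uses only the methods exercised in this patch (newBackupRepository, createURI, exists, createDirectory); the helper class and its names are hypothetical:

```java
import java.io.IOException;
import java.net.URI;
import java.util.Optional;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.backup.repository.BackupRepository;

class BackupLocationSketch {
  static URI prepareBackupDir(CoreContainer cc, String repoName, String location, String backupName)
      throws IOException {
    // An empty Optional selects the default repository implementation.
    BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repoName));
    // The repository, not java.nio, owns path construction, so non-local
    // file systems can be plugged in behind the same API.
    URI backupPath = repository.createURI(location, backupName);
    if (repository.exists(backupPath)) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The backup directory already exists: " + backupPath);
    }
    repository.createDirectory(backupPath);
    return backupPath;
  }
}
```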
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 9ebca6f81d6..a6a15082acf 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -53,7 +53,25 @@ import org.apache.solr.cloud.overseer.OverseerAction;
import org.apache.solr.cloud.overseer.SliceMutator;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.cloud.*;
+import org.apache.solr.common.cloud.BeforeReconnect;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.ClusterStateUtil;
+import org.apache.solr.common.cloud.DefaultConnectionStrategy;
+import org.apache.solr.common.cloud.DefaultZkACLProvider;
+import org.apache.solr.common.cloud.DefaultZkCredentialsProvider;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.OnReconnect;
+import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkACLProvider;
+import org.apache.solr.common.cloud.ZkCmdExecutor;
+import org.apache.solr.common.cloud.ZkConfigManager;
+import org.apache.solr.common.cloud.ZkCoreNodeProps;
+import org.apache.solr.common.cloud.ZkCredentialsProvider;
+import org.apache.solr.common.cloud.ZkNodeProps;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
@@ -2242,8 +2260,8 @@ public final class ZkController {
String errMsg = "Failed to persist resource at {0} - old {1}";
try {
try {
- zkClient.setData(resourceLocation, content, znodeVersion, true);
- latestVersion = znodeVersion + 1;// if the set succeeded , it should have incremented the version by one always
+ Stat stat = zkClient.setData(resourceLocation, content, znodeVersion, true);
+ Stat stat = zkClient.setData(resourceLocation, content, znodeVersion, true);
+ latestVersion = stat.getVersion(); // if the set succeeded, use the version the server actually assigned
log.info("Persisted config data to node {} ", resourceLocation);
touchConfDir(zkLoader);
} catch (NoNodeException e) {
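The fix above stops assuming the znode version was bumped by exactly one and instead reads the authoritative version from the Stat that setData() returns. A hedged sketch of the pattern, relying only on the SolrZkClient.setData signature used in the hunk; the wrapper class and variable names are hypothetical:

```java
import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.zookeeper.data.Stat;

class PersistSketch {
  static int persist(SolrZkClient zkClient, String path, byte[] content, int expectedVersion)
      throws Exception {
    Stat stat = zkClient.setData(path, content, expectedVersion, true);
    // Another writer may have raced between our read and this write; the
    // returned Stat reflects the version the server actually assigned.
    return stat.getVersion();
  }
}
```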
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 422a7616119..a6d40664ee0 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -25,6 +25,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
@@ -44,6 +45,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.BackupRepositoryFactory;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.admin.CollectionsHandler;
@@ -149,8 +151,21 @@ public class CoreContainer {
private BackupRepositoryFactory backupRepoFactory;
- public BackupRepositoryFactory getBackupRepoFactory() {
- return backupRepoFactory;
+ /**
+ * This method instantiates a new instance of {@linkplain BackupRepository}.
+ *
+ * @param repositoryName The name of the backup repository (Optional).
+ * If not specified, a default implementation is used.
+ * @return a new instance of {@linkplain BackupRepository}.
+ */
+ public BackupRepository newBackupRepository(Optional<String> repositoryName) {
+ BackupRepository repository;
+ if (repositoryName.isPresent()) {
+ repository = backupRepoFactory.newInstance(getResourceLoader(), repositoryName.get());
+ } else {
+ repository = backupRepoFactory.newInstance(getResourceLoader());
+ }
+ return repository;
}
public ExecutorService getCoreZkRegisterExecutorService() {
diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java
index a5f54580e06..eb3aa5fc7f1 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java
@@ -234,7 +234,8 @@ public class SolrConfig extends Config implements MapSerializable {
queryResultWindowSize = Math.max(1, getInt("query/queryResultWindowSize", 1));
queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE);
enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false);
-
+
+ useRangeVersionsForPeerSync = getBool("peerSync/useRangeVersions", true);
filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
@@ -462,6 +463,9 @@ public class SolrConfig extends Config implements MapSerializable {
public final int queryResultWindowSize;
public final int queryResultMaxDocsCached;
public final boolean enableLazyFieldLoading;
+
+ public final boolean useRangeVersionsForPeerSync;
+
// DocSet
public final float hashSetInverseLoadFactor;
public final int hashDocSetMaxSize;
@@ -864,6 +868,10 @@ public class SolrConfig extends Config implements MapSerializable {
"addHttpRequestToContext", addHttpRequestToContext));
if (indexConfig != null) result.put("indexConfig", indexConfig.toMap());
+ m = new LinkedHashMap();
+ result.put("peerSync", m);
+ m.put("useRangeVersions", useRangeVersionsForPeerSync);
+
//TODO there is more to add
return result;
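The new flag is read from solrconfig.xml at the path peerSync/useRangeVersions, defaulting to true. A hypothetical configuration fragment, assuming the element layout implied by getBool("peerSync/useRangeVersions", true); it is only needed to opt out of the new behavior:

```xml
<!-- solrconfig.xml fragment (assumed layout): set to false to send
     individual versions instead of version ranges during PeerSync. -->
<peerSync>
  <useRangeVersions>false</useRangeVersions>
</peerSync>
```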
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index 53af3d1b5ce..14a4e0ff1e8 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -28,7 +28,19 @@ import java.lang.reflect.Constructor;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.NoSuchFileException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
@@ -77,7 +89,22 @@ import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.logging.MDCLoggingContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestHandler;
-import org.apache.solr.response.*;
+import org.apache.solr.response.BinaryResponseWriter;
+import org.apache.solr.response.CSVResponseWriter;
+import org.apache.solr.response.GeoJSONResponseWriter;
+import org.apache.solr.response.GraphMLResponseWriter;
+import org.apache.solr.response.JSONResponseWriter;
+import org.apache.solr.response.PHPResponseWriter;
+import org.apache.solr.response.PHPSerializedResponseWriter;
+import org.apache.solr.response.PythonResponseWriter;
+import org.apache.solr.response.QueryResponseWriter;
+import org.apache.solr.response.RawResponseWriter;
+import org.apache.solr.response.RubyResponseWriter;
+import org.apache.solr.response.SchemaXmlResponseWriter;
+import org.apache.solr.response.SmileResponseWriter;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.response.SortingResponseWriter;
+import org.apache.solr.response.XMLResponseWriter;
import org.apache.solr.response.transform.TransformerFactory;
import org.apache.solr.rest.ManagedResourceStorage;
import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
@@ -86,6 +113,7 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.schema.ManagedIndexSchema;
+import org.apache.solr.schema.SchemaManager;
import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrFieldCacheMBean;
@@ -2488,13 +2516,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
SolrZkClient zkClient = cc.getZkController().getZkClient();
int solrConfigversion, overlayVersion, managedSchemaVersion = 0;
SolrConfig cfg = null;
- try (SolrCore core1 = cc.solrCores.getCoreFromAnyList(coreName, true)) {
- if (core1 == null || core1.isClosed()) return;
- cfg = core1.getSolrConfig();
- solrConfigversion = core1.getSolrConfig().getOverlay().getZnodeVersion();
- overlayVersion = core1.getSolrConfig().getZnodeVersion();
+ try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
+ if (solrCore == null || solrCore.isClosed()) return;
+ cfg = solrCore.getSolrConfig();
+ solrConfigversion = solrCore.getSolrConfig().getOverlay().getZnodeVersion();
+ overlayVersion = solrCore.getSolrConfig().getZnodeVersion();
if (managedSchmaResourcePath != null) {
- managedSchemaVersion = ((ManagedIndexSchema) core1.getLatestSchema()).getSchemaZkVersion();
+ managedSchemaVersion = ((ManagedIndexSchema) solrCore.getLatestSchema()).getSchemaZkVersion();
}
}
@@ -2504,6 +2532,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
if (checkStale(zkClient, overlayPath, solrConfigversion) ||
checkStale(zkClient, solrConfigPath, overlayVersion) ||
checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) {
+
+ try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
+ solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore));
+ } catch (Exception e) {
+ log.warn("", SolrZkClient.checkInterrupted(e));
+ }
+
log.info("core reload {}", coreName);
try {
cc.reload(coreName);
@@ -2513,9 +2548,9 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
return;
}
//some files in conf directory may have other than managedschema, overlay, params
- try (SolrCore core1 = cc.solrCores.getCoreFromAnyList(coreName, true)) {
- if (core1 == null || core1.isClosed()) return;
- for (Runnable listener : core1.confListeners) {
+ try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
+ if (solrCore == null || solrCore.isClosed()) return;
+ for (Runnable listener : solrCore.confListeners) {
try {
listener.run();
} catch (Exception e) {
diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
new file mode 100644
index 00000000000..0575bff95ac
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java
@@ -0,0 +1,250 @@
+package org.apache.solr.core.backup;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Writer;
+import java.lang.invoke.MethodHandles;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.List;
+import java.util.Properties;
+
+import com.google.common.base.Preconditions;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkConfigManager;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.backup.repository.BackupRepository;
+import org.apache.solr.core.backup.repository.BackupRepository.PathType;
+import org.apache.solr.util.PropertiesInputStream;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * This class implements functionality to create a backup with extension points provided to integrate with different
+ * types of file-systems.
+ */
+public class BackupManager {
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+ public static final String COLLECTION_PROPS_FILE = "collection_state.json";
+ public static final String BACKUP_PROPS_FILE = "backup.properties";
+ public static final String ZK_STATE_DIR = "zk_backup";
+ public static final String CONFIG_STATE_DIR = "configs";
+
+ // Backup properties
+ public static final String COLLECTION_NAME_PROP = "collection";
+ public static final String BACKUP_NAME_PROP = "backupName";
+ public static final String INDEX_VERSION_PROP = "index.version";
+ public static final String START_TIME_PROP = "startTime";
+
+ protected final ZkStateReader zkStateReader;
+ protected final BackupRepository repository;
+
+ public BackupManager(BackupRepository repository, ZkStateReader zkStateReader, String collectionName) {
+ this.repository = Preconditions.checkNotNull(repository);
+ this.zkStateReader = Preconditions.checkNotNull(zkStateReader);
+ }
+
+ /**
+ * @return The version of this backup implementation.
+ */
+ public final String getVersion() {
+ return "1.0";
+ }
+
+ /**
+ * This method returns the configuration parameters for the specified backup.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup whose configuration params are required.
+ * @return the configuration parameters for the specified backup.
+ * @throws IOException In case of errors.
+ */
+ public Properties readBackupProperties(String backupLoc, String backupId) throws IOException {
+ Preconditions.checkNotNull(backupLoc);
+ Preconditions.checkNotNull(backupId);
+
+ // Backup location
+ URI backupPath = repository.createURI(backupLoc, backupId);
+ if (!repository.exists(backupPath)) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Couldn't restore since the backup doesn't exist: " + backupPath);
+ }
+
+ Properties props = new Properties();
+ try (Reader is = new InputStreamReader(new PropertiesInputStream(
+ repository.openInput(backupPath, BACKUP_PROPS_FILE, IOContext.DEFAULT)), StandardCharsets.UTF_8)) {
+ props.load(is);
+ return props;
+ }
+ }
+
+ /**
+ * This method stores the backup properties at the specified location in the repository.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup whose properties are being stored.
+ * @param props The backup properties
+ * @throws IOException in case of I/O error
+ */
+ public void writeBackupProperties(String backupLoc, String backupId, Properties props) throws IOException {
+ URI dest = repository.createURI(backupLoc, backupId, BACKUP_PROPS_FILE);
+ try (Writer propsWriter = new OutputStreamWriter(repository.createOutput(dest), StandardCharsets.UTF_8)) {
+ props.store(propsWriter, "Backup properties file");
+ }
+ }
+
+ /**
+ * This method reads the meta-data information for the backed-up collection.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param collectionName The name of the collection whose meta-data is to be returned.
+ * @return the meta-data information for the backed-up collection.
+ * @throws IOException in case of errors.
+ */
+ public DocCollection readCollectionState(String backupLoc, String backupId, String collectionName) throws IOException {
+ Preconditions.checkNotNull(collectionName);
+
+ URI zkStateDir = repository.createURI(backupLoc, backupId, ZK_STATE_DIR);
+ try (IndexInput is = repository.openInput(zkStateDir, COLLECTION_PROPS_FILE, IOContext.DEFAULT)) {
+ byte[] arr = new byte[(int) is.length()]; // probably ok since the json file should be small.
+ is.readBytes(arr, 0, (int) is.length());
+ ClusterState c_state = ClusterState.load(-1, arr, Collections.emptySet());
+ return c_state.getCollection(collectionName);
+ }
+ }
+
+ /**
+ * This method writes the collection meta-data to the specified location in the repository.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param collectionName The name of the collection whose meta-data is being stored.
+ * @param collectionState The collection meta-data to be stored.
+ * @throws IOException in case of I/O errors.
+ */
+ public void writeCollectionState(String backupLoc, String backupId, String collectionName,
+ DocCollection collectionState) throws IOException {
+ URI dest = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, COLLECTION_PROPS_FILE);
+ try (OutputStream collectionStateOs = repository.createOutput(dest)) {
+ collectionStateOs.write(Utils.toJSON(Collections.singletonMap(collectionName, collectionState)));
+ }
+ }
+
+ /**
+   * This method uploads the Solr configuration files to the desired location in ZooKeeper.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param sourceConfigName The name of the config to be copied
+ * @param targetConfigName The name of the config to be created.
+ * @throws IOException in case of I/O errors.
+ */
+ public void uploadConfigDir(String backupLoc, String backupId, String sourceConfigName, String targetConfigName)
+ throws IOException {
+ URI source = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR, sourceConfigName);
+ String zkPath = ZkConfigManager.CONFIGS_ZKNODE + "/" + targetConfigName;
+ uploadToZk(zkStateReader.getZkClient(), source, zkPath);
+ }
+
+ /**
+   * This method downloads the contents of the specified Solr config from ZooKeeper and stores them at the specified location in the repository.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param configName The name of the config to be saved.
+ * @throws IOException in case of I/O errors.
+ */
+ public void downloadConfigDir(String backupLoc, String backupId, String configName) throws IOException {
+ URI dest = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR, configName);
+ repository.createDirectory(repository.createURI(backupLoc, backupId, ZK_STATE_DIR));
+ repository.createDirectory(repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR));
+ repository.createDirectory(dest);
+
+ downloadFromZK(zkStateReader.getZkClient(), ZkConfigManager.CONFIGS_ZKNODE + "/" + configName, dest);
+ }
+
+ private void downloadFromZK(SolrZkClient zkClient, String zkPath, URI dir) throws IOException {
+ try {
+ if (!repository.exists(dir)) {
+ repository.createDirectory(dir);
+ }
+      List<String> files = zkClient.getChildren(zkPath, null, true);
+ for (String file : files) {
+        List<String> children = zkClient.getChildren(zkPath + "/" + file, null, true);
+ if (children.size() == 0) {
+ log.info("Writing file {}", file);
+ byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true);
+ try (OutputStream os = repository.createOutput(repository.createURI(dir.getPath(), file))) {
+ os.write(data);
+ }
+ } else {
+ downloadFromZK(zkClient, zkPath + "/" + file, repository.createURI(dir.getPath(), file));
+ }
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IOException("Error downloading files from zookeeper path " + zkPath + " to " + dir.toString(),
+ SolrZkClient.checkInterrupted(e));
+ }
+ }
+
+ private void uploadToZk(SolrZkClient zkClient, URI sourceDir, String destZkPath) throws IOException {
+    Preconditions.checkArgument(repository.exists(sourceDir), "Path %s does not exist", sourceDir);
+    Preconditions.checkArgument(repository.getPathType(sourceDir) == PathType.DIRECTORY,
+        "Path %s is not a directory", sourceDir);
+
+ for (String file : repository.listAll(sourceDir)) {
+ String zkNodePath = destZkPath + "/" + file;
+ URI path = repository.createURI(sourceDir.getPath(), file);
+ PathType t = repository.getPathType(path);
+ switch (t) {
+ case FILE: {
+ try (IndexInput is = repository.openInput(sourceDir, file, IOContext.DEFAULT)) {
+ byte[] arr = new byte[(int) is.length()]; // probably ok since the config file should be small.
+ is.readBytes(arr, 0, (int) is.length());
+ zkClient.makePath(zkNodePath, arr, true);
+ } catch (KeeperException | InterruptedException e) {
+ throw new IOException(e);
+ }
+ break;
+ }
+
+ case DIRECTORY: {
+ if (!file.startsWith(".")) {
+ uploadToZk(zkClient, path, zkNodePath);
+ }
+ break;
+ }
+ default:
+ throw new IllegalStateException("Unknown path type " + t);
+ }
+ }
+ }
+}
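
A minimal usage sketch for the class above, assuming an already-initialized repository and ZkStateReader; the collection, location and backup names are hypothetical, and exception handling plus the java.util.Properties import are elided:

    // Hypothetical round-trip of backup.properties through a BackupManager.
    BackupManager mgr = new BackupManager(repository, zkStateReader, "techproducts");

    Properties props = new Properties();
    props.setProperty(BackupManager.COLLECTION_NAME_PROP, "techproducts");
    props.setProperty(BackupManager.BACKUP_NAME_PROP, "snapshot1");
    mgr.writeBackupProperties("/backups", "snapshot1", props);

    // Later, e.g. while preparing a restore:
    Properties read = mgr.readBackupProperties("/backups", "snapshot1");
    assert "techproducts".equals(read.getProperty(BackupManager.COLLECTION_NAME_PROP));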
diff --git a/solr/core/src/java/org/apache/solr/core/backup/package-info.java b/solr/core/src/java/org/apache/solr/core/backup/package-info.java
new file mode 100644
index 00000000000..defcad6b55c
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/backup/package-info.java
@@ -0,0 +1,22 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+/**
+ * Core classes for Solr's Backup/Restore functionality
+ */
+package org.apache.solr.core.backup;
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java b/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
index f209b874a5f..20d8628a50c 100644
--- a/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
+++ b/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
@@ -21,20 +21,18 @@ import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
+import java.util.Optional;
+
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
/**
* This interface defines the functionality required to backup/restore Solr indexes to an arbitrary storage system.
*/
public interface BackupRepository extends NamedListInitializedPlugin, Closeable {
- /**
- * A parameter to specify the name of the backup repository to be used.
- */
- String REPOSITORY_PROPERTY_NAME = "repository";
-
/**
* This enumeration defines the type of a given path.
@@ -43,6 +41,17 @@ public interface BackupRepository extends NamedListInitializedPlugin, Closeable
DIRECTORY, FILE
}
+ /**
+ * This method returns the location where the backup should be stored (or restored from).
+ *
+ * @param override The location parameter supplied by the user.
+   * @return If override is not null, then return the same value.
+   *         Otherwise, return the default configuration value for the {@linkplain CoreAdminParams#BACKUP_LOCATION} parameter.
+ */
+ default String getBackupLocation(String override) {
+ return Optional.ofNullable(override).orElse(getConfigProperty(CoreAdminParams.BACKUP_LOCATION));
+ }
+
/**
* This method returns the value of the specified configuration property.
*/
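
A behavioral sketch of the new default method, assuming a repository whose configured "location" property is "/var/backups" (both paths are illustrative):

    // The caller-supplied override wins; otherwise the repository's configured
    // CoreAdminParams.BACKUP_LOCATION ("location") property is returned.
    assert repo.getBackupLocation("/tmp/override").equals("/tmp/override");
    assert repo.getBackupLocation(null).equals("/var/backups");
    // If neither is set, the result is null and callers must reject the request.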
diff --git a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
index 657e6b32bc1..c4b42d9d609 100644
--- a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
@@ -118,7 +118,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, P
.withFunctionName("shortestPath", ShortestPathStream.class)
.withFunctionName("gatherNodes", GatherNodesStream.class)
.withFunctionName("sort", SortStream.class)
-
+ .withFunctionName("scoreNodes", ScoreNodesStream.class)
// metrics
.withFunctionName("min", MinMetric.class)
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 1893a7da52d..6e1b3a087a1 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -37,6 +37,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.ExecutorService;
@@ -67,8 +68,8 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RateLimiter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
@@ -84,7 +85,6 @@ import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener;
-import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.core.backup.repository.LocalFileSystemRepository;
import org.apache.solr.request.SolrQueryRequest;
@@ -331,7 +331,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
}
- SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME));
+ SnapShooter snapShooter = new SnapShooter(core, params.get(CoreAdminParams.BACKUP_LOCATION), params.get(NAME));
snapShooter.validateDeleteSnapshot();
snapShooter.deleteSnapAsync(this);
}
@@ -412,19 +412,16 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
"for the same core");
}
String name = params.get(NAME);
- String location = params.get(LOCATION);
+ String location = params.get(CoreAdminParams.BACKUP_LOCATION);
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
CoreContainer cc = core.getCoreDescriptor().getCoreContainer();
- SolrResourceLoader rl = cc.getResourceLoader();
BackupRepository repo = null;
- if(repoName != null) {
- repo = cc.getBackupRepoFactory().newInstance(rl, repoName);
+ if (repoName != null) {
+ repo = cc.newBackupRepository(Optional.of(repoName));
+ location = repo.getBackupLocation(location);
if (location == null) {
- location = repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if(location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ throw new IllegalArgumentException("location is required");
}
} else {
repo = new LocalFileSystemRepository();
@@ -520,18 +517,15 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
indexCommit = req.getSearcher().getIndexReader().getIndexCommit();
}
- String location = params.get(ZkStateReader.BACKUP_LOCATION);
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
+ String location = params.get(CoreAdminParams.BACKUP_LOCATION);
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
CoreContainer cc = core.getCoreDescriptor().getCoreContainer();
- SolrResourceLoader rl = cc.getResourceLoader();
BackupRepository repo = null;
- if(repoName != null) {
- repo = cc.getBackupRepoFactory().newInstance(rl, repoName);
+ if (repoName != null) {
+ repo = cc.newBackupRepository(Optional.of(repoName));
+ location = repo.getBackupLocation(location);
if (location == null) {
- location = repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if(location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ throw new IllegalArgumentException("location is required");
}
} else {
repo = new LocalFileSystemRepository();
@@ -1645,8 +1639,6 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
}
}
- private static final String LOCATION = "location";
-
private static final String SUCCESS = "success";
private static final String FAILED = "failed";
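
With the handler changes above, a core-level backup can name a repository and omit the location, letting the repository's configured default apply; a hypothetical request (host, core and repository names are examples only):

    // Hypothetical invocation of the replication backup command.
    String url = "http://localhost:8983/solr/techproducts/replication"
        + "?command=backup"
        + "&name=snapshot1"
        + "&repository=hdfs";   // CoreAdminParams.BACKUP_REPOSITORY
    // "location" is omitted, so the handler resolves it via
    // BackupRepository.getBackupLocation(null) and fails fast with
    // "location is required" if the repository config defines none either.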
diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
index 91ee096e777..1e9ba27430b 100644
--- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
@@ -126,6 +126,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("select", SelectStream.class)
.withFunctionName("shortestPath", ShortestPathStream.class)
.withFunctionName("gatherNodes", GatherNodesStream.class)
+ .withFunctionName("scoreNodes", ScoreNodesStream.class)
// metrics
.withFunctionName("min", MinMetric.class)
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 85c98c1bba1..97fbd2d181d 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -18,6 +18,7 @@ package org.apache.solr.handler.admin;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
+import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -26,6 +27,7 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;
@@ -75,6 +77,7 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.CloudConfig;
import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.backup.repository.BackupRepository;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.solr.request.SolrQueryRequest;
@@ -807,15 +810,32 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' does not exist, no action taken.");
}
- String location = req.getParams().get(ZkStateReader.BACKUP_LOCATION);
+ CoreContainer cc = h.coreContainer;
+ String repo = req.getParams().get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
+
+ String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) {
- location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
+ // Check if the location is specified in the cluster property.
+ location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
+ if (location == null) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+            + " parameter, as a default repository property, or as a cluster property.");
+ }
}
- if (location == null) {
- throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
+
+ // Check if the specified location is valid for this repository.
+ URI uri = repository.createURI(location);
+ try {
+ if (!repository.exists(uri)) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Specified location " + uri + " does not exist.");
+ }
+ } catch (IOException ex) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex);
}
+
      Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP);
- params.put("location", location);
+ params.put(CoreAdminParams.BACKUP_LOCATION, location);
return params;
}
},
@@ -831,16 +851,32 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' exists, no action taken.");
}
- String location = req.getParams().get(ZkStateReader.BACKUP_LOCATION);
+ CoreContainer cc = h.coreContainer;
+ String repo = req.getParams().get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
+
+ String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
if (location == null) {
- location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
+ // Check if the location is specified in the cluster property.
+        location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
+ if (location == null) {
+        throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+            + " parameter, as a default repository property, or as a cluster property.");
+ }
}
- if (location == null) {
- throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
+
+ // Check if the specified location is valid for this repository.
+ URI uri = repository.createURI(location);
+ try {
+ if (!repository.exists(uri)) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Specified location " + uri + " does not exist.");
+ }
+ } catch (IOException ex) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex);
}
      Map<String, Object> params = req.getParams().getAll(null, NAME, COLLECTION_PROP);
- params.put("location", location);
+ params.put(CoreAdminParams.BACKUP_LOCATION, location);
// from CREATE_OP:
req.getParams().getAll(params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT, AUTO_ADD_REPLICAS);
copyPropertiesWithPrefix(req.getParams(), params, COLL_PROP_PREFIX);
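
Because the cluster property is the final fallback for collection-level BACKUP/RESTORE, operators can set it once instead of passing the location on every request; a hypothetical CLUSTERPROP call (host and path are examples, assuming "location" is accepted as a cluster property):

    // Hypothetical: set a cluster-wide default backup location.
    String setProp = "http://localhost:8983/solr/admin/collections"
        + "?action=CLUSTERPROP&name=location&val=/var/backups/solr";
    // Resolution order in the handlers above: request parameter, then the
    // repository's configured default, then this cluster property; the
    // resolved location must already exist in the repository.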
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
index 3c52beace86..bf892277d78 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
@@ -27,6 +27,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Optional;
import java.util.Set;
import java.util.concurrent.Future;
@@ -40,6 +41,7 @@ import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.SyncStrategy;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
@@ -858,21 +860,13 @@ enum CoreAdminOperation {
throw new IllegalArgumentException(CoreAdminParams.NAME + " is required");
}
- SolrResourceLoader loader = callInfo.handler.coreContainer.getResourceLoader();
- BackupRepository repository;
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
- if(repoName != null) {
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader, repoName);
- } else { // Fetch the default.
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader);
- }
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = callInfo.handler.coreContainer.newBackupRepository(Optional.ofNullable(repoName));
- String location = params.get(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- location = repository.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ String location = repository.getBackupLocation(params.get(CoreAdminParams.BACKUP_LOCATION));
+    if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property");
}
try (SolrCore core = callInfo.handler.coreContainer.getCore(cname)) {
@@ -912,21 +906,13 @@ enum CoreAdminOperation {
throw new IllegalArgumentException(CoreAdminParams.NAME + " is required");
}
- SolrResourceLoader loader = callInfo.handler.coreContainer.getResourceLoader();
- BackupRepository repository;
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
- if(repoName != null) {
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader, repoName);
- } else { // Fetch the default.
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader);
- }
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = callInfo.handler.coreContainer.newBackupRepository(Optional.ofNullable(repoName));
- String location = params.get(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- location = repository.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ String location = repository.getBackupLocation(params.get(CoreAdminParams.BACKUP_LOCATION));
+    if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property");
}
try (SolrCore core = callInfo.handler.coreContainer.getCore(cname)) {
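
Both BACKUPCORE and RESTORECORE now reduce to the same two-step lookup; a condensed sketch of the shared pattern (names follow the surrounding code):

    String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
    BackupRepository repository =
        callInfo.handler.coreContainer.newBackupRepository(Optional.ofNullable(repoName));
    String location = repository.getBackupLocation(params.get(CoreAdminParams.BACKUP_LOCATION));
    if (location == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST,
          "'location' is not specified as a query parameter or as a default repository property");
    }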
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 1c42b034321..1942232115a 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -20,9 +20,13 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValuesType;
@@ -41,6 +45,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.StringUtils;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
@@ -97,8 +102,16 @@ public class RealTimeGetComponent extends SearchComponent
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
-
- String val = params.get("getVersions");
+
+    // This seems rather kludgey; maybe there is a better way to indicate
+    // that the replica can support handling version ranges.
+    String val = params.get("checkCanHandleVersionRanges");
+    if (val != null) {
+ rb.rsp.add("canHandleVersionRanges", true);
+ return;
+ }
+
+ val = params.get("getVersions");
if (val != null) {
processGetVersions(rb);
return;
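
The new handshake lets a peer probe whether a replica understands version ranges before sending one; a hypothetical probe, showing parameter construction only:

    // Hypothetical: probe a replica through its real-time get handler.
    ModifiableSolrParams probe = new ModifiableSolrParams();
    probe.set("qt", "/get");
    probe.set("distrib", false);
    probe.set("checkCanHandleVersionRanges", "true");
    // A replica with this change answers canHandleVersionRanges=true; an older
    // replica omits the key, so the caller falls back to sending an explicit
    // comma-separated version list instead of "start...end" ranges.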
@@ -667,7 +680,14 @@ public class RealTimeGetComponent extends SearchComponent
UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
if (ulog == null) return;
-    List<String> versions = StrUtils.splitSmart(versionsStr, ",", true);
+ // handle version ranges
+    List<Long> versions = null;
+ if (versionsStr.indexOf("...") != -1) {
+ versions = resolveVersionRanges(versionsStr, ulog);
+ } else {
+ versions = StrUtils.splitSmart(versionsStr, ",", true).stream().map(Long::parseLong)
+ .collect(Collectors.toList());
+ }
List