diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3d29039ecec..82e81d1e352 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -5,10 +5,6 @@ http://s.apache.org/luceneversions
 ======================= Lucene 6.2.0 =======================
 
-Bug Fixes
-
-* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
-
 New Features
 
 * LUCENE-6968: LSH Filter (Tommaso Teofili, Andy Hind, Cao Manh Dat)
@@ -25,6 +21,13 @@ New Features
   analyzer for the Ukrainian language (Andriy Rysin via Mike McCandless)
 
+Bug Fixes
+
+* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
+
+* LUCENE-7340: MemoryIndex.toString() could throw NPE; fixed. Renamed to toStringDebug().
+  (Daniel Collins, David Smiley)
+
 Improvements
 
 * LUCENE-7323: Compound file writing now verifies the incoming
@@ -62,6 +65,10 @@ Improvements
   ScandinavianNormalizationFilterFactory now implement MultiTermAwareComponent.
   (Adrien Grand)
 
+* LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to
+  control whether to split on whitespace prior to text analysis. Default
+  behavior remains unchanged: split-on-whitespace=true. (Steve Rowe)
+
 Optimizations
 
 * LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
index f8874eb13e4..d826a60d677 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
 import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
+import org.apache.lucene.analysis.MockSynonymFilter;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;
@@ -75,6 +76,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
     MockGraphTokenFilter.class,
     MockHoleInjectingTokenFilter.class,
     MockRandomLookaheadTokenFilter.class,
+    MockSynonymFilter.class,
     MockTokenFilter.class,
     MockVariableLengthPayloadFilter.class,
     ValidatingTokenFilter.class,
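The LUCENE-2605 entry above is the headline change in this patch: the classic QueryParser gains a setSplitOnWhitespace() option so that query text can reach the analyzer without first being pre-split on whitespace. A minimal usage sketch, not taken from the patch itself (the field name, analyzer choice, and query string are illustrative only):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class SplitOnWhitespaceDemo {
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("body", new StandardAnalyzer());
    // The default (true) preserves the historical behavior: each
    // whitespace-separated chunk is analyzed on its own.
    qp.setSplitOnWhitespace(false);
    // With false, adjacent bare terms such as "guinea pig" are sent to the
    // analyzer as one string, which lets multi-word synonyms fire.
    Query q = qp.parse("guinea pig");
    System.out.println(q);
  }
}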
diff --git a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt
new file mode 100644
index 00000000000..114260476e4
--- /dev/null
+++ b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/mapping_uk.txt
@@ -0,0 +1,19 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# This map normalizes some characters used in Ukrainian text
+"\u2019" => "'"
+"\u02BC" => "'"
+
+# Remove accent
+"\u0301" => ""
diff --git a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict
index 679e39251c3..246897061aa 100644
Binary files a/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict and b/lucene/analysis/morfologik/src/resources/org/apache/lucene/analysis/uk/ukrainian.dict differ
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
index 87d3be53432..a38fc63e873 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/uk/TestUkrainianAnalyzer.java
@@ -37,22 +37,29 @@ public class TestUkrainianAnalyzer extends BaseTokenStreamTestCase {
 
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new UkrainianMorfologikAnalyzer();
-    assertAnalyzesTo(a, "Ця п'єса у свою чергу рухається по колу.",
-                     new String[] { "п'єса", "черга", "рухатися", "кола", "коло", "коло", "кіл", "кіл" });
+    assertAnalyzesTo(a, "Ця п'єса, у свою чергу, рухається по емоційно-напруженому колу за ритм-енд-блюзом.",
+                     new String[] { "п'єса", "черга", "рухатися", "емоційно", "напружений", "кола", "коло", "кіл", "ритм", "енд", "блюз" });
     a.close();
   }
 
   public void testSpecialCharsTokenStream() throws Exception {
     Analyzer a = new UkrainianMorfologikAnalyzer();
-    assertAnalyzesTo(a, "Ця пʼєса, у сво́ю чергу, рухається по колу.",
-                     new String[] { "п'єса", "черга", "рухатися", "кола", "коло", "коло", "кіл", "кіл" });
+    assertAnalyzesTo(a, "Ця пʼєса, у сво́ю чергу рухається.",
+                     new String[] { "п'єса", "черга", "рухатися" });
     a.close();
   }
 
   public void testCapsTokenStream() throws Exception {
     Analyzer a = new UkrainianMorfologikAnalyzer();
-    assertAnalyzesTo(a, "Цей Чайковський.",
-                     new String[] { "чайковський" });
+    assertAnalyzesTo(a, "Цей Чайковський і Ґете.",
+                     new String[] { "чайковський", "ґете" });
+    a.close();
+  }
+
+  public void testSampleSentence() throws Exception {
+    Analyzer a = new UkrainianMorfologikAnalyzer();
+    assertAnalyzesTo(a, "Це — проект генерування словника з тегами частин мови для української мови.",
+                     new String[] { "проект", "генерування", "словник", "тег", "частина", "мова", "українська", "український", "мова" });
     a.close();
   }
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
index a33d64072c9..2429c330f28 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -148,6 +148,10 @@ final class DocumentsWriter implements Closeable, Accountable {
     return seqNo;
   }
 
+  synchronized void setLastSeqNo(long seqNo) {
+    lastSeqNo = seqNo;
+  }
+
   // TODO: we could check w/ FreqProxTermsWriter: if the
   // term doesn't exist, don't bother buffering into the
   // per-DWPT map (but still must go into the global map)
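The Ukrainian test changes above exercise the morfologik-backed analyzer end to end. For readers who want to try it outside of BaseTokenStreamTestCase, here is a minimal sketch of the standard TokenStream consumption loop (the field name is arbitrary; the sample text is taken from the test above):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.uk.UkrainianMorfologikAnalyzer;

public class UkrainianDemo {
  public static void main(String[] args) throws Exception {
    try (Analyzer a = new UkrainianMorfologikAnalyzer();
         TokenStream ts = a.tokenStream("field", "Цей Чайковський і Ґете.")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();                  // mandatory before the first incrementToken()
      while (ts.incrementToken()) {
        System.out.println(term);  // lowercased, stemmed tokens, e.g. "чайковський"
      }
      ts.end();                    // mandatory after the last incrementToken()
    }
  }
}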
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index f0d756bdfb5..fd2553671ce 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -765,8 +765,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
    *
    * <p>
    * <b>NOTE:</b> after this writer is created, the given configuration instance
-   * cannot be passed to another writer. If you intend to do so, you should
-   * {@link IndexWriterConfig#clone() clone} it beforehand.
+   * cannot be passed to another writer.
    *
    * @param d
    *          the index directory. The index is either created or appended
@@ -2348,7 +2347,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       globalFieldNumberMap.clear();
 
       success = true;
-      return docWriter.deleteQueue.getNextSequenceNumber();
+      long seqNo = docWriter.deleteQueue.getNextSequenceNumber();
+      docWriter.setLastSeqNo(seqNo);
+      return seqNo;
 
     } finally {
       docWriter.unlockAllAfterAbortAll(this);
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
index c2246823dcf..3e26965deee 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -22,7 +22,6 @@ import java.util.ArrayList;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -137,40 +136,4 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
       System.out.println(s);
     }
   }
-
-  // stupid filter that inserts synonym of 'hte' for 'the'
-  private class MockSynonymFilter extends TokenFilter {
-    State bufferedState;
-    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
-    MockSynonymFilter(TokenStream input) {
-      super(input);
-    }
-
-    @Override
-    public boolean incrementToken() throws IOException {
-      if (bufferedState != null) {
-        restoreState(bufferedState);
-        posIncAtt.setPositionIncrement(0);
-        termAtt.setEmpty().append("hte");
-        bufferedState = null;
-        return true;
-      } else if (input.incrementToken()) {
-        if (termAtt.toString().equals("the")) {
-          bufferedState = captureState();
-        }
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-    @Override
-    public void reset() throws IOException {
-      super.reset();
-      bufferedState = null;
-    }
-  }
-
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
index 779c1f21a1f..a1b2a5c2917 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestControlledRealTimeReopenThread.java
@@ -534,4 +534,19 @@ public class TestControlledRealTimeReopenThread extends ThreadedIndexingAndSearc
     iw.close();
     dir.close();
   }
+
+  public void testDeleteAll() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    SearcherManager mgr = new SearcherManager(w, new SearcherFactory());
+    nrtDeletesThread = new ControlledRealTimeReopenThread<>(w, mgr, 0.1, 0.01);
+    nrtDeletesThread.setName("NRTDeletes Reopen Thread");
+    nrtDeletesThread.setDaemon(true);
+    nrtDeletesThread.start();
+
+    long gen1 = w.addDocument(new Document());
+    long gen2 = w.deleteAll();
+    nrtDeletesThread.waitForGeneration(gen2);
+    IOUtils.close(nrtDeletesThread, nrtDeletes, w, dir);
+  }
 }
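The setLastSeqNo() plumbing above keeps DocumentsWriter's notion of the last issued sequence number consistent after deleteAll(), so that a ControlledRealTimeReopenThread waiting on that sequence number does not wait forever; that is what the new testDeleteAll() exercises. A standalone sketch of the same pattern, assuming nothing beyond stock Lucene (directory choice and staleness times are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.ControlledRealTimeReopenThread;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class DeleteAllSeqNoDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
    SearcherManager mgr = new SearcherManager(w, new SearcherFactory());
    ControlledRealTimeReopenThread<IndexSearcher> reopener =
        new ControlledRealTimeReopenThread<>(w, mgr, 1.0, 0.01);
    reopener.setDaemon(true);
    reopener.start();

    w.addDocument(new Document());
    long seqNo = w.deleteAll();        // a sequence number, like addDocument's
    reopener.waitForGeneration(seqNo); // without the fix above, this could hang

    reopener.close();
    mgr.close();
    w.close();
    dir.close();
  }
}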
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
index 205fbab0981..d3019e3d077 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestQueryBuilder.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -121,7 +122,7 @@ public class TestQueryBuilder extends LuceneTestCase {
     assertNull(builder.createBooleanQuery("field", ""));
   }
 
-  /** adds synonym of "dog" for "dogs". */
+  /** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
   static class MockSynonymAnalyzer extends Analyzer {
     @Override
     protected TokenStreamComponents createComponents(String fieldName) {
       MockTokenizer tokenizer = new MockTokenizer();
       return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
     }
   }
 
-  /**
-   * adds synonym of "dog" for "dogs".
-   */
-  protected static class MockSynonymFilter extends TokenFilter {
-    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-    boolean addSynonym = false;
-
-    public MockSynonymFilter(TokenStream input) {
-      super(input);
-    }
-
-    @Override
-    public final boolean incrementToken() throws IOException {
-      if (addSynonym) { // inject our synonym
-        clearAttributes();
-        termAtt.setEmpty().append("dog");
-        posIncAtt.setPositionIncrement(0);
-        addSynonym = false;
-        return true;
-      }
-
-      if (input.incrementToken()) {
-        addSynonym = termAtt.toString().equals("dogs");
-        return true;
-      } else {
-        return false;
-      }
-    }
-  }
-
   /** simple synonyms test */
   public void testSynonyms() throws Exception {
     SynonymQuery expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -180,6 +150,15 @@ public class TestQueryBuilder extends LuceneTestCase {
     assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old dogs"));
   }
 
+  /** forms multiphrase query */
+  public void testMultiWordSynonymsPhrase() throws Exception {
+    MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
+    expectedBuilder.add(new Term[] { new Term("field", "guinea"), new Term("field", "cavy") });
+    expectedBuilder.add(new Term("field", "pig"));
+    QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
+    assertEquals(expectedBuilder.build(), queryBuilder.createPhraseQuery("field", "guinea pig"));
+  }
+
   protected static class SimpleCJKTokenizer extends Tokenizer {
     private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
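The duplicated inner MockSynonymFilter classes removed here and in TestStopFilter are consolidated into the shared org.apache.lucene.analysis.MockSynonymFilter registered in TestAllAnalyzersHaveFactories above. Outside the test framework, a comparable multi-word synonym analyzer can be built with the real SynonymMap/SynonymFilter from lucene-analyzers-common. A hedged sketch that maps "guinea pig" to "cavy", mirroring the mock (assumes lucene-analyzers-common on the classpath):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class GuineaPigAnalyzer extends Analyzer {
  private final SynonymMap synonyms;

  public GuineaPigAnalyzer() throws IOException {
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    // "guinea pig" (two tokens) -> "cavy", keeping the original tokens
    CharsRef input = SynonymMap.Builder.join(new String[] { "guinea", "pig" }, new CharsRefBuilder());
    builder.add(input, new CharsRef("cavy"), true);
    synonyms = builder.build();
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new WhitespaceTokenizer();
    return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, synonyms, true));
  }
}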
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index cde20e57670..cdd53ed9e2f 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -43,10 +43,21 @@ import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.*;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefArray;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.IntBlockPool.SliceReader;
 import org.apache.lucene.util.IntBlockPool.SliceWriter;
+import org.apache.lucene.util.RecyclingByteBlockAllocator;
+import org.apache.lucene.util.RecyclingIntBlockAllocator;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index.
@@ -746,13 +757,14 @@ public class MemoryIndex {
    * Returns a String representation of the index data for debugging purposes.
    *
    * @return the string representation
+   * @lucene.experimental
    */
-  @Override
-  public String toString() {
+  public String toStringDebug() {
     StringBuilder result = new StringBuilder(256);
     int sumPositions = 0;
     int sumTerms = 0;
     final BytesRef spare = new BytesRef();
+    final BytesRefBuilder payloadBuilder = storePayloads ? new BytesRefBuilder() : null;
     for (Map.Entry<String, Info> entry : fields.entrySet()) {
       String fieldName = entry.getKey();
       Info info = entry.getValue();
@@ -778,9 +790,16 @@ public class MemoryIndex {
             result.append(", ");
           }
         }
+        if (storePayloads) {
+          int payloadIndex = postingsReader.readInt();
+          if (payloadIndex != -1) {
+            result.append(", " + payloadsBytesRefs.get(payloadBuilder, payloadIndex));
+          }
+        }
         result.append(")");
+
         if (!postingsReader.endOfSlice()) {
-          result.append(",");
+          result.append(", ");
         }
       }
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 57514578b16..2f95a4e5cca 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -464,4 +464,26 @@ public class TestMemoryIndex extends LuceneTestCase {
     assertEquals("term", leafReader.getBinaryDocValues("field").get(0).utf8ToString());
   }
 
+  public void testToStringDebug() {
+    MemoryIndex mi = new MemoryIndex(true, true);
+    Analyzer analyzer = new MockPayloadAnalyzer();
+
+    mi.addField("analyzedField", "aa bb aa", analyzer);
+
+    FieldType type = new FieldType();
+    type.setDimensions(1, 4);
+    type.setDocValuesType(DocValuesType.BINARY);
+    type.freeze();
+    mi.addField(new BinaryPoint("pointAndDvField", "term".getBytes(StandardCharsets.UTF_8), type), analyzer);
+
+    assertEquals("analyzedField:\n" +
+        "\t'[61 61]':2: [(0, 0, 2, [70 6f 73 3a 20 30]), (1, 6, 8, [70 6f 73 3a 20 32])]\n" +
+        "\t'[62 62]':1: [(1, 3, 5, [70 6f 73 3a 20 31])]\n" +
+        "\tterms=2, positions=3\n" +
+        "pointAndDvField:\n" +
+        "\tterms=0, positions=0\n" +
+        "\n" +
+        "fields=2, terms=2, positions=3", mi.toStringDebug());
+  }
+
 }
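Since toString() no longer dumps the index internals, callers that relied on it for diagnostics need to switch to the new experimental method. A minimal sketch (field name, text, and analyzer are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;

public class MemoryIndexDebugDemo {
  public static void main(String[] args) {
    MemoryIndex mi = new MemoryIndex();
    mi.addField("body", "quick brown fox", new StandardAnalyzer());
    // Previously: System.out.println(mi); -- that toString() could NPE when
    // payloads were stored.  The replacement is explicit and experimental:
    System.out.println(mi.toStringDebug());
  }
}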
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
index 85b14614435..2c5fcbabde5 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/CharStream.java
@@ -112,4 +112,4 @@ interface CharStream {
   void Done();
 }
-/* JavaCC - OriginalChecksum=c847dd1920bf7901125a7244125682ad (do not edit this line) */
+/* JavaCC - OriginalChecksum=30b94cad7b10d0d81e3a59a1083939d0 (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
index b9963ec1bd5..69a7559b71a 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java
@@ -27,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 
 /**
  * A QueryParser which constructs queries to search multiple fields.
@@ -148,18 +149,54 @@ public class MultiFieldQueryParser extends QueryParser
   protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
     if (field == null) {
       List<Query> clauses = new ArrayList<>();
+      Query[] fieldQueries = new Query[fields.length];
+      int maxTerms = 0;
       for (int i = 0; i < fields.length; i++) {
         Query q = super.getFieldQuery(fields[i], queryText, quoted);
         if (q != null) {
-          //If the user passes a map of boosts
-          if (boosts != null) {
-            //Get the boost from the map and apply them
-            Float boost = boosts.get(fields[i]);
-            if (boost != null) {
-              q = new BoostQuery(q, boost.floatValue());
+          if (q instanceof TermQuery) {
+            maxTerms = Math.max(1, maxTerms);
+          } else if (q instanceof BooleanQuery) {
+            maxTerms = Math.max(maxTerms, ((BooleanQuery)q).clauses().size());
+          }
+          fieldQueries[i] = q;
+        }
+      }
+      for (int termNum = 0; termNum < maxTerms; termNum++) {
+        List<Query> termClauses = new ArrayList<>();
+        for (int i = 0; i < fields.length; i++) {
+          if (fieldQueries[i] != null) {
+            Query q = null;
+            if (fieldQueries[i] instanceof BooleanQuery) {
+              List<BooleanClause> nestedClauses = ((BooleanQuery)fieldQueries[i]).clauses();
+              if (termNum < nestedClauses.size()) {
+                q = nestedClauses.get(termNum).getQuery();
+              }
+            } else if (termNum == 0) { // e.g. TermQuery-s
+              q = fieldQueries[i];
+            }
+            if (q != null) {
+              if (boosts != null) {
+                //Get the boost from the map and apply them
+                Float boost = boosts.get(fields[i]);
+                if (boost != null) {
+                  q = new BoostQuery(q, boost);
+                }
+              }
+              termClauses.add(q);
             }
           }
-          clauses.add(q);
+        }
+        if (maxTerms > 1) {
+          if (termClauses.size() > 0) {
+            BooleanQuery.Builder builder = newBooleanQuery();
+            for (Query termClause : termClauses) {
+              builder.add(termClause, BooleanClause.Occur.SHOULD);
+            }
+            clauses.add(builder.build());
+          }
+        } else {
+          clauses.addAll(termClauses);
         }
       }
       if (clauses.size() == 0)  // happens for stopwords
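The rewritten getFieldQuery() walks the per-field queries term by term, so a multi-term analyzed string now produces one SHOULD group per term spanning all fields, rather than one group per field. A hedged sketch of the observable effect (field names, analyzer, and the printed shape are illustrative):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.Query;

public class MultiFieldGroupingDemo {
  public static void main(String[] args) throws Exception {
    String[] fields = { "title", "body" };
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer());
    parser.setSplitOnWhitespace(false);  // "hello world" is analyzed as one string per field
    Query q = parser.parse("hello world");
    // Roughly: ((title:hello body:hello) (title:world body:world))
    // i.e. grouped per term across fields, not per field across terms.
    System.out.println(q);
  }
}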
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
index a0ddab2d363..3c02be3f004 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/ParseException.java
@@ -184,4 +184,4 @@ public class ParseException extends Exception {
   }
 }
-/* JavaCC - OriginalChecksum=61602edcb3a15810cbc58f5593eba40d (do not edit this line) */
+/* JavaCC - OriginalChecksum=b187d97d5bb75c3fc63d642c1c26ac6e (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
index 08a477e79b4..c137d3043b7 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java
@@ -3,8 +3,11 @@ package org.apache.lucene.queryparser.classic;
 
 import java.io.StringReader;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
+import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.DateTools;
@@ -81,6 +84,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
    */
   static public enum Operator { OR, AND }
 
+  /** default split on whitespace behavior */
+  public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
+
   /** Create a query parser.
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
@@ -90,6 +96,28 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     init(f, a);
   }
 
+  /**
+   * @see #setSplitOnWhitespace(boolean)
+   */
+  public boolean getSplitOnWhitespace() {
+    return splitOnWhitespace;
+  }
+
+  /**
+   * Whether query text should be split on whitespace prior to analysis.
+   * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
+   */
+  public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+    this.splitOnWhitespace = splitOnWhitespace;
+  }
+
+  private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+  private static Set<Integer> disallowedPostMultiTerm
+    = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+  private static boolean allowedPostMultiTerm(int tokenKind) {
+    return disallowedPostMultiTerm.contains(tokenKind) == false;
+  }
+
   // *   Query  ::= ( Clause )*
   // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
   final public int Conjunction() throws ParseException {
@@ -129,15 +157,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
     case PLUS:
       jj_consume_token(PLUS);
-              ret = MOD_REQ;
+      ret = MOD_REQ;
       break;
     case MINUS:
       jj_consume_token(MINUS);
-               ret = MOD_NOT;
+      ret = MOD_NOT;
       break;
     case NOT:
       jj_consume_token(NOT);
-               ret = MOD_NOT;
+      ret = MOD_NOT;
       break;
     default:
       jj_la1[2] = jj_gen;
       ;
@@ -166,11 +194,37 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
   List<BooleanClause> clauses = new ArrayList<BooleanClause>();
   Query q, firstQuery=null;
   int conj, mods;
-    mods = Modifiers();
-    q = Clause(field);
-    addClause(clauses, CONJ_NONE, mods, q);
-    if (mods == MOD_NONE)
-        firstQuery=q;
+    if (jj_2_1(2)) {
+      firstQuery = MultiTerm(field, clauses);
+    } else {
+      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+      case NOT:
+      case PLUS:
+      case MINUS:
+      case BAREOPER:
+      case LPAREN:
+      case STAR:
+      case QUOTED:
+      case TERM:
+      case PREFIXTERM:
+      case WILDTERM:
+      case REGEXPTERM:
+      case RANGEIN_START:
+      case RANGEEX_START:
+      case NUMBER:
+        mods = Modifiers();
+        q = Clause(field);
+        addClause(clauses, CONJ_NONE, mods, q);
+        if (mods == MOD_NONE) {
+          firstQuery = q;
+        }
+        break;
+      default:
+        jj_la1[4] = jj_gen;
+        jj_consume_token(-1);
+        throw new ParseException();
+      }
+    }
     label_1:
     while (true) {
       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -193,39 +247,66 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
         ;
         break;
       default:
-        jj_la1[4] = jj_gen;
+        jj_la1[5] = jj_gen;
         break label_1;
       }
-      conj = Conjunction();
-      mods = Modifiers();
-      q = Clause(field);
-      addClause(clauses, conj, mods, q);
-    }
-      if (clauses.size() == 1 && firstQuery != null)
-        {if (true) return firstQuery;}
-      else {
-  {if (true) return getBooleanQuery(clauses);}
+      if (jj_2_2(2)) {
+        MultiTerm(field, clauses);
+      } else {
+        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+        case AND:
+        case OR:
+        case NOT:
+        case PLUS:
+        case MINUS:
+        case BAREOPER:
+        case LPAREN:
+        case STAR:
+        case QUOTED:
+        case TERM:
+        case PREFIXTERM:
+        case WILDTERM:
+        case REGEXPTERM:
+        case RANGEIN_START:
+        case RANGEEX_START:
+        case NUMBER:
+          conj = Conjunction();
+          mods = Modifiers();
+          q = Clause(field);
+          addClause(clauses, conj, mods, q);
+          break;
+        default:
+          jj_la1[6] = jj_gen;
+          jj_consume_token(-1);
+          throw new ParseException();
+        }
       }
+    }
+    if (clauses.size() == 1 && firstQuery != null) {
+      {if (true) return firstQuery;}
+    } else {
+      {if (true) return getBooleanQuery(clauses);}
+    }
     throw new Error("Missing return statement in function");
   }
 
   final public Query Clause(String field) throws ParseException {
   Query q;
   Token fieldToken=null, boost=null;
-    if (jj_2_1(2)) {
+    if (jj_2_3(2)) {
       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
       case TERM:
         fieldToken = jj_consume_token(TERM);
         jj_consume_token(COLON);
-                               field=discardEscapeChar(fieldToken.image);
+        field=discardEscapeChar(fieldToken.image);
         break;
       case STAR:
         jj_consume_token(STAR);
         jj_consume_token(COLON);
-                      field="*";
+        field="*";
         break;
       default:
-        jj_la1[5] = jj_gen;
+        jj_la1[7] = jj_gen;
         jj_consume_token(-1);
         throw new ParseException();
       }
@@ -255,16 +336,16 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
         boost = jj_consume_token(NUMBER);
         break;
       default:
-        jj_la1[6] = jj_gen;
+        jj_la1[8] = jj_gen;
         ;
       }
       break;
     default:
-      jj_la1[7] = jj_gen;
+      jj_la1[9] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
-      {if (true) return handleBoost(q, boost);}
+    {if (true) return handleBoost(q, boost);}
     throw new Error("Missing return statement in function");
   }
 
@@ -291,73 +372,86 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       break;
     case STAR:
       term = jj_consume_token(STAR);
-                      wildcard=true;
+      wildcard=true;
       break;
     case PREFIXTERM:
      term = jj_consume_token(PREFIXTERM);
-                            prefix=true;
+      prefix=true;
       break;
     case WILDTERM:
       term = jj_consume_token(WILDTERM);
-                          wildcard=true;
+      wildcard=true;
       break;
     case REGEXPTERM:
       term = jj_consume_token(REGEXPTERM);
-                            regexp=true;
+      regexp=true;
       break;
     case NUMBER:
       term = jj_consume_token(NUMBER);
       break;
     case BAREOPER:
      term = jj_consume_token(BAREOPER);
-                          term.image = term.image.substring(0,1);
+      term.image = term.image.substring(0,1);
       break;
     default:
-      jj_la1[8] = jj_gen;
+      jj_la1[10] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
-    case FUZZY_SLOP:
-      fuzzySlop = jj_consume_token(FUZZY_SLOP);
-                              fuzzy=true;
-      break;
-    default:
-      jj_la1[9] = jj_gen;
-      ;
-    }
-    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
     case CARAT:
-      jj_consume_token(CARAT);
-      boost = jj_consume_token(NUMBER);
+    case FUZZY_SLOP:
       switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+      case CARAT:
+        jj_consume_token(CARAT);
+        boost = jj_consume_token(NUMBER);
+        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+        case FUZZY_SLOP:
+          fuzzySlop = jj_consume_token(FUZZY_SLOP);
+          fuzzy=true;
+          break;
+        default:
+          jj_la1[11] = jj_gen;
+          ;
+        }
+        break;
       case FUZZY_SLOP:
         fuzzySlop = jj_consume_token(FUZZY_SLOP);
-                                  fuzzy=true;
+        fuzzy=true;
+        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+        case CARAT:
+          jj_consume_token(CARAT);
+          boost = jj_consume_token(NUMBER);
+          break;
+        default:
+          jj_la1[12] = jj_gen;
+          ;
+        }
         break;
       default:
-        jj_la1[10] = jj_gen;
-        ;
+        jj_la1[13] = jj_gen;
+        jj_consume_token(-1);
+        throw new ParseException();
       }
       break;
     default:
-      jj_la1[11] = jj_gen;
+      jj_la1[14] = jj_gen;
       ;
     }
-      q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
+    q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
     break;
   case RANGEIN_START:
   case RANGEEX_START:
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
     case RANGEIN_START:
       jj_consume_token(RANGEIN_START);
-                        startInc=true;
+      startInc = true;
       break;
     case RANGEEX_START:
       jj_consume_token(RANGEEX_START);
       break;
     default:
-      jj_la1[12] = jj_gen;
+      jj_la1[15] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
@@ -369,7 +463,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       goop1 = jj_consume_token(RANGE_QUOTED);
       break;
     default:
-      jj_la1[13] = jj_gen;
+      jj_la1[16] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
@@ -378,7 +472,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       jj_consume_token(RANGE_TO);
       break;
     default:
-      jj_la1[14] = jj_gen;
+      jj_la1[17] = jj_gen;
       ;
     }
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -389,20 +483,20 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       goop2 = jj_consume_token(RANGE_QUOTED);
       break;
     default:
-      jj_la1[15] = jj_gen;
+      jj_la1[18] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
     case RANGEIN_END:
       jj_consume_token(RANGEIN_END);
-                      endInc=true;
+      endInc = true;
       break;
     case RANGEEX_END:
       jj_consume_token(RANGEEX_END);
       break;
     default:
-      jj_la1[16] = jj_gen;
+      jj_la1[19] = jj_gen;
       jj_consume_token(-1);
       throw new ParseException();
     }
@@ -412,46 +506,69 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       boost = jj_consume_token(NUMBER);
       break;
     default:
-      jj_la1[17] = jj_gen;
+      jj_la1[20] = jj_gen;
       ;
     }
-         boolean startOpen=false;
-         boolean endOpen=false;
-         if (goop1.kind == RANGE_QUOTED) {
-           goop1.image = goop1.image.substring(1, goop1.image.length()-1);
-         } else if ("*".equals(goop1.image)) {
-           startOpen=true;
-         }
-         if (goop2.kind == RANGE_QUOTED) {
-           goop2.image = goop2.image.substring(1, goop2.image.length()-1);
-         } else if ("*".equals(goop2.image)) {
-           endOpen=true;
-         }
-         q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+    boolean startOpen=false;
+    boolean endOpen=false;
+    if (goop1.kind == RANGE_QUOTED) {
+      goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+    } else if ("*".equals(goop1.image)) {
+      startOpen=true;
+    }
+    if (goop2.kind == RANGE_QUOTED) {
+      goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+    } else if ("*".equals(goop2.image)) {
+      endOpen=true;
+    }
+    q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
     break;
   case QUOTED:
     term = jj_consume_token(QUOTED);
     switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
-    case FUZZY_SLOP:
-      fuzzySlop = jj_consume_token(FUZZY_SLOP);
-      break;
-    default:
-      jj_la1[18] = jj_gen;
-      ;
-    }
-    switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
     case CARAT:
-      jj_consume_token(CARAT);
-      boost = jj_consume_token(NUMBER);
+    case FUZZY_SLOP:
+      switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+      case CARAT:
+        jj_consume_token(CARAT);
+        boost = jj_consume_token(NUMBER);
+        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+        case FUZZY_SLOP:
+          fuzzySlop = jj_consume_token(FUZZY_SLOP);
+          fuzzy=true;
+          break;
+        default:
+          jj_la1[21] = jj_gen;
+          ;
+        }
+        break;
+      case FUZZY_SLOP:
+        fuzzySlop = jj_consume_token(FUZZY_SLOP);
+        fuzzy=true;
+        switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+        case CARAT:
+          jj_consume_token(CARAT);
+          boost = jj_consume_token(NUMBER);
+          break;
+        default:
+          jj_la1[22] = jj_gen;
+          ;
+        }
+        break;
+      default:
+        jj_la1[23] = jj_gen;
+        jj_consume_token(-1);
+        throw new ParseException();
+      }
       break;
     default:
-      jj_la1[19] = jj_gen;
+      jj_la1[24] = jj_gen;
       ;
     }
-      q = handleQuotedTerm(field, term, fuzzySlop);
+    q = handleQuotedTerm(field, term, fuzzySlop);
     break;
   default:
-    jj_la1[20] = jj_gen;
+    jj_la1[25] = jj_gen;
     jj_consume_token(-1);
     throw new ParseException();
   }
@@ -459,6 +576,44 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     throw new Error("Missing return statement in function");
   }
 
+/** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */
+  final public Query MultiTerm(String field, List<BooleanClause> clauses) throws ParseException {
+  Token text, whitespace, followingText;
+  Query firstQuery = null;
+    text = jj_consume_token(TERM);
+    if (splitOnWhitespace) {
+      firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+      addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+    }
+    if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+
+    } else {
+      jj_consume_token(-1);
+      throw new ParseException();
+    }
+    label_2:
+    while (true) {
+      followingText = jj_consume_token(TERM);
+      if (splitOnWhitespace) {
+        Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+        addClause(clauses, CONJ_NONE, MOD_NONE, q);
+      } else { // build up the text to send to analysis
+        text.image += " " + followingText.image;
+      }
+      if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
+        ;
+      } else {
+        break label_2;
+      }
+    }
+    if (splitOnWhitespace == false) {
+      firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+      addMultiTermClauses(clauses, firstQuery);
+    }
+    {if (true) return firstQuery;}
+    throw new Error("Missing return statement in function");
+  }
+
   private boolean jj_2_1(int xla) {
     jj_la = xla; jj_lastpos = jj_scanpos = token;
     try { return !jj_3_1(); }
@@ -466,23 +621,71 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     finally { jj_save(0, xla); }
   }
 
-  private boolean jj_3R_2() {
+  private boolean jj_2_2(int xla) {
+    jj_la = xla; jj_lastpos = jj_scanpos = token;
+    try { return !jj_3_2(); }
+    catch(LookaheadSuccess ls) { return true; }
+    finally { jj_save(1, xla); }
+  }
+
+  private boolean jj_2_3(int xla) {
+    jj_la = xla; jj_lastpos = jj_scanpos = token;
+    try { return !jj_3_3(); }
+    catch(LookaheadSuccess ls) { return true; }
+    finally { jj_save(2, xla); }
+  }
+
+  private boolean jj_3R_4() {
     if (jj_scan_token(TERM)) return true;
     if (jj_scan_token(COLON)) return true;
     return false;
   }
 
+  private boolean jj_3_2() {
+    if (jj_3R_3()) return true;
+    return false;
+  }
+
   private boolean jj_3_1() {
+    if (jj_3R_3()) return true;
+    return false;
+  }
+
+  private boolean jj_3R_7() {
+    if (jj_scan_token(TERM)) return true;
+    return false;
+  }
+
+  private boolean jj_3_3() {
     Token xsp;
     xsp = jj_scanpos;
-    if (jj_3R_2()) {
+    if (jj_3R_4()) {
     jj_scanpos = xsp;
-    if (jj_3R_3()) return true;
+    if (jj_3R_5()) return true;
     }
     return false;
   }
 
   private boolean jj_3R_3() {
+    if (jj_scan_token(TERM)) return true;
+    jj_lookingAhead = true;
+    jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
+    jj_lookingAhead = false;
+    if (!jj_semLA || jj_3R_6()) return true;
+    Token xsp;
+    if (jj_3R_7()) return true;
+    while (true) {
+      xsp = jj_scanpos;
+      if (jj_3R_7()) { jj_scanpos = xsp; break; }
+    }
+    return false;
+  }
+
+  private boolean jj_3R_6() {
+    return false;
+  }
+
+  private boolean jj_3R_5() {
     if (jj_scan_token(STAR)) return true;
     if (jj_scan_token(COLON)) return true;
     return false;
@@ -497,8 +700,11 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
   private int jj_ntk;
   private Token jj_scanpos, jj_lastpos;
   private int jj_la;
+  /** Whether we are looking ahead. */
+  private boolean jj_lookingAhead = false;
+  private boolean jj_semLA;
   private int jj_gen;
-  final private int[] jj_la1 = new int[21];
+  final private int[] jj_la1 = new int[26];
   static private int[] jj_la1_0;
   static private int[] jj_la1_1;
   static {
@@ -506,12 +712,12 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     jj_la1_init_1();
   }
   private static void jj_la1_init_0() {
-    jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x200000,0x40000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0xfda2000,};
+    jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7c00,0xfda7f00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x40000,0x240000,0x240000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0x240000,0x240000,0xfda2000,};
   }
   private static void jj_la1_init_1() {
-    jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,};
+    jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,};
   }
-  final private JJCalls[] jj_2_rtns = new JJCalls[1];
+  final private JJCalls[] jj_2_rtns = new JJCalls[3];
   private boolean jj_rescan = false;
   private int jj_gc = 0;
 
@@ -521,7 +727,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
-    for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+    for (int i = 0; i < 26; i++) jj_la1[i] = -1;
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
@@ -530,8 +736,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     token_source.ReInit(stream);
     token = new Token();
     jj_ntk = -1;
+    jj_lookingAhead = false;
     jj_gen = 0;
-    for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+    for (int i = 0; i < 26; i++) jj_la1[i] = -1;
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
@@ -541,7 +748,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
-    for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+    for (int i = 0; i < 26; i++) jj_la1[i] = -1;
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
@@ -551,7 +758,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
-    for (int i = 0; i < 21; i++) jj_la1[i] = -1;
+    for (int i = 0; i < 26; i++) jj_la1[i] = -1;
    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
@@ -614,7 +821,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
   /** Get the specific Token. */
   final public Token getToken(int index) {
-    Token t = token;
+    Token t = jj_lookingAhead ? jj_scanpos : token;
     for (int i = 0; i < index; i++) {
       if (t.next != null) t = t.next;
       else t = t.next = token_source.getNextToken();
     }
@@ -668,7 +875,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
       la1tokens[jj_kind] = true;
       jj_kind = -1;
     }
-    for (int i = 0; i < 21; i++) {
+    for (int i = 0; i < 26; i++) {
       if (jj_la1[i] == jj_gen) {
         for (int j = 0; j < 32; j++) {
          if ((jj_la1_0[i] & (1<<j)) != 0) {
            la1tokens[j] = true;
          }
          if ((jj_la1_1[i] & (1<<j)) != 0) {
            la1tokens[32+j] = true;
          }
        }
      }
    }
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj
+  /** default split on whitespace behavior */
+  public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
+
+  /**
+   * @see #setSplitOnWhitespace(boolean)
+   */
+  public boolean getSplitOnWhitespace() {
+    return splitOnWhitespace;
+  }
+
+  /**
+   * Whether query text should be split on whitespace prior to analysis.
+   * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
+   */
+  public void setSplitOnWhitespace(boolean splitOnWhitespace) {
+    this.splitOnWhitespace = splitOnWhitespace;
+  }
+
+  private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
+  private static Set<Integer> disallowedPostMultiTerm
+    = new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
+  private static boolean allowedPostMultiTerm(int tokenKind) {
+    return disallowedPostMultiTerm.contains(tokenKind) == false;
+  }
 }
 
 PARSER_END(QueryParser)
@@ -123,15 +150,14 @@ PARSER_END(QueryParser)
 /* ***************** */
 
 <*> TOKEN : {
-  <#_NUM_CHAR: ["0"-"9"] >
-// every character that follows a backslash is considered as an escaped character
-| <#_ESCAPED_CHAR: "\\" ~[] >
+  <#_NUM_CHAR:   ["0"-"9"] >
+| <#_ESCAPED_CHAR: "\\" ~[] >  // every character that follows a backslash is considered as an escaped character
 | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                            "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
-                       | <_ESCAPED_CHAR> ) >
-| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
-| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
-| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
+                          | <_ESCAPED_CHAR> ) >
+| <#_TERM_CHAR:  ( <_TERM_START_CHAR> | "-" | "+" ) >
+| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
+| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
 }
 
 SKIP : {
@@ -139,37 +165,37 @@ PARSER_END(QueryParser)
 }
 
 <DEFAULT, Range> TOKEN : {
-  <AND:       ("AND" | "&&") >
-| <OR:        ("OR" | "||") >
-| <NOT:       ("NOT" | "!") >
-| <PLUS:      "+" >
-| <MINUS:     "-" >
-| <BAREOPER:    ("+"|"-"|"!") <_WHITESPACE> >
-| <LPAREN:    "(" >
-| <RPAREN:    ")" >
-| <COLON:     ":" >
-| <STAR:      "*" >
-| <CARAT:     "^" > : Boost
-| <QUOTED:     "\"" (<_QUOTED_CHAR>)* "\"">
-| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
-| <FUZZY_SLOP:     "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
-| <PREFIXTERM:  ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
-| <WILDTERM:  (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
-| <REGEXPTERM:  "/" (~[ "/" ] | "\\/" )* "/" >
+  <AND:           ("AND" | "&&") >
+| <OR:            ("OR" | "||") >
+| <NOT:           ("NOT" | "!") >
+| <PLUS:          "+" >
+| <MINUS:         "-" >
+| <BAREOPER:      ("+"|"-"|"!") <_WHITESPACE> >
+| <LPAREN:        "(" >
+| <RPAREN:        ")" >
+| <COLON:         ":" >
+| <STAR:          "*" >
+| <CARAT:         "^" > : Boost
+| <QUOTED:        "\"" (<_QUOTED_CHAR>)* "\"">
+| <TERM:          <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
+| <FUZZY_SLOP:    "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
+| <PREFIXTERM:    ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
+| <WILDTERM:      (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <REGEXPTERM:    "/" (~[ "/" ] | "\\/" )* "/" >
 | <RANGEIN_START: "[" > : Range
 | <RANGEEX_START: "{" > : Range
 }
 
 <Boost> TOKEN : {
-<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+  <NUMBER:        (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
 }
 
 <Range> TOKEN : {
-<RANGE_TO: "TO">
-| <RANGEIN_END: "]"> : DEFAULT
-| <RANGEEX_END: "}"> : DEFAULT
+  <RANGE_TO:      "TO">
+| <RANGEIN_END:   "]"> : DEFAULT
+| <RANGEEX_END:   "}"> : DEFAULT
 | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
-| <RANGE_GOOP:   (~[ " ", "]", "}" ])+ >
+| <RANGE_GOOP:    (~[ " ", "]", "}" ])+ >
 }
 
 // *   Query  ::= ( Clause )*
@@ -191,23 +217,20 @@ int Modifiers() : {
 }
 {
   [
-     <PLUS> { ret = MOD_REQ; }
-     | <MINUS> { ret = MOD_NOT; }
-     | <NOT> { ret = MOD_NOT; }
+    <PLUS>    { ret = MOD_REQ; }
+  | <MINUS>   { ret = MOD_NOT; }
+  | <NOT>     { ret = MOD_NOT; }
   ]
   { return ret; }
 }
 
 // This makes sure that there is no garbage after the query string
-Query TopLevelQuery(String field) : 
-{
+Query TopLevelQuery(String field) : {
   Query q;
 }
 {
   q=Query(field) <EOF>
-  {
-    return q;
-  }
+  { return q; }
 }
 
@@ -217,23 +240,30 @@ Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  int conj, mods;
}
{
-  mods=Modifiers() q=Clause(field)
-  {
-    addClause(clauses, CONJ_NONE, mods, q);
-    if (mods == MOD_NONE)
-      firstQuery=q;
-  }
   (
-    conj=Conjunction() mods=Modifiers() q=Clause(field)
-    { addClause(clauses, conj, mods, q); }
-  )*
-  {
-    if (clauses.size() == 1 && firstQuery != null)
-      return firstQuery;
-    else {
-      return getBooleanQuery(clauses);
+    LOOKAHEAD(2)
+    firstQuery=MultiTerm(field, clauses)
+    | mods=Modifiers() q=Clause(field)
+      {
+        addClause(clauses, CONJ_NONE, mods, q);
+        if (mods == MOD_NONE) {
+          firstQuery = q;
+        }
      }
+  )
+  (
+    LOOKAHEAD(2)
+    MultiTerm(field, clauses)
+    | conj=Conjunction() mods=Modifiers() q=Clause(field)
+      { addClause(clauses, conj, mods, q); }
+  )*
+  {
+    if (clauses.size() == 1 && firstQuery != null) {
+      return firstQuery;
+    } else {
+      return getBooleanQuery(clauses);
    }
+  }
}

Query Clause(String field) : {
@@ -244,20 +274,17 @@ Query Clause(String field) : {
  [
    LOOKAHEAD(2)
    (
-      fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
-      | <STAR> <COLON> {field="*";}
+      fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
+    | <STAR> <COLON>            {field="*";}
    )
  ]
-  (
-    q=Term(field)
-    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
-
+  ( q=Term(field)
+  | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
  )
-    { return handleBoost(q, boost); }
+  { return handleBoost(q, boost); }
}

-
Query Term(String field) : {
@@ -270,45 +297,85 @@ Query Term(String field) : {
}
{
  (
-     (
-       term=<TERM>
-       | term=<STAR> { wildcard=true; }
-       | term=<PREFIXTERM> { prefix=true; }
-       | term=<WILDTERM> { wildcard=true; }
-       | term=<REGEXPTERM> { regexp=true; }
-       | term=<NUMBER>
-       | term=<BAREOPER> { term.image = term.image.substring(0,1); }
-     )
-     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
-     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
-     {
-       q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
-     }
-   | ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
-       ( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
-       [ <RANGE_TO> ]
-       ( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
-       ( <RANGEIN_END> {endInc=true;} | <RANGEEX_END> ))
-     [ <CARAT> boost=<NUMBER> ]
-     {
-       boolean startOpen=false;
-       boolean endOpen=false;
-       if (goop1.kind == RANGE_QUOTED) {
-         goop1.image = goop1.image.substring(1, goop1.image.length()-1);
-       } else if ("*".equals(goop1.image)) {
-         startOpen=true;
-       }
-       if (goop2.kind == RANGE_QUOTED) {
-         goop2.image = goop2.image.substring(1, goop2.image.length()-1);
-       } else if ("*".equals(goop2.image)) {
-         endOpen=true;
-       }
-       q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
-     }
-   | term=<QUOTED>
-     [ fuzzySlop=<FUZZY_SLOP> ]
-     [ <CARAT> boost=<NUMBER> ]
-     { q = handleQuotedTerm(field, term, fuzzySlop); }
+    (
+      term=<TERM>
+    | term=<STAR>       { wildcard=true; }
+    | term=<PREFIXTERM> { prefix=true; }
+    | term=<WILDTERM>   { wildcard=true; }
+    | term=<REGEXPTERM> { regexp=true; }
+    | term=<NUMBER>
+    | term=<BAREOPER>   { term.image = term.image.substring(0,1); }
+    )
+    [
+      <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+    | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+    ]
+    { q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
+
+  | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
+    ( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
+    [ <RANGE_TO> ]
+    ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
+    ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
+    [ <CARAT> boost=<NUMBER> ]
+    {
+      boolean startOpen=false;
+      boolean endOpen=false;
+      if (goop1.kind == RANGE_QUOTED) {
+        goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+      } else if ("*".equals(goop1.image)) {
+        startOpen=true;
+      }
+      if (goop2.kind == RANGE_QUOTED) {
+        goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+      } else if ("*".equals(goop2.image)) {
+        endOpen=true;
+      }
+      q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
+    }
+
+  | term=<QUOTED>
+    [
+      <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+    | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
+    ]
+    { q = handleQuotedTerm(field, term, fuzzySlop); }
  )
  { return handleBoost(q, boost); }
}
+
+/** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */
+Query MultiTerm(String field, List<BooleanClause> clauses) : {
+  Token text, whitespace, followingText;
+  Query firstQuery = null;
+}
+{
+  text=<TERM>
+  {
+    if (splitOnWhitespace) {
+      firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+      addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
+    }
+  }
+  // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
+  LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+  (
+    LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
+    followingText=<TERM>
+    {
+      if (splitOnWhitespace) {
+        Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
+        addClause(clauses, CONJ_NONE, MOD_NONE, q);
+      } else { // build up the text to send to analysis
+        text.image += " " + followingText.image;
+      }
+    }
+  )+
+  {
+    if (splitOnWhitespace == false) {
+      firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
+      addMultiTermClauses(clauses, firstQuery);
+    }
+    return firstQuery;
+  }
+}
\ No newline at end of file
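The grammar change above is the heart of LUCENE-2605: adjacent bare TERM tokens are gathered by the new MultiTerm() production and, when splitOnWhitespace is false, re-joined and sent through the analyzer as a single string. A hedged sketch contrasting the two modes; "synonymAnalyzer" is assumed to inject "cavy" for the two-word input "guinea pig", as the test MockSynonymFilter does:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.QueryParser;

public class SplitModeContrast {
  static void contrast(Analyzer synonymAnalyzer) throws Exception {
    QueryParser qp = new QueryParser("field", synonymAnalyzer);

    qp.setSplitOnWhitespace(true);   // default: each whitespace token is analyzed alone,
    System.out.println(qp.parse("guinea pig"));   // so the multi-word synonym never fires

    qp.setSplitOnWhitespace(false);  // new: "guinea pig" reaches the analyzer intact,
    System.out.println(qp.parse("guinea pig"));   // yielding roughly (Synonym(cavy guinea)) pig
  }
}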
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
index c00d88eecff..cdfa4776175 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java
@@ -464,6 +464,45 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
     throw new RuntimeException("Clause cannot be both required and prohibited");
   }
 
+  /**
+   * Adds clauses generated from analysis over text containing whitespace.
+   * There are no operators, so the query's clauses can either be MUST (if the
+   * default operator is AND) or SHOULD (default OR).
+   *
+   * If all of the clauses in the given Query are TermQuery-s, this method flattens the result
+   * by adding the TermQuery-s individually to the output clause list; otherwise, the given Query
+   * is added as a single clause including its nested clauses.
+   */
+  protected void addMultiTermClauses(List<BooleanClause> clauses, Query q) {
+    // We might have been passed a null query; the term might have been
+    // filtered away by the analyzer.
+    if (q == null) {
+      return;
+    }
+    boolean allNestedTermQueries = false;
+    if (q instanceof BooleanQuery) {
+      allNestedTermQueries = true;
+      for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+        if ( ! (clause.getQuery() instanceof TermQuery)) {
+          allNestedTermQueries = false;
+          break;
+        }
+      }
+    }
+    if (allNestedTermQueries) {
+      clauses.addAll(((BooleanQuery)q).clauses());
+    } else {
+      BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
+      if (q instanceof BooleanQuery) {
+        for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
+          clauses.add(newBooleanClause(clause.getQuery(), occur));
+        }
+      } else {
+        clauses.add(newBooleanClause(q, occur));
+      }
+    }
+  }
+
   /**
    * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
   */
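A small runnable illustration of where addMultiTermClauses() matters: with no explicit operators, the clauses it emits follow the parser's default operator. This sketch is not from the patch; the analyzer is a placeholder:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;

public class DefaultOperatorDemo {
  static void show(Analyzer analyzer) throws Exception {
    QueryParser qp = new QueryParser("field", analyzer);
    qp.setSplitOnWhitespace(false);
    qp.setDefaultOperator(QueryParserBase.AND_OPERATOR);
    // For a plain analyzer this prints roughly: +field:guinea +field:pig
    // (all-TermQuery results are flattened; mixed results are re-wrapped
    // clause by clause with MUST or SHOULD via newBooleanClause()).
    System.out.println(qp.parse("guinea pig"));
  }
}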
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
index 8c8951e1b83..065ff8b4411 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java
@@ -285,7 +285,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                      jjCheckNAddTwoStates(33, 34);
                }
                else if (curChar == 92)
-                  jjCheckNAddTwoStates(35, 35);
+                  jjCheckNAdd(35);
                break;
             case 0:
                if ((0x97ffffff87ffffffL & l) != 0L)
@@ -384,7 +384,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                   break;
                case 26:
                   if (curChar == 92)
-                     jjAddStates(27, 28);
+                     jjstateSet[jjnewStateCnt++] = 27;
                   break;
                case 27:
                   if (kind > 21)
@@ -400,7 +400,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                   break;
                case 29:
                   if (curChar == 92)
-                     jjAddStates(29, 30);
+                     jjstateSet[jjnewStateCnt++] = 30;
                   break;
                case 30:
                   if (kind > 21)
@@ -423,7 +423,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                   break;
                case 34:
                   if (curChar == 92)
-                     jjCheckNAddTwoStates(35, 35);
+                     jjCheckNAdd(35);
                   break;
                case 35:
                   if (kind > 23)
@@ -453,7 +453,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                   break;
                case 43:
                   if (curChar == 92)
-                     jjCheckNAddTwoStates(44, 44);
+                     jjCheckNAdd(44);
                   break;
                case 44:
                   if (kind > 20)
@@ -466,7 +466,7 @@ private int jjMoveNfa_2(int startState, int curPos)
                   break;
                case 46:
                   if (curChar == 92)
-                     jjCheckNAddTwoStates(47, 47);
+                     jjCheckNAdd(47);
                   break;
                case 47:
                   jjCheckNAddStates(18, 20);
@@ -645,7 +645,7 @@ private int jjMoveNfa_0(int startState, int curPos)
                      break;
                   if (kind > 27)
                      kind = 27;
-                  jjAddStates(31, 32);
+                  jjAddStates(27, 28);
                   break;
                case 1:
                   if (curChar == 46)
@@ -799,11 +799,11 @@ private int jjMoveNfa_1(int startState, int curPos)
                   break;
                case 2:
                   if ((0xfffffffbffffffffL & l) != 0L)
-                     jjCheckNAddStates(33, 35);
+                     jjCheckNAddStates(29, 31);
                   break;
                case 3:
                   if (curChar == 34)
-                     jjCheckNAddStates(33, 35);
+                     jjCheckNAddStates(29, 31);
                   break;
                case 5:
                   if (curChar == 34 && kind > 31)
@@ -836,7 +836,7 @@ private int jjMoveNfa_1(int startState, int curPos)
                      jjCheckNAdd(6);
                   break;
                case 2:
-                  jjAddStates(33, 35);
+                  jjAddStates(29, 31);
                   break;
                case 4:
                   if (curChar == 92)
@@ -872,7 +872,7 @@ private int jjMoveNfa_1(int startState, int curPos)
                   break;
                case 2:
                   if (jjCanMove_1(hiByte, i1, i2, l1, l2))
-                     jjAddStates(33, 35);
+                     jjAddStates(29, 31);
                   break;
                case 6:
                   if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
@@ -899,9 +899,8 @@ private int jjMoveNfa_1(int startState, int curPos)
    }
 }
 static final int[] jjnextStates = {
-   37, 39, 40, 17, 18, 20, 42, 45, 31, 46, 43, 22, 23, 25, 26, 24, 
-   25, 26, 45, 31, 46, 44, 47, 35, 22, 28, 29, 27, 27, 30, 30, 0, 
-   1, 2, 4, 5, 
+   37, 39, 40, 17, 18, 20, 42, 43, 45, 46, 31, 22, 23, 25, 26, 24, 
+   25, 26, 45, 46, 31, 44, 47, 35, 22, 28, 29, 0, 1, 2, 4, 5, 
 };
 private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
 {
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
index aa57487f4c2..0e52ec21969 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/Token.java
@@ -128,4 +128,4 @@ public class Token implements java.io.Serializable {
   }
 }
-/* JavaCC - OriginalChecksum=c1e1418b35aa9e47ef8dc98b87423d70 (do not edit this line) */
+/* JavaCC - OriginalChecksum=405bb5d2fcd84e94ac1c8f0b12c1f914 (do not edit this line) */
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
index 7101f098f6e..ad111d0cd26 100644
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
+++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/TokenMgrError.java
@@ -144,4 +144,4 @@ public class TokenMgrError extends Error
     this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
   }
 }
-/* JavaCC - OriginalChecksum=0c275864a1972d9a01601ab81426872d (do not edit this line) */
+/* JavaCC - OriginalChecksum=f433e1a52b8eadbf12f3fbbbf87fd140 (do not edit this line) */
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
index 5b4eba87994..c3d7b37f5c1 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockSynonymAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SynonymQuery;
 import org.apache.lucene.search.TermQuery;
@@ -44,7 +46,9 @@ import java.io.IOException;
  * Tests QueryParser.
  */
 public class TestQueryParser extends QueryParserTestBase {
-  
+
+  protected boolean splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
+
   public static class QPTestParser extends QueryParser {
     public QPTestParser(String f, Analyzer a) {
       super(f, a);
@@ -67,6 +71,7 @@ public class TestQueryParser extends QueryParserTestBase {
     if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
     QueryParser qp = new QueryParser(getDefaultField(), a);
     qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
+    qp.setSplitOnWhitespace(splitOnWhitespace);
     return qp;
   }
 
@@ -310,18 +315,7 @@ public class TestQueryParser extends QueryParserTestBase {
     Query unexpanded = new TermQuery(new Term("field", "dogs"));
     assertEquals(unexpanded, smart.parse("\"dogs\""));
   }
-  
-  // TODO: fold these into QueryParserTestBase
-  
-  /** adds synonym of "dog" for "dogs". */
-  static class MockSynonymAnalyzer extends Analyzer {
-    @Override
-    protected TokenStreamComponents createComponents(String fieldName) {
-      MockTokenizer tokenizer = new MockTokenizer();
-      return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
-    }
-  }
-  
+
   /** simple synonyms test */
   public void testSynonyms() throws Exception {
     Query expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -483,4 +477,229 @@ public class TestQueryParser extends QueryParserTestBase {
       qp.parse("a*aaaaaaa");
     });
   }
-}
+
+  // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
+  @Override
+  public void testQPA() throws Exception {
+    boolean oldSplitOnWhitespace = splitOnWhitespace;
+    splitOnWhitespace = false;
+
+    assertQueryEquals("term phrase term", qpAnalyzer, "term phrase1 phrase2 term");
+
+    CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
+    setDefaultOperatorAND(cqpc);
+    assertQueryEquals(cqpc, "field", "term phrase term", "+term +phrase1 +phrase2 +term");
+
+    splitOnWhitespace = oldSplitOnWhitespace;
+  }
+
+  // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
+  public void testMultiWordSynonyms() throws Exception {
+    QueryParser dumb = new QueryParser("field", new Analyzer1());
+    dumb.setSplitOnWhitespace(false);
+
+    // A multi-word synonym source will form a synonym query for the same-starting-position tokens
+    BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder();
+    Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy"));
+    multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD);
+    multiWordExpandedBqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
+    Query multiWordExpandedBq = multiWordExpandedBqBuilder.build();
+    assertEquals(multiWordExpandedBq, dumb.parse("guinea pig"));
+
+    // With the phrase operator, a multi-word synonym source will form a multiphrase query.
+    // When the number of expanded term(s) is different from that of the original term(s), this is not good.
+ MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder(); + multiWordExpandedMpqBuilder.add(new Term[]{new Term("field", "guinea"), new Term("field", "cavy")}); + multiWordExpandedMpqBuilder.add(new Term("field", "pig")); + Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build(); + assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\"")); + + // Custom behavior: synonyms are expanded unless the quote operator is used + QueryParser smart = new SmartQueryParser(); + smart.setSplitOnWhitespace(false); + assertEquals(multiWordExpandedBq, smart.parse("guinea pig")); + + PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder(); + multiWordUnexpandedPqBuilder.add(new Term("field", "guinea")); + multiWordUnexpandedPqBuilder.add(new Term("field", "pig")); + Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build(); + assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\"")); + } + + // TODO: Move to QueryParserTestBase once standard flexible parser gets this capability + public void testOperatorsAndMultiWordSynonyms() throws Exception { + Analyzer a = new MockSynonymAnalyzer(); + + boolean oldSplitOnWhitespace = splitOnWhitespace; + splitOnWhitespace = false; + + // Operators should interrupt multiword analysis of adjacent words if they associate + assertQueryEquals("+guinea pig", a, "+guinea pig"); + assertQueryEquals("-guinea pig", a, "-guinea pig"); + assertQueryEquals("!guinea pig", a, "-guinea pig"); + assertQueryEquals("guinea* pig", a, "guinea* pig"); + assertQueryEquals("guinea? pig", a, "guinea? pig"); + assertQueryEquals("guinea~2 pig", a, "guinea~2 pig"); + assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig"); + + assertQueryEquals("guinea +pig", a, "guinea +pig"); + assertQueryEquals("guinea -pig", a, "guinea -pig"); + assertQueryEquals("guinea !pig", a, "guinea -pig"); + assertQueryEquals("guinea pig*", a, "guinea pig*"); + assertQueryEquals("guinea pig?", a, "guinea pig?"); + assertQueryEquals("guinea pig~2", a, "guinea pig~2"); + assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0"); + + assertQueryEquals("field:guinea pig", a, "guinea pig"); + assertQueryEquals("guinea field:pig", a, "guinea pig"); + + assertQueryEquals("NOT guinea pig", a, "-guinea pig"); + assertQueryEquals("guinea NOT pig", a, "guinea -pig"); + + assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)"); + assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig"); + assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)"); + assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig"); + + assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)"); + assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig"); + assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)"); + assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig"); + + assertQueryEquals("\"guinea\" pig", a, "guinea pig"); + assertQueryEquals("guinea \"pig\"", a, "guinea pig"); + + assertQueryEquals("(guinea) pig", a, "guinea pig"); + assertQueryEquals("guinea (pig)", a, "guinea pig"); + + assertQueryEquals("/guinea/ pig", a, "/guinea/ pig"); + assertQueryEquals("guinea /pig/", a, "guinea /pig/"); + + // Operators should not interrupt multiword analysis if they don't associate + assertQueryEquals("(guinea pig)", a, "Synonym(cavy guinea) pig"); + assertQueryEquals("+(guinea pig)", a, "+(Synonym(cavy guinea) pig)"); +
assertQueryEquals("-(guinea pig)", a, "-(Synonym(cavy guinea) pig)"); + assertQueryEquals("!(guinea pig)", a, "-(Synonym(cavy guinea) pig)"); + assertQueryEquals("NOT (guinea pig)", a, "-(Synonym(cavy guinea) pig)"); + assertQueryEquals("(guinea pig)^2", a, "(Synonym(cavy guinea) pig)^2.0"); + + assertQueryEquals("field:(guinea pig)", a, "Synonym(cavy guinea) pig"); + + assertQueryEquals("+small guinea pig", a, "+small Synonym(cavy guinea) pig"); + assertQueryEquals("-small guinea pig", a, "-small Synonym(cavy guinea) pig"); + assertQueryEquals("!small guinea pig", a, "-small Synonym(cavy guinea) pig"); + assertQueryEquals("NOT small guinea pig", a, "-small Synonym(cavy guinea) pig"); + assertQueryEquals("small* guinea pig", a, "small* Synonym(cavy guinea) pig"); + assertQueryEquals("small? guinea pig", a, "small? Synonym(cavy guinea) pig"); + assertQueryEquals("\"small\" guinea pig", a, "small Synonym(cavy guinea) pig"); + + assertQueryEquals("guinea pig +running", a, "Synonym(cavy guinea) pig +running"); + assertQueryEquals("guinea pig -running", a, "Synonym(cavy guinea) pig -running"); + assertQueryEquals("guinea pig !running", a, "Synonym(cavy guinea) pig -running"); + assertQueryEquals("guinea pig NOT running", a, "Synonym(cavy guinea) pig -running"); + assertQueryEquals("guinea pig running*", a, "Synonym(cavy guinea) pig running*"); + assertQueryEquals("guinea pig running?", a, "Synonym(cavy guinea) pig running?"); + assertQueryEquals("guinea pig \"running\"", a, "Synonym(cavy guinea) pig running"); + + assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2"); + + assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\""); + + splitOnWhitespace = oldSplitOnWhitespace; + } + + public void testOperatorsAndMultiWordSynonymsSplitOnWhitespace() throws Exception { + Analyzer a = new MockSynonymAnalyzer(); + + boolean oldSplitOnWhitespace = splitOnWhitespace; + splitOnWhitespace = true; + + assertQueryEquals("+guinea pig", a, "+guinea pig"); + assertQueryEquals("-guinea pig", a, "-guinea pig"); + assertQueryEquals("!guinea pig", a, "-guinea pig"); + assertQueryEquals("guinea* pig", a, "guinea* pig"); + assertQueryEquals("guinea? pig", a, "guinea? 
pig"); + assertQueryEquals("guinea~2 pig", a, "guinea~2 pig"); + assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig"); + + assertQueryEquals("guinea +pig", a, "guinea +pig"); + assertQueryEquals("guinea -pig", a, "guinea -pig"); + assertQueryEquals("guinea !pig", a, "guinea -pig"); + assertQueryEquals("guinea pig*", a, "guinea pig*"); + assertQueryEquals("guinea pig?", a, "guinea pig?"); + assertQueryEquals("guinea pig~2", a, "guinea pig~2"); + assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0"); + + assertQueryEquals("field:guinea pig", a, "guinea pig"); + assertQueryEquals("guinea field:pig", a, "guinea pig"); + + assertQueryEquals("NOT guinea pig", a, "-guinea pig"); + assertQueryEquals("guinea NOT pig", a, "guinea -pig"); + + assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)"); + assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig"); + assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)"); + assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig"); + + assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)"); + assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig"); + assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)"); + assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig"); + + assertQueryEquals("\"guinea\" pig", a, "guinea pig"); + assertQueryEquals("guinea \"pig\"", a, "guinea pig"); + + assertQueryEquals("(guinea) pig", a, "guinea pig"); + assertQueryEquals("guinea (pig)", a, "guinea pig"); + + assertQueryEquals("/guinea/ pig", a, "/guinea/ pig"); + assertQueryEquals("guinea /pig/", a, "guinea /pig/"); + + assertQueryEquals("(guinea pig)", a, "guinea pig"); + assertQueryEquals("+(guinea pig)", a, "+(guinea pig)"); + assertQueryEquals("-(guinea pig)", a, "-(guinea pig)"); + assertQueryEquals("!(guinea pig)", a, "-(guinea pig)"); + assertQueryEquals("NOT (guinea pig)", a, "-(guinea pig)"); + assertQueryEquals("(guinea pig)^2", a, "(guinea pig)^2.0"); + + assertQueryEquals("field:(guinea pig)", a, "guinea pig"); + + assertQueryEquals("+small guinea pig", a, "+small guinea pig"); + assertQueryEquals("-small guinea pig", a, "-small guinea pig"); + assertQueryEquals("!small guinea pig", a, "-small guinea pig"); + assertQueryEquals("NOT small guinea pig", a, "-small guinea pig"); + assertQueryEquals("small* guinea pig", a, "small* guinea pig"); + assertQueryEquals("small? guinea pig", a, "small? 
guinea pig"); + assertQueryEquals("\"small\" guinea pig", a, "small guinea pig"); + + assertQueryEquals("guinea pig +running", a, "guinea pig +running"); + assertQueryEquals("guinea pig -running", a, "guinea pig -running"); + assertQueryEquals("guinea pig !running", a, "guinea pig -running"); + assertQueryEquals("guinea pig NOT running", a, "guinea pig -running"); + assertQueryEquals("guinea pig running*", a, "guinea pig running*"); + assertQueryEquals("guinea pig running?", a, "guinea pig running?"); + assertQueryEquals("guinea pig \"running\"", a, "guinea pig running"); + + assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2"); + + assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\""); + + splitOnWhitespace = oldSplitOnWhitespace; + } + + public void testDefaultSplitOnWhitespace() throws Exception { + QueryParser parser = new QueryParser("field", new Analyzer1()); + + assertTrue(parser.getSplitOnWhitespace()); // default is true + + BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder(); + bqBuilder.add(new TermQuery(new Term("field", "guinea")), BooleanClause.Occur.SHOULD); + bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD); + assertEquals(bqBuilder.build(), parser.parse("guinea pig")); + + boolean oldSplitOnWhitespace = splitOnWhitespace; + splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE; + assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig"); + splitOnWhitespace = oldSplitOnWhitespace; + } +} \ No newline at end of file diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java index 785dd1c23dc..934a4dac254 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/ext/TestExtendableQueryParser.java @@ -50,6 +50,7 @@ public class TestExtendableQueryParser extends TestQueryParser { getDefaultField(), a) : new ExtendableQueryParser( getDefaultField(), a, extensions); qp.setDefaultOperator(QueryParserBase.OR_OPERATOR); + qp.setSplitOnWhitespace(splitOnWhitespace); return qp; } diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java index 25c737f214c..78d2bfda628 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestStandardQP.java @@ -203,4 +203,15 @@ public class TestStandardQP extends QueryParserTestBase { //TODO test something like "SmartQueryParser()" } + // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support + @Override + public void testQPA() throws Exception { + super.testQPA(); + + assertQueryEquals("term phrase term", qpAnalyzer, "term (phrase1 phrase2) term"); + + CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); + setDefaultOperatorAND(cqpc); + assertQueryEquals(cqpc, "field", "term phrase term", "+term +(+phrase1 +phrase2) +term"); + } } diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java index 70dc15a7cfe..f1eccf467ce 100644 --- 
a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java @@ -27,7 +27,6 @@ import java.util.TimeZone; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -535,8 +534,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase { assertQueryEquals("term -(stop) term", qpAnalyzer, "term term"); assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); - assertQueryEquals("term phrase term", qpAnalyzer, - "term (phrase1 phrase2) term"); + +// TODO: Re-enable once flexible standard parser gets multi-word synonym support +// assertQueryEquals("term phrase term", qpAnalyzer, +// "term phrase1 phrase2 term"); assertQueryEquals("term AND NOT phrase term", qpAnalyzer, "+term -(phrase1 phrase2) term"); assertQueryEquals("stop^3", qpAnalyzer, ""); @@ -552,8 +553,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase { CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer); setDefaultOperatorAND(cqpc); - assertQueryEquals(cqpc, "field", "term phrase term", - "+term +(+phrase1 +phrase2) +term"); +// TODO: Re-enable once flexible standard parser gets multi-word synonym support +// assertQueryEquals(cqpc, "field", "term phrase term", +// "+term +phrase1 +phrase2 +term"); assertQueryEquals(cqpc, "field", "phrase", "+phrase1 +phrase2"); } @@ -1101,37 +1103,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase { dir.close(); } - /** - * adds synonym of "dog" for "dogs". 
- */ - protected static class MockSynonymFilter extends TokenFilter { - CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); - boolean addSynonym = false; - - public MockSynonymFilter(TokenStream input) { - super(input); - } - - @Override - public final boolean incrementToken() throws IOException { - if (addSynonym) { // inject our synonym - clearAttributes(); - termAtt.setEmpty().append("dog"); - posIncAtt.setPositionIncrement(0); - addSynonym = false; - return true; - } - - if (input.incrementToken()) { - addSynonym = termAtt.toString().equals("dogs"); - return true; - } else { - return false; - } - } - } - /** whitespace+lowercase analyzer with synonyms */ protected class Analyzer1 extends Analyzer { public Analyzer1(){ @@ -1251,10 +1222,8 @@ public abstract class QueryParserTestBase extends LuceneTestCase { CharacterRunAutomaton stopStopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton()); - CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList)); - - qp = getParserConfig( - new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList)); + CommonQueryParserConfiguration qp + = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList)); qp.setEnablePositionIncrements(true); PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java new file mode 100644 index 00000000000..a2ce33e74e2 --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymAnalyzer.java @@ -0,0 +1,28 @@ +package org.apache.lucene.analysis; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". 
*/ +public class MockSynonymAnalyzer extends Analyzer { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + MockTokenizer tokenizer = new MockTokenizer(); + return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer)); + } +} + diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java new file mode 100644 index 00000000000..b50be0735dd --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/MockSynonymFilter.java @@ -0,0 +1,97 @@ +package org.apache.lucene.analysis; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.util.AttributeSource; + +/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". 
*/ +public class MockSynonymFilter extends TokenFilter { + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); + List tokenQueue = new ArrayList<>(); + boolean endOfInput = false; + + public MockSynonymFilter(TokenStream input) { + super(input); + } + + @Override + public void reset() throws IOException { + super.reset(); + tokenQueue.clear(); + endOfInput = false; + } + + @Override + public final boolean incrementToken() throws IOException { + if (tokenQueue.size() > 0) { + tokenQueue.remove(0).copyTo(this); + return true; + } + if (endOfInput == false && input.incrementToken()) { + if (termAtt.toString().equals("dogs")) { + addSynonymAndRestoreOrigToken("dog", 1, offsetAtt.endOffset()); + } else if (termAtt.toString().equals("guinea")) { + AttributeSource firstSavedToken = cloneAttributes(); + if (input.incrementToken()) { + if (termAtt.toString().equals("pig")) { + AttributeSource secondSavedToken = cloneAttributes(); + int secondEndOffset = offsetAtt.endOffset(); + firstSavedToken.copyTo(this); + addSynonym("cavy", 2, secondEndOffset); + tokenQueue.add(secondSavedToken); + } else if (termAtt.toString().equals("dogs")) { + tokenQueue.add(cloneAttributes()); + addSynonym("dog", 1, offsetAtt.endOffset()); + } + } else { + endOfInput = true; + } + firstSavedToken.copyTo(this); + } + return true; + } else { + endOfInput = true; + return false; + } + } + private void addSynonym(String synonymText, int posLen, int endOffset) { + termAtt.setEmpty().append(synonymText); + posIncAtt.setPositionIncrement(0); + posLenAtt.setPositionLength(posLen); + offsetAtt.setOffset(offsetAtt.startOffset(), endOffset); + tokenQueue.add(cloneAttributes()); + } + private void addSynonymAndRestoreOrigToken(String synonymText, int posLen, int endOffset) { + AttributeSource origToken = cloneAttributes(); + addSynonym(synonymText, posLen, endOffset); + origToken.copyTo(this); + } +} + + diff --git a/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java new file mode 100644 index 00000000000..fb0d0657744 --- /dev/null +++ b/lucene/test-framework/src/test/org/apache/lucene/analysis/TestMockSynonymFilter.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis; + +import java.io.IOException; + +/** test the mock synonym filter */ +public class TestMockSynonymFilter extends BaseTokenStreamTestCase { + + /** test the mock synonym filter */ + public void test() throws IOException { + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + MockTokenizer tokenizer = new MockTokenizer(); + return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer)); + } + }; + + assertAnalyzesTo(analyzer, "dogs", + new String[]{"dogs", "dog"}, + new int[]{0, 0}, // start offset + new int[]{4, 4}, // end offset + null, + new int[]{1, 0}, // position increment + new int[]{1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "small dogs", + new String[]{"small", "dogs", "dog"}, + new int[]{0, 6, 6}, // start offset + new int[]{5, 10, 10}, // end offset + null, + new int[]{1, 1, 0}, // position increment + new int[]{1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "dogs running", + new String[]{"dogs", "dog", "running"}, + new int[]{0, 0, 5}, // start offset + new int[]{4, 4, 12}, // end offset + null, + new int[]{1, 0, 1}, // position increment + new int[]{1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "small dogs running", + new String[]{"small", "dogs", "dog", "running"}, + new int[]{0, 6, 6, 11}, // start offset + new int[]{5, 10, 10, 18}, // end offset + null, + new int[]{1, 1, 0, 1}, // position increment + new int[]{1, 1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "guinea", + new String[]{"guinea"}, + new int[]{0}, // start offset + new int[]{6}, // end offset + null, + new int[]{1}, // position increment + new int[]{1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "pig", + new String[]{"pig"}, + new int[]{0}, // start offset + new int[]{3}, // end offset + null, + new int[]{1}, // position increment + new int[]{1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "guinea pig", + new String[]{"guinea", "cavy", "pig"}, + new int[]{0, 0, 7}, // start offset + new int[]{6, 10, 10}, // end offset + null, + new int[]{1, 0, 1}, // position increment + new int[]{1, 2, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "guinea dogs", + new String[]{"guinea", "dogs", "dog"}, + new int[]{0, 7, 7}, // start offset + new int[]{6, 11, 11}, // end offset + null, + new int[]{1, 1, 0}, // position increment + new int[]{1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "dogs guinea", + new String[]{"dogs", "dog", "guinea"}, + new int[]{0, 0, 5}, // start offset + new int[]{4, 4, 11}, // end offset + null, + new int[]{1, 0, 1}, // position increment + new int[]{1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "dogs guinea pig", + new String[]{"dogs", "dog", "guinea", "cavy", "pig"}, + new int[]{0, 0, 5, 5, 12}, // start offset + new int[]{4, 4, 11, 15, 15}, // end offset + null, + new int[]{1, 0, 1, 0, 1}, // position increment + new int[]{1, 1, 1, 2, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "guinea pig dogs", + new String[]{"guinea", "cavy", "pig", 
"dogs", "dog"}, + new int[]{0, 0, 7, 11, 11}, // start offset + new int[]{6, 10, 10, 15, 15}, // end offset + null, + new int[]{1, 0, 1, 1, 0}, // position increment + new int[]{1, 2, 1, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "small dogs and guinea pig running", + new String[]{"small", "dogs", "dog", "and", "guinea", "cavy", "pig", "running"}, + new int[]{0, 6, 6, 11, 15, 15, 22, 26}, // start offset + new int[]{5, 10, 10, 14, 21, 25, 25, 33}, // end offset + null, + new int[]{1, 1, 0, 1, 1, 0, 1, 1}, // position increment + new int[]{1, 1, 1, 1, 1, 2, 1, 1}, // position length + true); // check that offsets are correct + + assertAnalyzesTo(analyzer, "small guinea pig and dogs running", + new String[]{"small", "guinea", "cavy", "pig", "and", "dogs", "dog", "running"}, + new int[]{0, 6, 6, 13, 17, 21, 21, 26}, // start offset + new int[]{5, 12, 16, 16, 20, 25, 25, 33}, // end offset + null, + new int[]{1, 1, 0, 1, 1, 1, 0, 1}, // position increment + new int[]{1, 1, 2, 1, 1, 1, 1, 1}, // position length + true); // check that offsets are correct + } +} diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 272f35d730f..497d1396f72 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -46,6 +46,14 @@ New Features * SOLR-9194: Enhance the bin/solr script to perform file operations to/from Zookeeper (Erick Erickson, janhoy) +* SOLR-9242: Collection Backup/Restore now supports specifying the directory implementation to use + via the "repository" parameter. (Hrishikesh Gadre, Varun Thacker) + +* SOLR-9193: Add scoreNodes Streaming Expression (Joel Bernstein) + +* SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms + (Joel Bernstein) + Bug Fixes ---------------------- @@ -78,6 +86,14 @@ Bug Fixes * SOLR-9181: Fix some races in CollectionStateWatcher API (Alan Woodward, Scott Blum) +* SOLR-9235: Fixed NPE when using non-numeric range query in deleteByQuery (hossman) + +* SOLR-9088: Fixed TestManagedSchemaAPI failures which exposed race conditions in the schema API ( Varun Thacker, noble) + +* SOLR-9207: PeerSync recovery failes if number of updates requested is high. A new useRangeVersions config option + is introduced (defaults to true) to send version ranges instead of individual versions for peer sync. 
+ (Pushkar Raste, shalin) + Optimizations ---------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml +++ b/solr/contrib/morphlines-core/src/test-files/solr/collection1/conf/elevate.xml @@ -24,15 +24,19 @@ --> [elevation entries changed; XML markup not preserved] diff --git a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml +++ b/solr/contrib/morphlines-core/src/test-files/solr/minimr/conf/elevate.xml @@ -24,15 +24,19 @@ --> [elevation entries changed; XML markup not preserved] diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml +++ b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/elevate.xml @@ -24,15 +24,19 @@ --> [elevation entries changed; XML markup not preserved] diff --git a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml +++ b/solr/contrib/morphlines-core/src/test-files/solr/solrcelltest/collection1/conf/elevate.xml @@ -24,15 +24,19 @@ --> [elevation entries changed; XML markup not preserved] diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java index a0ac7322175..27a2824336e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java +++ b/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionMessageHandler.java @@ -17,13 +17,8 @@ package org.apache.solr.cloud; import java.io.IOException; -import java.io.Reader; -import java.io.Writer; import java.lang.invoke.MethodHandles; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; +import java.net.URI; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; @@ -36,6 +31,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.Random; import java.util.Set; @@ -84,6 +80,9 @@ import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.SuppressForbidden; import org.apache.solr.common.util.Utils; +import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.backup.BackupManager; +import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.handler.component.ShardHandlerFactory; import org.apache.solr.handler.component.ShardRequest; @@ -2215,21 +2214,28 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler private void processBackupAction(ZkNodeProps message, NamedList results) throws IOException, KeeperException,
InterruptedException { String collectionName = message.getStr(COLLECTION_PROP); String backupName = message.getStr(NAME); - String location = message.getStr(ZkStateReader.BACKUP_LOCATION); ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); String asyncId = message.getStr(ASYNC); + String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); + String location = message.getStr(CoreAdminParams.BACKUP_LOCATION); + Map requestMap = new HashMap<>(); Instant startTime = Instant.now(); - // note: we assume a shared files system to backup a collection, since a collection is distributed - Path backupPath = Paths.get(location).resolve(backupName).toAbsolutePath(); + CoreContainer cc = this.overseer.getZkController().getCoreContainer(); + BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo)); + BackupManager backupMgr = new BackupManager(repository, zkStateReader, collectionName); + + // Backup location + URI backupPath = repository.createURI(location, backupName); //Validating if the directory already exists. - if (Files.exists(backupPath)) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, - "Backup directory already exists: " + backupPath); + if (repository.exists(backupPath)) { + throw new SolrException(ErrorCode.BAD_REQUEST, "The backup directory already exists: " + backupPath); } - Files.createDirectory(backupPath); // create now + + // Create a directory to store backup details. + repository.createDirectory(backupPath); log.info("Starting backup of collection={} with backupName={} at location={}", collectionName, backupName, backupPath); @@ -2242,7 +2248,8 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler ModifiableSolrParams params = new ModifiableSolrParams(); params.set(CoreAdminParams.ACTION, CoreAdminAction.BACKUPCORE.toString()); params.set(NAME, slice.getName()); - params.set("location", backupPath.toString()); // note: index dir will be here then the "snapshot." + slice name + params.set(CoreAdminParams.BACKUP_REPOSITORY, repo); + params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.getPath()); // note: index dir will be here then the "snapshot." + slice name params.set(CORE_NAME_PROP, coreName); sendShardRequest(replica.getNodeName(), params, shardHandler, asyncId, requestMap); @@ -2256,29 +2263,24 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler //Download the configs String configName = zkStateReader.readConfigName(collectionName); - Path zkBackup = backupPath.resolve("zk_backup"); - zkStateReader.getConfigManager().downloadConfigDir(configName, zkBackup.resolve("configs").resolve(configName)); + backupMgr.downloadConfigDir(location, backupName, configName); //Save the collection's state. 
Can be part of the monolithic clusterstate.json or an individual state.json //Since we don't want to distinguish we extract the state and back it up as a separate json - DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName); - Files.write(zkBackup.resolve("collection_state.json"), - Utils.toJSON(Collections.singletonMap(collectionName, collection))); + DocCollection collectionState = zkStateReader.getClusterState().getCollection(collectionName); + backupMgr.writeCollectionState(location, backupName, collectionName, collectionState); - Path propertiesPath = backupPath.resolve("backup.properties"); Properties properties = new Properties(); - properties.put("backupName", backupName); - properties.put("collection", collectionName); - properties.put("collection.configName", configName); - properties.put("startTime", startTime.toString()); + properties.put(BackupManager.BACKUP_NAME_PROP, backupName); + properties.put(BackupManager.COLLECTION_NAME_PROP, collectionName); + properties.put(COLL_CONF, configName); + properties.put(BackupManager.START_TIME_PROP, startTime.toString()); //TODO: Add MD5 of the configset. If during restore a configset with the same name exists then we can compare checksums to see if they are the same. //if they are not the same then we can throw an error or have an 'overwriteConfig' flag //TODO save numDocs for the shardLeader. We can use it to sanity check the restore. - try (Writer os = Files.newBufferedWriter(propertiesPath, StandardCharsets.UTF_8)) { - properties.store(os, "Snapshot properties file"); - } + backupMgr.writeBackupProperties(location, backupName, properties); log.info("Completed backing up ZK data for backupName={}", backupName); } @@ -2287,26 +2289,21 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler // TODO maybe we can inherit createCollection's options/code String restoreCollectionName = message.getStr(COLLECTION_PROP); String backupName = message.getStr(NAME); // of backup - String location = message.getStr(ZkStateReader.BACKUP_LOCATION); ShardHandler shardHandler = shardHandlerFactory.getShardHandler(); String asyncId = message.getStr(ASYNC); + String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); + String location = message.getStr(CoreAdminParams.BACKUP_LOCATION); Map requestMap = new HashMap<>(); - Path backupPath = Paths.get(location).resolve(backupName).toAbsolutePath(); - if (!Files.exists(backupPath)) { - throw new SolrException(ErrorCode.SERVER_ERROR, "Couldn't restore since doesn't exist: " + backupPath); - } - Path backupZkPath = backupPath.resolve("zk_backup"); + CoreContainer cc = this.overseer.getZkController().getCoreContainer(); + BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo)); - Properties properties = new Properties(); - try (Reader in = Files.newBufferedReader(backupPath.resolve("backup.properties"), StandardCharsets.UTF_8)) { - properties.load(in); - } + URI backupPath = repository.createURI(location, backupName); + BackupManager backupMgr = new BackupManager(repository, zkStateReader, restoreCollectionName); - String backupCollection = (String) properties.get("collection"); - byte[] data = Files.readAllBytes(backupZkPath.resolve("collection_state.json")); - ClusterState backupClusterState = ClusterState.load(-1, data, Collections.emptySet()); - DocCollection backupCollectionState = backupClusterState.getCollection(backupCollection); + Properties properties = backupMgr.readBackupProperties(location, backupName); + String
backupCollection = properties.getProperty(BackupManager.COLLECTION_NAME_PROP); + DocCollection backupCollectionState = backupMgr.readCollectionState(location, backupName, backupCollection); //Upload the configs String configName = (String) properties.get(COLL_CONF); @@ -2316,11 +2313,11 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler //TODO add overwrite option? } else { log.info("Uploading config {}", restoreConfigName); - zkStateReader.getConfigManager().uploadConfigDir(backupZkPath.resolve("configs").resolve(configName), restoreConfigName); + backupMgr.uploadConfigDir(location, backupName, configName, restoreConfigName); } log.info("Starting restore into collection={} with backup_name={} at location={}", restoreCollectionName, backupName, - backupPath); + location); //Create core-less collection { @@ -2410,7 +2407,9 @@ public class OverseerCollectionMessageHandler implements OverseerMessageHandler ModifiableSolrParams params = new ModifiableSolrParams(); params.set(CoreAdminParams.ACTION, CoreAdminAction.RESTORECORE.toString()); params.set(NAME, "snapshot." + slice.getName()); - params.set("location", backupPath.toString()); + params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.getPath()); + params.set(CoreAdminParams.BACKUP_REPOSITORY, repo); + sliceCmd(clusterState, params, null, slice, shardHandler, asyncId, requestMap); } processResponses(new NamedList(), shardHandler, true, "Could not restore core", asyncId, requestMap); diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index 9ebca6f81d6..a6a15082acf 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -53,7 +53,25 @@ import org.apache.solr.cloud.overseer.OverseerAction; import org.apache.solr.cloud.overseer.SliceMutator; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.cloud.*; +import org.apache.solr.common.cloud.BeforeReconnect; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.ClusterStateUtil; +import org.apache.solr.common.cloud.DefaultConnectionStrategy; +import org.apache.solr.common.cloud.DefaultZkACLProvider; +import org.apache.solr.common.cloud.DefaultZkCredentialsProvider; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.OnReconnect; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkACLProvider; +import org.apache.solr.common.cloud.ZkCmdExecutor; +import org.apache.solr.common.cloud.ZkConfigManager; +import org.apache.solr.common.cloud.ZkCoreNodeProps; +import org.apache.solr.common.cloud.ZkCredentialsProvider; +import org.apache.solr.common.cloud.ZkNodeProps; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.cloud.ZooKeeperException; import org.apache.solr.common.params.CollectionParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; @@ -2242,8 +2260,8 @@ public final class ZkController { String errMsg = "Failed to persist resource at {0} - old {1}"; try { try { - zkClient.setData(resourceLocation, content, znodeVersion, true); - latestVersion = znodeVersion + 1;// if the set succeeded , it should have incremented the version by one always + Stat 
stat = zkClient.setData(resourceLocation, content, znodeVersion, true); + latestVersion = stat.getVersion();// if the set succeeded , it should have incremented the version by one always log.info("Persisted config data to node {} ", resourceLocation); touchConfDir(zkLoader); } catch (NoNodeException e) { diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index 422a7616119..a6d40664ee0 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutionException; @@ -44,6 +45,7 @@ import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.IOUtils; import org.apache.solr.common.util.Utils; +import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepositoryFactory; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.admin.CollectionsHandler; @@ -149,8 +151,21 @@ public class CoreContainer { private BackupRepositoryFactory backupRepoFactory; - public BackupRepositoryFactory getBackupRepoFactory() { - return backupRepoFactory; + /** + * This method instantiates a new instance of {@linkplain BackupRepository}. + * + * @param repositoryName The name of the backup repository (Optional). + * If not specified, a default implementation is used. + * @return a new instance of {@linkplain BackupRepository}. 
+ */ + public BackupRepository newBackupRepository(Optional repositoryName) { + BackupRepository repository; + if (repositoryName.isPresent()) { + repository = backupRepoFactory.newInstance(getResourceLoader(), repositoryName.get()); + } else { + repository = backupRepoFactory.newInstance(getResourceLoader()); + } + return repository; } public ExecutorService getCoreZkRegisterExecutorService() { diff --git a/solr/core/src/java/org/apache/solr/core/SolrConfig.java b/solr/core/src/java/org/apache/solr/core/SolrConfig.java index a5f54580e06..eb3aa5fc7f1 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrConfig.java +++ b/solr/core/src/java/org/apache/solr/core/SolrConfig.java @@ -234,7 +234,8 @@ public class SolrConfig extends Config implements MapSerializable { queryResultWindowSize = Math.max(1, getInt("query/queryResultWindowSize", 1)); queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE); enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false); - + + useRangeVersionsForPeerSync = getBool("peerSync/useRangeVersions", true); filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache"); queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache"); @@ -462,6 +463,9 @@ public class SolrConfig extends Config implements MapSerializable { public final int queryResultWindowSize; public final int queryResultMaxDocsCached; public final boolean enableLazyFieldLoading; + + public final boolean useRangeVersionsForPeerSync; + // DocSet public final float hashSetInverseLoadFactor; public final int hashDocSetMaxSize; @@ -864,6 +868,10 @@ public class SolrConfig extends Config implements MapSerializable { "addHttpRequestToContext", addHttpRequestToContext)); if (indexConfig != null) result.put("indexConfig", indexConfig.toMap()); + m = new LinkedHashMap(); + result.put("peerSync", m); + m.put("useRangeVersions", useRangeVersionsForPeerSync); + //TODO there is more to add return result; diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java index 53af3d1b5ce..14a4e0ff1e8 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrCore.java +++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java @@ -28,7 +28,19 @@ import java.lang.reflect.Constructor; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.NoSuchFileException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; @@ -77,7 +89,22 @@ import org.apache.solr.handler.component.SearchComponent; import org.apache.solr.logging.MDCLoggingContext; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestHandler; -import org.apache.solr.response.*; +import org.apache.solr.response.BinaryResponseWriter; +import org.apache.solr.response.CSVResponseWriter; +import org.apache.solr.response.GeoJSONResponseWriter; +import org.apache.solr.response.GraphMLResponseWriter; +import org.apache.solr.response.JSONResponseWriter; +import org.apache.solr.response.PHPResponseWriter; +import 
org.apache.solr.response.PHPSerializedResponseWriter; +import org.apache.solr.response.PythonResponseWriter; +import org.apache.solr.response.QueryResponseWriter; +import org.apache.solr.response.RawResponseWriter; +import org.apache.solr.response.RubyResponseWriter; +import org.apache.solr.response.SchemaXmlResponseWriter; +import org.apache.solr.response.SmileResponseWriter; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.response.SortingResponseWriter; +import org.apache.solr.response.XMLResponseWriter; import org.apache.solr.response.transform.TransformerFactory; import org.apache.solr.rest.ManagedResourceStorage; import org.apache.solr.rest.ManagedResourceStorage.StorageIO; @@ -86,6 +113,7 @@ import org.apache.solr.schema.FieldType; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IndexSchemaFactory; import org.apache.solr.schema.ManagedIndexSchema; +import org.apache.solr.schema.SchemaManager; import org.apache.solr.schema.SimilarityFactory; import org.apache.solr.search.QParserPlugin; import org.apache.solr.search.SolrFieldCacheMBean; @@ -2488,13 +2516,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable { SolrZkClient zkClient = cc.getZkController().getZkClient(); int solrConfigversion, overlayVersion, managedSchemaVersion = 0; SolrConfig cfg = null; - try (SolrCore core1 = cc.solrCores.getCoreFromAnyList(coreName, true)) { - if (core1 == null || core1.isClosed()) return; - cfg = core1.getSolrConfig(); - solrConfigversion = core1.getSolrConfig().getOverlay().getZnodeVersion(); - overlayVersion = core1.getSolrConfig().getZnodeVersion(); + try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { + if (solrCore == null || solrCore.isClosed()) return; + cfg = solrCore.getSolrConfig(); + solrConfigversion = solrCore.getSolrConfig().getOverlay().getZnodeVersion(); + overlayVersion = solrCore.getSolrConfig().getZnodeVersion(); if (managedSchmaResourcePath != null) { - managedSchemaVersion = ((ManagedIndexSchema) core1.getLatestSchema()).getSchemaZkVersion(); + managedSchemaVersion = ((ManagedIndexSchema) solrCore.getLatestSchema()).getSchemaZkVersion(); } } @@ -2504,6 +2532,13 @@ public final class SolrCore implements SolrInfoMBean, Closeable { if (checkStale(zkClient, overlayPath, solrConfigversion) || checkStale(zkClient, solrConfigPath, overlayVersion) || checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) { + + try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { + solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore)); + } catch (Exception e) { + log.warn("", SolrZkClient.checkInterrupted(e)); + } + log.info("core reload {}", coreName); try { cc.reload(coreName); @@ -2513,9 +2548,9 @@ public final class SolrCore implements SolrInfoMBean, Closeable { return; } //some files in conf directory may have other than managedschema, overlay, params - try (SolrCore core1 = cc.solrCores.getCoreFromAnyList(coreName, true)) { - if (core1 == null || core1.isClosed()) return; - for (Runnable listener : core1.confListeners) { + try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) { + if (solrCore == null || solrCore.isClosed()) return; + for (Runnable listener : solrCore.confListeners) { try { listener.run(); } catch (Exception e) { diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java new file mode 100644 index 00000000000..0575bff95ac --- 
/dev/null +++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java @@ -0,0 +1,250 @@ +package org.apache.solr.core.backup; + +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.io.Reader; +import java.io.Writer; +import java.lang.invoke.MethodHandles; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; +import java.util.Properties; + +import com.google.common.base.Preconditions; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.cloud.ZkConfigManager; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.util.Utils; +import org.apache.solr.core.backup.repository.BackupRepository; +import org.apache.solr.core.backup.repository.BackupRepository.PathType; +import org.apache.solr.util.PropertiesInputStream; +import org.apache.zookeeper.KeeperException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class implements functionality to create a backup with extension points provided to integrate with different + * types of file-systems. + */ +public class BackupManager { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + public static final String COLLECTION_PROPS_FILE = "collection_state.json"; + public static final String BACKUP_PROPS_FILE = "backup.properties"; + public static final String ZK_STATE_DIR = "zk_backup"; + public static final String CONFIG_STATE_DIR = "configs"; + + // Backup properties + public static final String COLLECTION_NAME_PROP = "collection"; + public static final String BACKUP_NAME_PROP = "backupName"; + public static final String INDEX_VERSION_PROP = "index.version"; + public static final String START_TIME_PROP = "startTime"; + + protected final ZkStateReader zkStateReader; + protected final BackupRepository repository; + + public BackupManager(BackupRepository repository, ZkStateReader zkStateReader, String collectionName) { + this.repository = Preconditions.checkNotNull(repository); + this.zkStateReader = Preconditions.checkNotNull(zkStateReader); + } + + /** + * @return The version of this backup implementation. + */ + public final String getVersion() { + return "1.0"; + } + + /** + * This method returns the configuration parameters for the specified backup. 
+ * + * @param backupLoc The base path used to store the backup data. + * @param backupId The unique name for the backup whose configuration params are required. + * @return the configuration parameters for the specified backup. + * @throws IOException In case of errors. + */ + public Properties readBackupProperties(String backupLoc, String backupId) throws IOException { + Preconditions.checkNotNull(backupLoc); + Preconditions.checkNotNull(backupId); + + // Backup location + URI backupPath = repository.createURI(backupLoc, backupId); + if (!repository.exists(backupPath)) { + throw new SolrException(ErrorCode.SERVER_ERROR, "Couldn't restore since doesn't exist: " + backupPath); + } + + Properties props = new Properties(); + try (Reader is = new InputStreamReader(new PropertiesInputStream( + repository.openInput(backupPath, BACKUP_PROPS_FILE, IOContext.DEFAULT)), StandardCharsets.UTF_8)) { + props.load(is); + return props; + } + } + + /** + * This method stores the backup properties at the specified location in the repository. + * + * @param backupLoc The base path used to store the backup data. + * @param backupId The unique name for the backup whose configuration params are required. + * @param props The backup properties + * @throws IOException in case of I/O error + */ + public void writeBackupProperties(String backupLoc, String backupId, Properties props) throws IOException { + URI dest = repository.createURI(backupLoc, backupId, BACKUP_PROPS_FILE); + try (Writer propsWriter = new OutputStreamWriter(repository.createOutput(dest), StandardCharsets.UTF_8)) { + props.store(propsWriter, "Backup properties file"); + } + } + + /** + * This method reads the meta-data information for the backed-up collection. + * + * @param backupLoc The base path used to store the backup data. + * @param backupId The unique name for the backup. + * @return the meta-data information for the backed-up collection. + * @throws IOException in case of errors. + */ + public DocCollection readCollectionState(String backupLoc, String backupId, String collectionName) throws IOException { + Preconditions.checkNotNull(collectionName); + + URI zkStateDir = repository.createURI(backupLoc, backupId, ZK_STATE_DIR); + try (IndexInput is = repository.openInput(zkStateDir, COLLECTION_PROPS_FILE, IOContext.DEFAULT)) { + byte[] arr = new byte[(int) is.length()]; // probably ok since the json file should be small. + is.readBytes(arr, 0, (int) is.length()); + ClusterState c_state = ClusterState.load(-1, arr, Collections.emptySet()); + return c_state.getCollection(collectionName); + } + } + + /** + * This method writes the collection meta-data to the specified location in the repository. + * + * @param backupLoc The base path used to store the backup data. + * @param backupId The unique name for the backup. + * @param collectionName The name of the collection whose meta-data is being stored. + * @param collectionState The collection meta-data to be stored. + * @throws IOException in case of I/O errors. + */ + public void writeCollectionState(String backupLoc, String backupId, String collectionName, + DocCollection collectionState) throws IOException { + URI dest = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, COLLECTION_PROPS_FILE); + try (OutputStream collectionStateOs = repository.createOutput(dest)) { + collectionStateOs.write(Utils.toJSON(Collections.singletonMap(collectionName, collectionState))); + } + } + + /** + * This method uploads the Solr configuration files to the desired location in Zookeeper. 
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param sourceConfigName The name of the config to be copied.
+ * @param targetConfigName The name of the config to be created.
+ * @throws IOException in case of I/O errors.
+ */
+ public void uploadConfigDir(String backupLoc, String backupId, String sourceConfigName, String targetConfigName)
+ throws IOException {
+ URI source = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR, sourceConfigName);
+ String zkPath = ZkConfigManager.CONFIGS_ZKNODE + "/" + targetConfigName;
+ uploadToZk(zkStateReader.getZkClient(), source, zkPath);
+ }
+
+ /**
+ * This method stores the contents of a specified Solr config at the specified location in the repository.
+ *
+ * @param backupLoc The base path used to store the backup data.
+ * @param backupId The unique name for the backup.
+ * @param configName The name of the config to be saved.
+ * @throws IOException in case of I/O errors.
+ */
+ public void downloadConfigDir(String backupLoc, String backupId, String configName) throws IOException {
+ URI dest = repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR, configName);
+ repository.createDirectory(repository.createURI(backupLoc, backupId, ZK_STATE_DIR));
+ repository.createDirectory(repository.createURI(backupLoc, backupId, ZK_STATE_DIR, CONFIG_STATE_DIR));
+ repository.createDirectory(dest);
+
+ downloadFromZK(zkStateReader.getZkClient(), ZkConfigManager.CONFIGS_ZKNODE + "/" + configName, dest);
+ }
+
+ private void downloadFromZK(SolrZkClient zkClient, String zkPath, URI dir) throws IOException {
+ try {
+ if (!repository.exists(dir)) {
+ repository.createDirectory(dir);
+ }
+ List<String> files = zkClient.getChildren(zkPath, null, true);
+ for (String file : files) {
+ List<String> children = zkClient.getChildren(zkPath + "/" + file, null, true);
+ if (children.size() == 0) {
+ log.info("Writing file {}", file);
+ byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true);
+ try (OutputStream os = repository.createOutput(repository.createURI(dir.getPath(), file))) {
+ os.write(data);
+ }
+ } else {
+ downloadFromZK(zkClient, zkPath + "/" + file, repository.createURI(dir.getPath(), file));
+ }
+ }
+ } catch (KeeperException | InterruptedException e) {
+ throw new IOException("Error downloading files from ZooKeeper path " + zkPath + " to " + dir.toString(),
+ SolrZkClient.checkInterrupted(e));
+ }
+ }
+
+ private void uploadToZk(SolrZkClient zkClient, URI sourceDir, String destZkPath) throws IOException {
+ Preconditions.checkArgument(repository.exists(sourceDir), "Path %s does not exist", sourceDir);
+ Preconditions.checkArgument(repository.getPathType(sourceDir) == PathType.DIRECTORY,
+ "Path %s is not a directory", sourceDir);
+
+ for (String file : repository.listAll(sourceDir)) {
+ String zkNodePath = destZkPath + "/" + file;
+ URI path = repository.createURI(sourceDir.getPath(), file);
+ PathType t = repository.getPathType(path);
+ switch (t) {
+ case FILE: {
+ try (IndexInput is = repository.openInput(sourceDir, file, IOContext.DEFAULT)) {
+ byte[] arr = new byte[(int) is.length()]; // probably ok since the config file should be small.
+ is.readBytes(arr, 0, (int) is.length());
+ zkClient.makePath(zkNodePath, arr, true);
+ } catch (KeeperException | InterruptedException e) {
+ throw new IOException(e);
+ }
+ break;
+ }
+
+ case DIRECTORY: {
+ if (!file.startsWith(".")) {
+ uploadToZk(zkClient, path, zkNodePath);
+ }
+ break;
+ }
+ default:
+ throw new IllegalStateException("Unknown path type " + t);
+ }
+ }
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/core/backup/package-info.java b/solr/core/src/java/org/apache/solr/core/backup/package-info.java
new file mode 100644
index 00000000000..defcad6b55c
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/core/backup/package-info.java
@@ -0,0 +1,22 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+
+/**
+ * Core classes for Solr's Backup/Restore functionality
+ */
+package org.apache.solr.core.backup;
\ No newline at end of file
diff --git a/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java b/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
index f209b874a5f..20d8628a50c 100644
--- a/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
+++ b/solr/core/src/java/org/apache/solr/core/backup/repository/BackupRepository.java
@@ -21,20 +21,18 @@ import java.io.Closeable;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.URI;
+import java.util.Optional;
+
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;

 /**
 * This interface defines the functionality required to backup/restore Solr indexes to an arbitrary storage system.
 */
 public interface BackupRepository extends NamedListInitializedPlugin, Closeable {
- /**
- * A parameter to specify the name of the backup repository to be used.
- */
- String REPOSITORY_PROPERTY_NAME = "repository";
-
 /**
 * This enumeration defines the type of a given path.
@@ -43,6 +41,17 @@ public interface BackupRepository extends NamedListInitializedPlugin, Closeable
 DIRECTORY, FILE
 }

+ /**
+ * This method returns the location where the backup should be stored (or restored from).
+ *
+ * @param override The location parameter supplied by the user.
+ * @return If override is not null then return the same value.
+ * Otherwise return the default configuration value for the {@linkplain CoreAdminParams#BACKUP_LOCATION} parameter.
+ */
+ default String getBackupLocation(String override) {
+ return Optional.ofNullable(override).orElse(getConfigProperty(CoreAdminParams.BACKUP_LOCATION));
+ }
+
 /**
 * This method returns the value of the specified configuration property.
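+ * <p>
+ * A typical lookup (illustrative only) mirrors the default method above:
+ * <pre>
+ *   String defaultLocation = repo.getConfigProperty(CoreAdminParams.BACKUP_LOCATION);
+ * </pre>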
*/ diff --git a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java index 657e6b32bc1..c4b42d9d609 100644 --- a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java @@ -118,7 +118,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, P .withFunctionName("shortestPath", ShortestPathStream.class) .withFunctionName("gatherNodes", GatherNodesStream.class) .withFunctionName("sort", SortStream.class) - + .withFunctionName("scoreNodes", ScoreNodesStream.class) // metrics .withFunctionName("min", MinMetric.class) diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java index 1893a7da52d..6e1b3a087a1 100644 --- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java @@ -37,6 +37,7 @@ import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Properties; import java.util.Random; import java.util.concurrent.ExecutorService; @@ -67,8 +68,8 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RateLimiter; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.ExecutorUtil; @@ -84,7 +85,6 @@ import org.apache.solr.core.IndexDeletionPolicyWrapper; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrDeletionPolicy; import org.apache.solr.core.SolrEventListener; -import org.apache.solr.core.SolrResourceLoader; import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.LocalFileSystemRepository; import org.apache.solr.request.SolrQueryRequest; @@ -331,7 +331,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name"); } - SnapShooter snapShooter = new SnapShooter(core, params.get(LOCATION), params.get(NAME)); + SnapShooter snapShooter = new SnapShooter(core, params.get(CoreAdminParams.BACKUP_LOCATION), params.get(NAME)); snapShooter.validateDeleteSnapshot(); snapShooter.deleteSnapAsync(this); } @@ -412,19 +412,16 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw "for the same core"); } String name = params.get(NAME); - String location = params.get(LOCATION); + String location = params.get(CoreAdminParams.BACKUP_LOCATION); - String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME); + String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY); CoreContainer cc = core.getCoreDescriptor().getCoreContainer(); - SolrResourceLoader rl = cc.getResourceLoader(); BackupRepository repo = null; - if(repoName != null) { - repo = cc.getBackupRepoFactory().newInstance(rl, repoName); + if (repoName != null) { + repo = cc.newBackupRepository(Optional.of(repoName)); + location = repo.getBackupLocation(location); if (location == null) { - location = repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION); - 
if(location == null) { - throw new IllegalArgumentException("location is required"); - } + throw new IllegalArgumentException("location is required"); } } else { repo = new LocalFileSystemRepository(); @@ -520,18 +517,15 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw indexCommit = req.getSearcher().getIndexReader().getIndexCommit(); } - String location = params.get(ZkStateReader.BACKUP_LOCATION); - String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME); + String location = params.get(CoreAdminParams.BACKUP_LOCATION); + String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY); CoreContainer cc = core.getCoreDescriptor().getCoreContainer(); - SolrResourceLoader rl = cc.getResourceLoader(); BackupRepository repo = null; - if(repoName != null) { - repo = cc.getBackupRepoFactory().newInstance(rl, repoName); + if (repoName != null) { + repo = cc.newBackupRepository(Optional.of(repoName)); + location = repo.getBackupLocation(location); if (location == null) { - location = repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION); - if(location == null) { - throw new IllegalArgumentException("location is required"); - } + throw new IllegalArgumentException("location is required"); } } else { repo = new LocalFileSystemRepository(); @@ -1645,8 +1639,6 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw } } - private static final String LOCATION = "location"; - private static final String SUCCESS = "success"; private static final String FAILED = "failed"; diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 91ee096e777..1e9ba27430b 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -126,6 +126,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("select", SelectStream.class) .withFunctionName("shortestPath", ShortestPathStream.class) .withFunctionName("gatherNodes", GatherNodesStream.class) + .withFunctionName("scoreNodes", ScoreNodesStream.class) // metrics .withFunctionName("min", MinMetric.class) diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java index 85c98c1bba1..97fbd2d181d 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java @@ -18,6 +18,7 @@ package org.apache.solr.handler.admin; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -26,6 +27,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.TimeUnit; @@ -75,6 +77,7 @@ import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.Utils; import org.apache.solr.core.CloudConfig; import org.apache.solr.core.CoreContainer; +import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.handler.RequestHandlerBase; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.request.SolrQueryRequest; @@ -807,15 +810,32 @@ public class CollectionsHandler extends RequestHandlerBase implements 
Permission
 throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' does not exist, no action taken.");
 }

- String location = req.getParams().get(ZkStateReader.BACKUP_LOCATION);
+ CoreContainer cc = h.coreContainer;
+ String repo = req.getParams().get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
+
+ String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
 if (location == null) {
- location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
+ // Check if the location is specified in the cluster property.
+ location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
+ if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property or as a cluster property.");
+ }
 }
- if (location == null) {
- throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
+
+ // Check if the specified location is valid for this repository.
+ URI uri = repository.createURI(location);
+ try {
+ if (!repository.exists(uri)) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "specified location " + uri + " does not exist.");
+ }
+ } catch (IOException ex) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ". Is it valid?", ex);
 }
+
 Map<String, String> params = req.getParams().getAll(null, NAME, COLLECTION_PROP);
- params.put("location", location);
+ params.put(CoreAdminParams.BACKUP_LOCATION, location);
 return params;
 }
 },
@@ -831,16 +851,32 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
 throw new SolrException(ErrorCode.BAD_REQUEST, "Collection '" + collectionName + "' exists, no action taken.");
 }

- String location = req.getParams().get(ZkStateReader.BACKUP_LOCATION);
+ CoreContainer cc = h.coreContainer;
+ String repo = req.getParams().get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = cc.newBackupRepository(Optional.ofNullable(repo));
+
+ String location = repository.getBackupLocation(req.getParams().get(CoreAdminParams.BACKUP_LOCATION));
 if (location == null) {
- location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty("location", (String) null);
+ // Check if the location is specified in the cluster property.
+ location = h.coreContainer.getZkController().getZkStateReader().getClusterProperty(CoreAdminParams.BACKUP_LOCATION, null);
+ if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property or as a cluster property.");
+ }
 }
- if (location == null) {
- throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query parameter or set as a cluster property");
+
+ // Check if the specified location is valid for this repository.
+ URI uri = repository.createURI(location);
+ try {
+ if (!repository.exists(uri)) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "specified location " + uri + " does not exist.");
+ }
+ } catch (IOException ex) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Failed to check the existence of " + uri + ".
Is it valid?", ex);
 }
+
 Map<String, String> params = req.getParams().getAll(null, NAME, COLLECTION_PROP);
- params.put("location", location);
+ params.put(CoreAdminParams.BACKUP_LOCATION, location);
 // from CREATE_OP:
 req.getParams().getAll(params, COLL_CONF, REPLICATION_FACTOR, MAX_SHARDS_PER_NODE, STATE_FORMAT, AUTO_ADD_REPLICAS);
 copyPropertiesWithPrefix(req.getParams(), params, COLL_PROP_PREFIX);
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
index 3c52beace86..bf892277d78 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.Future;
@@ -40,6 +41,7 @@ import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.SyncStrategy;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.DocRouter;
@@ -858,21 +860,13 @@ enum CoreAdminOperation {
 throw new IllegalArgumentException(CoreAdminParams.NAME + " is required");
 }

- SolrResourceLoader loader = callInfo.handler.coreContainer.getResourceLoader();
- BackupRepository repository;
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
- if(repoName != null) {
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader, repoName);
- } else { // Fetch the default.
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader);
- }
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = callInfo.handler.coreContainer.newBackupRepository(Optional.ofNullable(repoName));

- String location = params.get(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- location = repository.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ String location = repository.getBackupLocation(params.get(CoreAdminParams.BACKUP_LOCATION));
+ if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property");
 }

 try (SolrCore core = callInfo.handler.coreContainer.getCore(cname)) {
@@ -912,21 +906,13 @@ enum CoreAdminOperation {
 throw new IllegalArgumentException(CoreAdminParams.NAME + " is required");
 }

- SolrResourceLoader loader = callInfo.handler.coreContainer.getResourceLoader();
- BackupRepository repository;
- String repoName = params.get(BackupRepository.REPOSITORY_PROPERTY_NAME);
- if(repoName != null) {
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader, repoName);
- } else { // Fetch the default.
- repository = callInfo.handler.coreContainer.getBackupRepoFactory().newInstance(loader);
- }
+ String repoName = params.get(CoreAdminParams.BACKUP_REPOSITORY);
+ BackupRepository repository = callInfo.handler.coreContainer.newBackupRepository(Optional.ofNullable(repoName));

- String location = params.get(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- location = repository.getConfigProperty(ZkStateReader.BACKUP_LOCATION);
- if (location == null) {
- throw new IllegalArgumentException("location is required");
- }
+ String location = repository.getBackupLocation(params.get(CoreAdminParams.BACKUP_LOCATION));
+ if (location == null) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, "'location' is not specified as a query"
+ + " parameter or as a default repository property");
 }

 try (SolrCore core = callInfo.handler.coreContainer.getCore(cname)) {
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 1c42b034321..1942232115a 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -20,9 +20,13 @@ import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DocValuesType;
@@ -41,6 +45,7 @@ import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.StringUtils;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
 import org.apache.solr.common.cloud.Replica;
@@ -97,8 +102,16 @@ public class RealTimeGetComponent extends SearchComponent
 if (!params.getBool(COMPONENT_NAME, true)) {
 return;
 }
-
- String val = params.get("getVersions");
+
+ // This seems rather kludgey; maybe there is a better way for a replica
+ // to indicate that it can handle version ranges
+ String val = params.get("checkCanHandleVersionRanges");
+ if (val != null) {
+ rb.rsp.add("canHandleVersionRanges", true);
+ return;
+ }
+
+ val = params.get("getVersions");
 if (val != null) {
 processGetVersions(rb);
 return;
@@ -667,7 +680,14 @@ public class RealTimeGetComponent extends SearchComponent
 UpdateLog ulog = req.getCore().getUpdateHandler().getUpdateLog();
 if (ulog == null) return;

- List<String> versions = StrUtils.splitSmart(versionsStr, ",", true);
+ // handle version ranges
+ List<Long> versions = null;
+ if (versionsStr.indexOf("...") != -1) {
+ versions = resolveVersionRanges(versionsStr, ulog);
+ } else {
+ versions = StrUtils.splitSmart(versionsStr, ",", true).stream().map(Long::parseLong)
+ .collect(Collectors.toList());
+ }

 List<Object> updates = new ArrayList<>(versions.size());

@@ -676,8 +696,7 @@ public class RealTimeGetComponent extends SearchComponent
 // TODO: get this from cache instead of rebuilding?
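+ // Illustrative formats accepted here (version values are hypothetical): plain versions
+ // such as getUpdates=101,135,187, or inclusive ranges such as getUpdates=101...150,160...187.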
 try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
- for (String versionStr : versions) {
- long version = Long.parseLong(versionStr);
+ for (Long version : versions) {
 try {
 Object o = recentUpdates.lookup(version);
 if (o == null) continue;
@@ -702,5 +721,37 @@ public class RealTimeGetComponent extends SearchComponent
 }
 }

-
+
+
+ private List<Long> resolveVersionRanges(String versionsStr, UpdateLog ulog) {
+ if (StringUtils.isEmpty(versionsStr)) {
+ return Collections.emptyList();
+ }
+
+ List<String> ranges = StrUtils.splitSmart(versionsStr, ",", true);
+
+ // TODO merge ranges.
+
+ // get all the versions from the update log and sort them
+ List<Long> versionAvailable = null;
+ try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
+ versionAvailable = recentUpdates.getVersions(ulog.getNumRecordsToKeep());
+ }
+ // sort versions
+ Collections.sort(versionAvailable, PeerSync.absComparator);
+
+ // This could be done with a single pass over both ranges and versionAvailable, but that
+ // would require merging the ranges first. We currently use a Set to ensure there are no duplicates.
+ Set<Long> versionsToRet = new HashSet<>(ulog.getNumRecordsToKeep());
+ for (String range : ranges) {
+ String[] rangeBounds = range.split("\\.{3}");
+ int indexStart = Collections.binarySearch(versionAvailable, Long.valueOf(rangeBounds[1]), PeerSync.absComparator);
+ int indexEnd = Collections.binarySearch(versionAvailable, Long.valueOf(rangeBounds[0]), PeerSync.absComparator);
+ if (indexStart >= 0 && indexEnd >= 0) {
+ versionsToRet.addAll(versionAvailable.subList(indexStart, indexEnd + 1)); // +1 because subList's upper bound is exclusive
+ }
+ }
+ // TODO do we need to sort versions using PeerSync.absComparator?
+ return new ArrayList<>(versionsToRet);
+ }
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
index 7b707086b22..6ef0ee4f237 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
@@ -140,6 +140,8 @@ public abstract class SearchComponent implements SolrInfoMBean, NamedListInitial
 map.put(DebugComponent.COMPONENT_NAME, DebugComponent.class);
 map.put(RealTimeGetComponent.COMPONENT_NAME, RealTimeGetComponent.class);
 map.put(ExpandComponent.COMPONENT_NAME, ExpandComponent.class);
+ map.put(TermsComponent.COMPONENT_NAME, TermsComponent.class);
+
 standard_components = Collections.unmodifiableMap(map);
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
index f4a1776a185..e8362dcef2c 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -85,6 +85,8 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware ,
 names.add( StatsComponent.COMPONENT_NAME );
 names.add( DebugComponent.COMPONENT_NAME );
 names.add( ExpandComponent.COMPONENT_NAME);
+ names.add( TermsComponent.COMPONENT_NAME);
+
 return names;
 }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index a949268cf49..076c4eb1575 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -28,6 +28,7 @@ import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.StrField;
 import org.apache.solr.request.SimpleFacets.CountPair;
+import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.client.solrj.response.TermsResponse;
@@ -64,8 +65,12 @@ public class TermsComponent extends SearchComponent {
 @Override
 public void prepare(ResponseBuilder rb) throws IOException {
 SolrParams params = rb.req.getParams();
- if (params.getBool(TermsParams.TERMS, false)) {
+
+ // The "terms" parameter is also used by the JSON facet API, so parsing it as a boolean here would cause errors.
+ if (params.get(TermsParams.TERMS, "false").equals("true")) {
 rb.doTerms = true;
+ } else {
+ return;
 }

 // TODO: temporary... this should go in a different component.
@@ -83,7 +88,9 @@ public class TermsComponent extends SearchComponent {
 @Override
 public void process(ResponseBuilder rb) throws IOException {
 SolrParams params = rb.req.getParams();
- if (!params.getBool(TermsParams.TERMS, false)) return;
+ if (!params.get(TermsParams.TERMS, "false").equals("true")) {
+ return;
+ }

 String[] fields = params.getParams(TermsParams.TERMS_FIELD);

@@ -92,6 +99,20 @@ public class TermsComponent extends SearchComponent {
 if (fields == null || fields.length==0) return;

+ boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
+
+ if (termStats) {
+ NamedList<Number> stats = new SimpleOrderedMap<>();
+ rb.rsp.add("indexstats", stats);
+ collectStats(rb.req.getSearcher(), stats);
+ }
+
+ String termList = params.get(TermsParams.TERMS_LIST);
+ if (termList != null) {
+ fetchTerms(rb.req.getSearcher(), fields, termList, termsResult);
+ return;
+ }
+
 int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
 if (limit < 0) {
 limit = Integer.MAX_VALUE;
@@ -284,6 +305,13 @@ public class TermsComponent extends SearchComponent {
 @SuppressWarnings("unchecked")
 NamedList<NamedList<Number>> terms = (NamedList<NamedList<Number>>) srsp.getSolrResponse().getResponse().get("terms");
 th.parse(terms);
+
+
+ NamedList<Number> stats = (NamedList<Number>) srsp.getSolrResponse().getResponse().get("indexstats");
+ if (stats != null) {
+ th.numDocs += stats.get("numDocs").longValue();
+ th.stats = true;
+ }
 }
 }
 }
@@ -298,6 +326,11 @@ public class TermsComponent extends SearchComponent {
 NamedList terms = ti.buildResponse();

 rb.rsp.add("terms", terms);
+ if (ti.stats) {
+ NamedList<Number> stats = new SimpleOrderedMap<>();
+ stats.add("numDocs", Long.valueOf(ti.numDocs));
+ rb.rsp.add("indexstats", stats);
+ }
 rb._termsHelper = null;
 }
@@ -324,6 +357,8 @@ public class TermsComponent extends SearchComponent {
 // map to store returned terms
 private HashMap<String, HashMap<String, TermsResponse.Term>> fieldmap;
 private SolrParams params;
+ public long numDocs = 0;
+ public boolean stats;

 public TermsHelper() {
 fieldmap = new HashMap<>(5);
@@ -377,8 +412,12 @@ public class TermsComponent extends SearchComponent {
 NamedList response = new SimpleOrderedMap<>();

 // determine if we are going index or count sort
- boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(
- TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
+ boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(TermsParams.TERMS_SORT,
+ TermsParams.TERMS_SORT_COUNT));
+ if (params.get(TermsParams.TERMS_LIST) != null) {
+ // Always use lexical sort when TERMS_LIST is provided
+ sort = false;
+ }

 // init minimum frequency
 long freqmin = 1;
@@ -466,6 +505,81 @@ public class TermsComponent extends SearchComponent {
 }
 }

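+ // Illustrative request exercising the branches above (parameter values are hypothetical):
+ // /terms?terms=true&terms.fl=field&terms.list=foo,bar&terms.stats=true
+ // returns the docFreq of each listed term, plus an "indexstats" section carrying numDocs.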
+ private void fetchTerms(SolrIndexSearcher indexSearcher,
+ String[] fields,
+ String termList,
+ NamedList result) throws IOException {
+
+ NamedList termsMap = new SimpleOrderedMap();
+ List<LeafReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
+ String field = fields[0];
+ FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
+ String[] splitTerms = termList.split(",");
+
+ for (int i = 0; i < splitTerms.length; i++) {
+ splitTerms[i] = splitTerms[i].trim();
+ }
+
+ Term[] terms = new Term[splitTerms.length];
+ TermContext[] termContexts = new TermContext[terms.length];
+ for (int i = 0; i < terms.length; i++) {
+ terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
+ }
+
+ Arrays.sort(terms);
+
+ collectTermContext(indexSearcher.getIndexReader(), leaves, termContexts, terms);
+
+ for (int i = 0; i < terms.length; i++) {
+ if (termContexts[i] != null) {
+ String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
+ int docFreq = termContexts[i].docFreq();
+ termsMap.add(outTerm, docFreq);
+ }
+ }
+
+ result.add(field, termsMap);
+ }
+
+ private static void collectTermContext(IndexReader reader, List<LeafReaderContext> leaves, TermContext[] contextArray,
+ Term[] queryTerms) throws IOException {
+ TermsEnum termsEnum = null;
+ for (LeafReaderContext context : leaves) {
+ final Fields fields = context.reader().fields();
+ for (int i = 0; i < queryTerms.length; i++) {
+ Term term = queryTerms[i];
+ TermContext termContext = contextArray[i];
+ final Terms terms = fields.terms(term.field());
+ if (terms == null) {
+ // field does not exist
+ continue;
+ }
+ termsEnum = terms.iterator();
+ assert termsEnum != null;
+
+ if (termsEnum == TermsEnum.EMPTY) continue;
+ if (termsEnum.seekExact(term.bytes())) {
+ if (termContext == null) {
+ contextArray[i] = new TermContext(reader.getContext(),
+ termsEnum.termState(), context.ord, termsEnum.docFreq(),
+ termsEnum.totalTermFreq());
+ } else {
+ termContext.register(termsEnum.termState(), context.ord,
+ termsEnum.docFreq(), termsEnum.totalTermFreq());
+ }
+
+ }
+
+ }
+ }
+ }
+
+ private void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
+ int numDocs = searcher.getTopReaderContext().reader().numDocs();
+ stats.add("numDocs", Long.valueOf(numDocs));
+ }
+
 @Override
 public String getDescription() {
 return "A Component for working with Term Enumerators";
diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
index ee6340d9f7e..80d407ad4f0 100644
--- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
+++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
@@ -371,13 +371,14 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
 filter = answer.getTopFilter();
 }
 }
+ } else {
+ doCheck = false;
 }
-
+
 if (filter != null) {
 return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
 }
-
 final Terms terms = context.reader().terms(SolrRangeQuery.this.getField());
 if (terms == null) {
 return segStates[context.ord] = new SegState((DocIdSet) null);
diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
index 3b492a75dbe..ca3d756edff 100644
--- a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
+++ b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
@@ -16,6 +16,18 @@
 */
 package org.apache.solr.schema;

+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringWriter;
+import java.lang.invoke.MethodHandles;
+import java.nio.charset.StandardCharsets;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkSolrResourceLoader;
 import org.apache.solr.common.SolrException;
@@ -31,18 +43,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.InputSource;

-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.io.StringWriter;
-import java.lang.invoke.MethodHandles;
-import java.nio.charset.StandardCharsets;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import
java.util.concurrent.TimeUnit; - import static java.util.Collections.singleton; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -108,7 +108,7 @@ public class SchemaManager { SolrCore core = req.getCore(); String errorMsg = "Unable to persist managed schema. "; while (!timeOut.hasTimedOut()) { - managedIndexSchema = getFreshManagedSchema(); + managedIndexSchema = getFreshManagedSchema(req.getCore()); for (CommandOperation op : operations) { OpType opType = OpType.get(op.name); if (opType != null) { @@ -131,9 +131,9 @@ public class SchemaManager { } try { - ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(), + int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(), managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true); - waitForOtherReplicasToUpdate(timeOut); + waitForOtherReplicasToUpdate(timeOut, latestVersion); core.setLatestSchema(managedIndexSchema); return Collections.emptyList(); } catch (ZkController.ResourceModifiedInZkException e) { @@ -155,7 +155,7 @@ public class SchemaManager { return singletonList(errorMsg + "Timed out."); } - private void waitForOtherReplicasToUpdate(TimeOut timeOut) { + private void waitForOtherReplicasToUpdate(TimeOut timeOut, int latestVersion) { CoreDescriptor cd = req.getCore().getCoreDescriptor(); String collection = cd.getCollectionName(); if (collection != null) { @@ -164,11 +164,8 @@ public class SchemaManager { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Not enough time left to update replicas. However, the schema is updated already."); } - ManagedIndexSchema.waitForSchemaZkVersionAgreement(collection, - cd.getCloudDescriptor().getCoreNodeName(), - (managedIndexSchema).getSchemaZkVersion(), - zkLoader.getZkController(), - (int) timeOut.timeLeft(TimeUnit.SECONDS)); + ManagedIndexSchema.waitForSchemaZkVersionAgreement(collection, cd.getCloudDescriptor().getCoreNodeName(), + latestVersion, zkLoader.getZkController(), (int) timeOut.timeLeft(TimeUnit.SECONDS)); } } @@ -423,21 +420,23 @@ public class SchemaManager { return sb.toString(); } - public ManagedIndexSchema getFreshManagedSchema() throws IOException, KeeperException, InterruptedException { - SolrResourceLoader resourceLoader = req.getCore().getResourceLoader(); + public static ManagedIndexSchema getFreshManagedSchema(SolrCore core) throws IOException, + KeeperException, InterruptedException { + + SolrResourceLoader resourceLoader = core.getResourceLoader(); + String name = core.getLatestSchema().getResourceName(); if (resourceLoader instanceof ZkSolrResourceLoader) { - InputStream in = resourceLoader.openResource(req.getSchema().getResourceName()); + InputStream in = resourceLoader.openResource(name); if (in instanceof ZkSolrResourceLoader.ZkByteArrayInputStream) { int version = ((ZkSolrResourceLoader.ZkByteArrayInputStream) in).getStat().getVersion(); log.info("managed schema loaded . 
version : {} ", version);
- return new ManagedIndexSchema
- (req.getCore().getSolrConfig(), req.getSchema().getResourceName(), new InputSource(in),
- true, req.getSchema().getResourceName(), version, req.getSchema().getSchemaUpdateLock());
+ return new ManagedIndexSchema(core.getSolrConfig(), name, new InputSource(in), true, name, version,
+ core.getLatestSchema().getSchemaUpdateLock());
 } else {
- return (ManagedIndexSchema) req.getCore().getLatestSchema();
+ return (ManagedIndexSchema) core.getLatestSchema();
 }
 } else {
- return (ManagedIndexSchema) req.getCore().getLatestSchema();
+ return (ManagedIndexSchema) core.getLatestSchema();
 }
 }
 }
diff --git a/solr/core/src/java/org/apache/solr/update/PeerSync.java b/solr/core/src/java/org/apache/solr/update/PeerSync.java
index 9af5b1e7a6e..79f5ac9a2d7 100644
--- a/solr/core/src/java/org/apache/solr/update/PeerSync.java
+++ b/solr/core/src/java/org/apache/solr/update/PeerSync.java
@@ -26,6 +26,7 @@ import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.stream.Collectors;

 import org.apache.http.NoHttpResponseException;
 import org.apache.http.client.HttpClient;
@@ -86,7 +87,7 @@ public class PeerSync {
 private SolrCore core;

 // comparator that sorts by absolute value, putting highest first
- private static Comparator<Long> absComparator = (o1, o2) -> {
+ public static Comparator<Long> absComparator = (o1, o2) -> {
 long l1 = Math.abs(o1);
 long l2 = Math.abs(o2);
 if (l1 > l2) return -1;
@@ -117,6 +118,8 @@ public class PeerSync {
 boolean doFingerprintComparison;
 List<Long> requestedUpdates;
 Exception updateException;
+ List<String> requestedRanges;
+ long totalRequestedUpdates;
 }

 public PeerSync(SolrCore core, List<String> replicas, int nUpdates) {
@@ -359,6 +362,103 @@ public class PeerSync {
 }
 }

+ private boolean canHandleVersionRanges(String replica) {
+ SyncShardRequest sreq = new SyncShardRequest();
+ requests.add(sreq);
+
+ // determine if leader can handle version ranges
+ sreq.shards = new String[] {replica};
+ sreq.actualShards = sreq.shards;
+ sreq.params = new ModifiableSolrParams();
+ sreq.params.set("qt", "/get");
+ sreq.params.set("distrib", false);
+ sreq.params.set("checkCanHandleVersionRanges", false);
+
+ ShardHandler sh = shardHandlerFactory.getShardHandler(client);
+ sh.submit(sreq, replica, sreq.params);
+
+ ShardResponse srsp = sh.takeCompletedIncludingErrors();
+ Boolean canHandleVersionRanges = srsp.getSolrResponse().getResponse().getBooleanArg("canHandleVersionRanges");
+
+ if (canHandleVersionRanges == null || canHandleVersionRanges.booleanValue() == false) {
+ return false;
+ }
+
+ return true;
+ }
+
+ private boolean handleVersionsWithRanges(ShardResponse srsp, List<Long> otherVersions, SyncShardRequest sreq,
+ boolean completeList, long otherHigh, long otherHighest) {
+ // we may end up asking for updates for too many versions, exceeding the 2MB POST payload limit.
+ // Construct ranges of versions to request instead of asking for individual versions.
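+ // Worked example (illustrative): if the replica reports versions 90,85,70,60,40 and we
+ // already have 85,40, the loop below emits the inclusive ranges "60...70" and "90...90",
+ // i.e. one range per contiguous run of missing versions (3 requested updates in total).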
+ List<String> rangesToRequest = new ArrayList<>();
+
+ // construct ranges to request
+ // both ourUpdates and otherVersions are sorted with highest range first
+ // maybe we could instead reverse the lists and avoid the confusion
+ int ourUpdatesIndex = ourUpdates.size() - 1;
+ int otherUpdatesIndex = otherVersions.size() - 1;
+ long totalRequestedVersions = 0;
+
+ while (otherUpdatesIndex >= 0) {
+ // we have run out of ourUpdates, pick up all the remaining versions from the other versions
+ if (ourUpdatesIndex < 0) {
+ String range = otherVersions.get(otherUpdatesIndex) + "..." + otherVersions.get(0);
+ rangesToRequest.add(range);
+ totalRequestedVersions += otherUpdatesIndex + 1;
+ break;
+ }
+
+ // stop when the entries get old enough that reorders may lead us to see updates we don't need
+ if (!completeList && Math.abs(otherVersions.get(otherUpdatesIndex)) < ourLowThreshold) break;
+
+ if (ourUpdates.get(ourUpdatesIndex).longValue() == otherVersions.get(otherUpdatesIndex).longValue()) {
+ ourUpdatesIndex--;
+ otherUpdatesIndex--;
+ } else if (Math.abs(ourUpdates.get(ourUpdatesIndex)) < Math.abs(otherVersions.get(otherUpdatesIndex))) {
+ ourUpdatesIndex--;
+ } else {
+ long rangeStart = otherVersions.get(otherUpdatesIndex);
+ while ((otherUpdatesIndex >= 0)
+ && (Math.abs(otherVersions.get(otherUpdatesIndex)) < Math.abs(ourUpdates.get(ourUpdatesIndex)))) {
+ otherUpdatesIndex--;
+ totalRequestedVersions++;
+ }
+ // construct range here
+ rangesToRequest.add(rangeStart + "..." + otherVersions.get(otherUpdatesIndex + 1));
+ }
+ }
+
+ // TODO, do we really need to hold on to all the ranges we requested
+ // keeping track of totalRequestedUpdates should suffice for verification
+ sreq.requestedRanges = rangesToRequest;
+ sreq.totalRequestedUpdates = totalRequestedVersions;
+
+ if (rangesToRequest.isEmpty()) {
+ log.info(msg() + " No additional versions requested. 
ourLowThreshold=" + ourLowThreshold + " otherHigh=" + + otherHigh + " ourHighest=" + ourHighest + " otherHighest=" + otherHighest); + + // we had (or already requested) all the updates referenced by the replica + + // If we requested updates from another replica, we can't compare fingerprints yet with this replica, we need to + // defer + if (doFingerprint) { + sreq.doFingerprintComparison = true; + } + + return true; + } + + if (totalRequestedVersions > maxUpdates) { + log.info(msg() + " Failing due to needing too many updates:" + maxUpdates); + return false; + } + + String rangesToRequestStr = rangesToRequest.stream().collect(Collectors.joining(",")); + return requestUpdates(srsp, rangesToRequestStr, totalRequestedVersions); + } + + private boolean handleVersions(ShardResponse srsp) { // we retrieved the last N updates from the replica List otherVersions = (List)srsp.getSolrResponse().getResponse().get("versions"); @@ -410,6 +510,15 @@ public class PeerSync { return true; } + if(core.getSolrConfig().useRangeVersionsForPeerSync && canHandleVersionRanges(sreq.shards[0])) { + return handleVersionsWithRanges(srsp, otherVersions, sreq, completeList, otherHigh, otherHighest); + } else { + return handleIndividualVersions(srsp, otherVersions, sreq, completeList, otherHigh, otherHighest); + } + } + + private boolean handleIndividualVersions(ShardResponse srsp, List otherVersions, SyncShardRequest sreq, + boolean completeList, long otherHigh, long otherHighest) { List toRequest = new ArrayList<>(); for (Long otherVersion : otherVersions) { // stop when the entries get old enough that reorders may lead us to see updates we don't need @@ -426,7 +535,10 @@ public class PeerSync { requestedUpdateSet.add(otherVersion); } + // TODO, do we really need to hold on to all the version numbers we requested. + // keeping track of totalRequestedUpdates should suffice for verification sreq.requestedUpdates = toRequest; + sreq.totalRequestedUpdates = toRequest.size(); if (toRequest.isEmpty()) { log.info(msg() + " No additional versions requested. 
ourLowThreshold="+ourLowThreshold + " otherHigh="+otherHigh+ " ourHighest=" + ourHighest + " otherHighest=" + otherHighest); @@ -446,7 +558,7 @@ public class PeerSync { return false; } - return requestUpdates(srsp, toRequest); + return requestUpdates(srsp, StrUtils.join(toRequest, ','), toRequest.size()); } private boolean compareFingerprint(SyncShardRequest sreq) { @@ -462,10 +574,10 @@ public class PeerSync { } } - private boolean requestUpdates(ShardResponse srsp, List toRequest) { + private boolean requestUpdates(ShardResponse srsp, String versionsAndRanges, long totalUpdates) { String replica = srsp.getShardRequest().shards[0]; - log.info(msg() + "Requesting updates from " + replica + "n=" + toRequest.size() + " versions=" + toRequest); + log.info(msg() + "Requesting updates from " + replica + "n=" + totalUpdates + " versions=" + versionsAndRanges); // reuse our original request object ShardRequest sreq = srsp.getShardRequest(); @@ -474,7 +586,7 @@ public class PeerSync { sreq.params = new ModifiableSolrParams(); sreq.params.set("qt", "/get"); sreq.params.set("distrib", false); - sreq.params.set("getUpdates", StrUtils.join(toRequest, ',')); + sreq.params.set("getUpdates", versionsAndRanges); sreq.params.set("onlyIfActive", onlyIfActive); sreq.responses.clear(); // needs to be zeroed for correct correlation to occur @@ -489,7 +601,7 @@ public class PeerSync { List updates = (List)srsp.getSolrResponse().getResponse().get("updates"); SyncShardRequest sreq = (SyncShardRequest) srsp.getShardRequest(); - if (updates.size() < sreq.requestedUpdates.size()) { + if (updates.size() < sreq.totalRequestedUpdates) { log.error(msg() + " Requested " + sreq.requestedUpdates.size() + " updates from " + sreq.shards[0] + " but retrieved " + updates.size()); return false; } diff --git a/solr/core/src/resources/EditableSolrConfigAttributes.json b/solr/core/src/resources/EditableSolrConfigAttributes.json index 394d63457a8..b0d6c2fee01 100644 --- a/solr/core/src/resources/EditableSolrConfigAttributes.json +++ b/solr/core/src/resources/EditableSolrConfigAttributes.json @@ -52,5 +52,8 @@ "multipartUploadLimitInKB":0, "formdataUploadLimitInKB":0, "enableRemoteStreaming":0, - "addHttpRequestToContext":0}} + "addHttpRequestToContext":0}}, + "peerSync":{ + "useRangeVersions":11 + } } \ No newline at end of file diff --git a/solr/core/src/resources/ImplicitPlugins.json b/solr/core/src/resources/ImplicitPlugins.json index 325bf913cee..58f6b79c209 100644 --- a/solr/core/src/resources/ImplicitPlugins.json +++ b/solr/core/src/resources/ImplicitPlugins.json @@ -104,6 +104,12 @@ "wt": "json", "distrib": false } + }, + "/terms": { + "class": "solr.SearchHandler", + "components": [ + "terms" + ] } } } \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml b/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml new file mode 100644 index 00000000000..6cb006a766b --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-psuedo-fields.xml @@ -0,0 +1,71 @@ + + + + + + + + + + + + + + + + + + + id + text + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml index 680f5723098..c6e2f958347 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-tlog.xml @@ -46,6 +46,10 @@ + + + 
${solr.peerSync.useRangeVersions:false}</useRangeVersions>
+ </peerSync>
+
@@ -57,12 +61,12 @@
-
 ${solr.autoCommit.maxTime:-1}
+ ${solr.autoCommit.maxTime:-1}
 false
-
 ${solr.autoSoftCommit.maxTime:-1}
+ ${solr.autoSoftCommit.maxTime:-1}
diff --git a/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java b/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
index 4926dd463ac..af058d02f6f 100644
--- a/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
+++ b/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
@@ -114,7 +114,9 @@ public class MinimalSchemaTest extends SolrTestCaseJ4 {
 handler.startsWith("/export") ||
 handler.startsWith("/graph") ||
 handler.startsWith("/sql") ||
- handler.startsWith("/stream")
+ handler.startsWith("/stream") ||
+ handler.startsWith("/terms")
+
 ) {
 continue;
 }
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java
similarity index 70%
rename from solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
rename to solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java
index 5e35616db68..96faf923922 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestCloudBackupRestore.java
+++ b/solr/core/src/test/org/apache/solr/cloud/AbstractCloudBackupRestoreTestCase.java
@@ -1,3 +1,35 @@
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+import java.util.TreeMap;
+
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest.ClusterProp;
+import org.apache.solr.client.solrj.response.RequestStatusState;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.ImplicitDocRouter;
+import org.apache.solr.common.cloud.Slice;
+import org.apache.solr.common.params.CoreAdminParams;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.solr.common.params.ShardParams._ROUTE_;
+
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ -package org.apache.solr.cloud; - -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Random; -import java.util.TreeMap; - -import org.apache.lucene.util.TestUtil; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrClient; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.response.RequestStatusState; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.cloud.DocCollection; -import org.apache.solr.common.cloud.ImplicitDocRouter; -import org.apache.solr.common.cloud.Slice; -import org.junit.BeforeClass; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.solr.common.params.ShardParams._ROUTE_; - -public class TestCloudBackupRestore extends SolrCloudTestCase { - +/** + * This class implements the logic required to test Solr cloud backup/restore capability. + */ +public abstract class AbstractCloudBackupRestoreTestCase extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static final int NUM_SHARDS = 2;//granted we sometimes shard split to get more + protected static final int NUM_SHARDS = 2;//granted we sometimes shard split to get more private static long docsSeed; // see indexDocs() @BeforeClass public static void createCluster() throws Exception { - configureCluster(2)// nodes - .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) - .configure(); - docsSeed = random().nextLong(); } + /** + * @return The name of the collection to use. + */ + public abstract String getCollectionName(); + + /** + * @return The name of the backup repository to use. + */ + public abstract String getBackupRepoName(); + + /** + * @return The absolute path for the backup location. + * Could return null. + */ + public abstract String getBackupLocation(); + @Test public void test() throws Exception { - String collectionName = "backuprestore"; boolean isImplicit = random().nextBoolean(); int replFactor = TestUtil.nextInt(random(), 1, 2); CollectionAdminRequest.Create create = - CollectionAdminRequest.createCollection(collectionName, "conf1", NUM_SHARDS, replFactor); + CollectionAdminRequest.createCollection(getCollectionName(), "conf1", NUM_SHARDS, replFactor); if (NUM_SHARDS * replFactor > cluster.getJettySolrRunners().size() || random().nextBoolean()) { create.setMaxShardsPerNode(NUM_SHARDS);//just to assert it survives the restoration } @@ -90,24 +107,62 @@ public class TestCloudBackupRestore extends SolrCloudTestCase { CloudSolrClient solrClient = cluster.getSolrClient(); create.process(solrClient); - indexDocs(collectionName); + indexDocs(getCollectionName()); if (!isImplicit && random().nextBoolean()) { // shard split the first shard - int prevActiveSliceCount = getActiveSliceCount(collectionName); - CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(collectionName); + int prevActiveSliceCount = getActiveSliceCount(getCollectionName()); + CollectionAdminRequest.SplitShard splitShard = CollectionAdminRequest.splitShard(getCollectionName()); splitShard.setShardName("shard1"); splitShard.process(solrClient); // wait until we see one more active slice... 
- for (int i = 0; getActiveSliceCount(collectionName) != prevActiveSliceCount + 1; i++) { + for (int i = 0; getActiveSliceCount(getCollectionName()) != prevActiveSliceCount + 1; i++) { assertTrue(i < 30); Thread.sleep(500); } // issue a hard commit. Split shard does a soft commit which isn't good enough for the backup/snapshooter to see - solrClient.commit(collectionName); + solrClient.commit(getCollectionName()); } - testBackupAndRestore(collectionName); + testBackupAndRestore(getCollectionName()); + testInvalidPath(getCollectionName()); + } + + // This test verifies the system behavior when the backup location cluster property is configured with an invalid + // value for the specified repository (and the default backup location is not configured in solr.xml). + private void testInvalidPath(String collectionName) throws Exception { + // Execute this test only if the default backup location is NOT configured in solr.xml + if (getBackupLocation() == null) { + return; + } + + String backupName = "invalidbackuprequest"; + CloudSolrClient solrClient = cluster.getSolrClient(); + + ClusterProp req = CollectionAdminRequest.setClusterProperty(CoreAdminParams.BACKUP_LOCATION, "/location/does/not/exist"); + assertEquals(0, req.process(solrClient).getStatus()); + + // Do not specify the backup location. + CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName) + .setRepositoryName(getBackupRepoName()); + try { + backup.process(solrClient); + fail("This request should have failed since the cluster property value for backup location property is invalid."); + } catch (SolrServerException ex) { + assertTrue(ex.getCause() instanceof RemoteSolrException); + assertEquals(ErrorCode.SERVER_ERROR.code, ((RemoteSolrException)ex.getCause()).code()); + } + + String restoreCollectionName = collectionName + "_invalidrequest"; + CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName) + .setRepositoryName(getBackupRepoName()); + try { + restore.process(solrClient); + fail("This request should have failed since the cluster property value for backup location property is invalid."); + } catch (SolrServerException ex) { + assertTrue(ex.getCause() instanceof RemoteSolrException); + assertEquals(ErrorCode.SERVER_ERROR.code, ((RemoteSolrException)ex.getCause()).code()); + } } private int getActiveSliceCount(String collectionName) { @@ -134,6 +189,7 @@ public class TestCloudBackupRestore extends SolrCloudTestCase { } private void testBackupAndRestore(String collectionName) throws Exception { + String backupLocation = getBackupLocation(); String backupName = "mytestbackup"; CloudSolrClient client = cluster.getSolrClient(); @@ -142,13 +198,11 @@ public class TestCloudBackupRestore extends SolrCloudTestCase { Map origShardToDocCount = getShardToDocCountMap(client, backupCollection); assert origShardToDocCount.isEmpty() == false; - String location = createTempDir().toFile().getAbsolutePath(); - log.info("Triggering Backup command"); { CollectionAdminRequest.Backup backup = CollectionAdminRequest.backupCollection(collectionName, backupName) - .setLocation(location); + .setLocation(backupLocation).setRepositoryName(getBackupRepoName()); if (random().nextBoolean()) { assertEquals(0, backup.process(client).getStatus()); } else { @@ -163,7 +217,8 @@ public class TestCloudBackupRestore extends SolrCloudTestCase { { CollectionAdminRequest.Restore restore = CollectionAdminRequest.restoreCollection(restoreCollectionName, 
backupName) - .setLocation(location); + .setLocation(backupLocation).setRepositoryName(getBackupRepoName()); + if (origShardToDocCount.size() > cluster.getJettySolrRunners().size()) { // may need to increase maxShardsPerNode (e.g. if it was shard split, then now we need more) restore.setMaxShardsPerNode(origShardToDocCount.size()); @@ -215,5 +270,4 @@ public class TestCloudBackupRestore extends SolrCloudTestCase { } return shardToDocCount; } - } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java new file mode 100644 index 00000000000..bf56821a614 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudPseudoReturnFields.java @@ -0,0 +1,836 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.cloud; + +import java.lang.invoke.MethodHandles; + +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; + +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.impl.HttpSolrClient; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.schema.SchemaRequest.Field; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.schema.SchemaResponse.FieldResponse; + +import org.apache.solr.cloud.SolrCloudTestCase; + +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; + +import org.apache.solr.search.TestPseudoReturnFields; + +import org.apache.lucene.util.TestUtil; + +import org.apache.commons.lang.StringUtils; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +/** @see TestPseudoReturnFields */ +public class TestCloudPseudoReturnFields extends SolrCloudTestCase { + + private static final String DEBUG_LABEL = MethodHandles.lookup().lookupClass().getName(); + private static final String COLLECTION_NAME = DEBUG_LABEL + "_collection"; + + /** A basic client for operations at the cloud level, default collection will be set */ + private static CloudSolrClient CLOUD_CLIENT; + /** One client per node */ + private static ArrayList<HttpSolrClient> CLIENTS = new ArrayList<>(5); + + @BeforeClass + private static void createMiniSolrCloudCluster() throws Exception { + // multi replicas should matter... + final int repFactor = usually() ? 
1 : 2; + // ... but we definitely want to ensure forwarded requests to other shards work ... + final int numShards = 2; + // ... including some forwarded requests from nodes not hosting a shard + final int numNodes = 1 + (numShards * repFactor); + + final String configName = DEBUG_LABEL + "_config-set"; + final Path configDir = Paths.get(TEST_HOME(), "collection1", "conf"); + + configureCluster(numNodes).addConfig(configName, configDir).configure(); + + Map<String,String> collectionProperties = new HashMap<>(); + collectionProperties.put("config", "solrconfig-tlog.xml"); + collectionProperties.put("schema", "schema-psuedo-fields.xml"); + + assertNotNull(cluster.createCollection(COLLECTION_NAME, numShards, repFactor, + configName, null, null, collectionProperties)); + + CLOUD_CLIENT = cluster.getSolrClient(); + CLOUD_CLIENT.setDefaultCollection(COLLECTION_NAME); + + waitForRecoveriesToFinish(CLOUD_CLIENT); + + for (JettySolrRunner jetty : cluster.getJettySolrRunners()) { + CLIENTS.add(getHttpSolrClient(jetty.getBaseUrl() + "/" + COLLECTION_NAME + "/")); + } + + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "42", "val_i", "1", "ssto", "X", "subject", "aaa")).getStatus()); + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "43", "val_i", "9", "ssto", "X", "subject", "bbb")).getStatus()); + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "44", "val_i", "4", "ssto", "X", "subject", "aaa")).getStatus()); + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "45", "val_i", "6", "ssto", "X", "subject", "aaa")).getStatus()); + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")).getStatus()); + assertEquals(0, CLOUD_CLIENT.commit().getStatus()); + + // uncommitted doc in transaction log + assertEquals(0, CLOUD_CLIENT.add(sdoc("id", "99", "val_i", "1", "ssto", "X", + "subject", "uncommitted")).getStatus()); + } + + @AfterClass + private static void afterClass() throws Exception { + CLOUD_CLIENT.close(); CLOUD_CLIENT = null; + for (HttpSolrClient client : CLIENTS) { + client.close(); + } + CLIENTS = null; + } + + public void testMultiValued() throws Exception { + // the response writers used to consult isMultiValued on the field + // but this doesn't work when you alias a single valued field to + // a multi valued field (the field value is copied first, then + // if the type lookup is done again later, we get the wrong thing). SOLR-4036 + + // score as pseudo field - precondition checks + for (String name : new String[] {"score", "val_ss"}) { + try { + FieldResponse frsp = new Field(name, params("includeDynamic","true", + "showDefaults","true")).process(CLOUD_CLIENT); + assertNotNull("Test depends on a (dynamic) field matching '"+name+"', Null response", frsp); + assertEquals("Test depends on a (dynamic) field matching '"+name+"', bad status: " + frsp.toString(), + 0, frsp.getStatus()); + assertNotNull("Test depends on a (dynamic) field matching '"+name+ + "', schema was changed out from under us? ... " + frsp.toString(), frsp.getField()); + assertEquals("Test depends on a multivalued dynamic field matching '"+name+ + "', schema was changed out from under us? ... " + frsp.toString(), + Boolean.TRUE, frsp.getField().get("multiValued")); + } catch (SolrServerException e) { + assertEquals("Couldn't fetch field for '"+name+"' ... 
schema changed out from under us?", + null, e); + } + } + + SolrDocument doc = null; + + // score as pseudo field + doc = assertSearchOneDoc(params("q","*:*", "fq", "id:42", "fl","id,score,val_ss,val2_ss")); + assertEquals("42", doc.getFieldValue("id")); + assertEquals(1.0F, doc.getFieldValue("score")); + assertEquals(""+doc, 2, doc.size()); // no value for val2_ss or val_ss ... yet... + + // TODO: update this test & TestPseudoReturnFields to index docs using a (multivalued) "val_ss" instead of "ssto" + // + // that way we can first sanity check a single value in a multivalued field is returned correctly + // as a "List" of one element, *AND* then we could be testing that a (single valued) pseudo-field correctly + // overrides that actual (real) value in a multivalued field (ie: not returning a List) + // + // (NOTE: not doing this yet due to how it will impact most other tests, many of which are currently + // @AwaitsFix'ed) + // + //assertTrue(doc.getFieldValue("val_ss").getClass().toString(), + // doc.getFieldValue("val_ss") instanceof List); + + // single value int using alias that matches multivalued dynamic field + doc = assertSearchOneDoc(params("q","id:42", "fl","val_ss:val_i, val2_ss:10")); + assertEquals(""+doc, 2, doc.size()); + assertEquals(""+doc, 1, doc.getFieldValue("val_ss")); + assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss")); + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testMultiValuedRTG() throws Exception { + SolrDocument doc = null; + + // check same results as testMultiValued via RTG (committed doc) + doc = getRandClient(random()).getById("42", params("fl","val_ss:val_i, val2_ss:10, subject")); + assertEquals(""+doc, 2, doc.size()); + assertEquals(""+doc, 1, doc.getFieldValue("val_ss")); + assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss")); + assertEquals(""+doc, "aaa", doc.getFieldValue("subject")); + + // also check real-time-get from transaction log (uncommitted doc) + doc = getRandClient(random()).getById("99", params("fl","val_ss:val_i, val2_ss:10, subject")); + assertEquals(""+doc, 3, doc.size()); + assertEquals(""+doc, 1, doc.getFieldValue("val_ss")); + assertEquals(""+doc, 10L, doc.getFieldValue("val2_ss")); + assertEquals(""+doc, "uncommitted", doc.getFieldValue("subject")); + } + + public void testAllRealFields() throws Exception { + + for (String fl : TestPseudoReturnFields.ALL_REAL_FIELDS) { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl",fl)); + // shouldn't matter what doc we pick... 
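The fl aliasing exercised above renames values in the response: val_ss:val_i returns the stored val_i value under the key val_ss, while a numeric literal such as val2_ss:10 materializes a constant. A minimal sketch of the same request via SolrJ, where the client and collection are placeholder assumptions:

    // Sketch only: "client" is any SolrClient already pointed at the collection under test.
    SolrQuery q = new SolrQuery("id:42");
    q.setFields("val_ss:val_i", "val2_ss:10"); // alias a real field, plus a literal constant
    SolrDocument d = client.query(q).getResults().get(0);
    Object aliased = d.getFieldValue("val_ss");   // the val_i value, e.g. 1
    Object constant = d.getFieldValue("val2_ss"); // the literal 10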
+ for (SolrDocument doc : docs) { + assertEquals(fl + " => " + doc, 4, doc.size()); + assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + } + } + } + + public void testAllRealFieldsRTG() throws Exception { + // shouldn't matter if we use RTG (committed or otherwise) + for (String fl : TestPseudoReturnFields.ALL_REAL_FIELDS) { + for (int i : Arrays.asList(42, 43, 44, 45, 46, 99)) { + SolrDocument doc = getRandClient(random()).getById(""+i, params("fl",fl)); + assertEquals(fl + " => " + doc, 4, doc.size()); + assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + + } + } + } + + public void testScoreAndAllRealFields() throws Exception { + for (String fl : TestPseudoReturnFields.SCORE_AND_REAL_FIELDS) { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl",fl)); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(fl + " => " + doc, 5, doc.size()); + assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("score") instanceof Float); + assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + } + } + } + + public void testScoreAndAllRealFieldsRTG() throws Exception { + // also shouldn't matter if we use RTG (committed or otherwise) .. 
score should be ignored + for (String fl : TestPseudoReturnFields.SCORE_AND_REAL_FIELDS) { + for (int i : Arrays.asList(42, 43, 44, 45, 46, 99)) { + SolrDocument doc = getRandClient(random()).getById(""+i, params("fl",fl)); + assertEquals(fl + " => " + doc, 4, doc.size()); + assertTrue(fl + " => " + doc, doc.getFieldValue("id") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(fl + " => " + doc, doc.getFieldValue("subject") instanceof String); + assertTrue(fl + " => " + doc, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + } + } + } + + public void testScoreAndExplicitRealFields() throws Exception { + + SolrDocumentList docs = null; + SolrDocument doc = null; + + for (SolrParams p : Arrays.asList(params("q","*:*", "rows", "1", "fl","score,val_i"), + params("q","*:*", "rows", "1", "fl","score", "fl","val_i"))) { + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + doc = docs.get(0); // doesn't really matter which one + assertEquals(p + " => " + doc, 2, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(p + " => " + doc, doc.getFieldValue("score") instanceof Float); + } + + docs = assertSearch(params("q","*:*", "rows", "1", "fl","val_i")); + assertEquals("" + docs, 5, docs.getNumFound()); + doc = docs.get(0); // doesn't really matter which one + assertEquals("" + doc, 1, doc.size()); + assertTrue("" + doc, doc.getFieldValue("val_i") instanceof Integer); + } + + public void testScoreAndExplicitRealFieldsRTG() throws Exception { + SolrDocumentList docs = null; + SolrDocument doc = null; + + // shouldn't matter if we use RTG (committed or otherwise) .. score should be ignored + for (int i : Arrays.asList(42, 43, 44, 45, 46, 99)) { + for (SolrParams p : Arrays.asList(params("fl","score,val_i"), + params("fl","score", "fl","val_i"))) { + doc = getRandClient(random()).getById(""+i, p); + assertEquals(p + " => " + doc, 1, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + } + } + } + + public void testFunctions() throws Exception { + + SolrDocumentList docs = assertSearch(params("q","*:*","rows","1","fl","log(val_i)")); + assertEquals(""+docs, 5, docs.getNumFound()); + SolrDocument doc = docs.get(0); // doesn't really matter which one + assertEquals(""+doc, 1, doc.size()); + assertTrue(""+doc, doc.getFieldValue("log(val_i)") instanceof Double); + + for (SolrParams p : Arrays.asList(params("q","*:*", "rows", "1", "fl","log(val_i),abs(val_i)"), + params("q","*:*", "rows", "1", "fl","log(val_i)", "fl","abs(val_i)"))) { + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + doc = docs.get(0); // doesn't really matter which one + assertEquals(p + " => " + doc, 2, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("log(val_i)") instanceof Double); + assertTrue(p + " => " + doc, doc.getFieldValue("abs(val_i)") instanceof Float); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testFunctionsRTG() throws Exception { + // if we use RTG (committed or otherwise) functions should behave the same + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("fl","log(val_i),abs(val_i)"), + params("fl","log(val_i)","fl", "abs(val_i)"))) { + SolrDocument doc = getRandClient(random()).getById(id, p); + String msg = id + "," + p + " => " + doc; + assertEquals(msg, 2, doc.size()); + 
assertTrue(msg, doc.getFieldValue("log(val_i)") instanceof Double); + assertTrue(msg, doc.getFieldValue("abs(val_i)") instanceof Float); + // true for both these specific docs + assertEquals(msg, 0.0D, doc.getFieldValue("log(val_i)")); + assertEquals(msg, 1.0F, doc.getFieldValue("abs(val_i)")); + } + } + } + + public void testFunctionsAndExplicit() throws Exception { + for (SolrParams p : Arrays.asList(params("q","*:*", "rows", "1", "fl","log(val_i),val_i"), + params("q","*:*", "rows", "1", "fl","log(val_i)", "fl","val_i"))) { + SolrDocumentList docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + SolrDocument doc = docs.get(0); // doesn't really matter which one + assertEquals(p + " => " + doc, 2, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("log(val_i)") instanceof Double); + assertTrue(p + " => " + doc, doc.getFieldValue("val_i") instanceof Integer); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testFunctionsAndExplicitRTG() throws Exception { + // shouldn't matter if we use RTG (committed or otherwise) + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("fl","log(val_i),val_i"), + params("fl","log(val_i)","fl","val_i"))) { + SolrDocument doc = getRandClient(random()).getById(id, p); + String msg = id + "," + p + " => " + doc; + assertEquals(msg, 2, doc.size()); + assertTrue(msg, doc.getFieldValue("log(val_i)") instanceof Double); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + // true for both these specific docs + assertEquals(msg, 0.0D, doc.getFieldValue("log(val_i)")); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + } + } + } + + + public void testFunctionsAndScore() throws Exception { + + for (SolrParams p : Arrays.asList(params("fl","log(val_i),score"), + params("fl","log(val_i)","fl","score"))) { + SolrDocumentList docs = assertSearch(SolrParams.wrapDefaults(p, params("q", "*:*", "rows", "10"))); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(p + " => " + doc, 2, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("score") instanceof Float); + assertTrue(p + " => " + doc, doc.getFieldValue("log(val_i)") instanceof Double); + } + } + for (SolrParams p : Arrays.asList(params("fl","log(val_i),abs(val_i),score"), + params("fl","log(val_i),abs(val_i)","fl","score"), + params("fl","log(val_i)","fl","abs(val_i),score"), + params("fl","log(val_i)","fl","abs(val_i)","fl","score"))) { + SolrDocumentList docs = assertSearch(SolrParams.wrapDefaults(p, params("q", "*:*", "rows", "10"))); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... 
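Functions in fl are evaluated per document and come back as typed values, which is exactly what the instanceof assertions in these tests pin down: log() yields a Double, abs() over an int field a Float. A hedged sketch of an equivalent request (client and collection are placeholder assumptions):

    // Sketch only: function queries as pseudo return fields.
    SolrQuery q = new SolrQuery("*:*");
    q.setRows(1);
    q.setFields("log(val_i)", "abs(val_i)");
    SolrDocument d = client.query(q).getResults().get(0);
    Double logVal = (Double) d.getFieldValue("log(val_i)"); // 0.0 when val_i == 1
    Float absVal = (Float) d.getFieldValue("abs(val_i)");   // 1.0f when val_i == 1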
+ for (SolrDocument doc : docs) { + assertEquals(p + " => " + doc, 3, doc.size()); + assertTrue(p + " => " + doc, doc.getFieldValue("score") instanceof Float); + assertTrue(p + " => " + doc, doc.getFieldValue("abs(val_i)") instanceof Float); + assertTrue(p + " => " + doc, doc.getFieldValue("log(val_i)") instanceof Double); + } + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testFunctionsAndScoreRTG() throws Exception { + + // if we use RTG (committed or otherwise) score should be ignored + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("fl","score","fl","log(val_i)","fl","abs(val_i)"), + params("fl","score","fl","log(val_i),abs(val_i)"), + params("fl","score,log(val_i)","fl","abs(val_i)"), + params("fl","score,log(val_i),abs(val_i)"))) { + SolrDocument doc = getRandClient(random()).getById(id, p); + String msg = id + "," + p + " => " + doc; + assertEquals(msg, 2, doc.size()); + assertTrue(msg, doc.getFieldValue("log(val_i)") instanceof Double); + assertTrue(msg, doc.getFieldValue("abs(val_i)") instanceof Float); + // true for both these specific docs + assertEquals(msg, 0.0D, doc.getFieldValue("log(val_i)")); + assertEquals(msg, 1.0F, doc.getFieldValue("abs(val_i)")); + } + } + } + + public void testGlobs() throws Exception { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl","val_*")); + assertEquals(5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(doc.toString(), 1, doc.size()); + assertTrue(doc.toString(), doc.getFieldValue("val_i") instanceof Integer); + } + for (SolrParams p : Arrays.asList(params("q", "*:*", "rows", "10", "fl","val_*,subj*,ss*"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*,ss*"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*","fl","ss*"))) { + docs = assertSearch(p); + // shouldn't matter what doc we pick... 
+ for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + assertTrue(msg, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + assertEquals(msg, "X", doc.getFieldValue("ssto")); + } + } + } + + public void testGlobsRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + + SolrDocument doc = getRandClient(random()).getById(id, params("fl","val_*")); + String msg = id + ": fl=val_* => " + doc; + assertEquals(msg, 1, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,ss*"), + params("fl","val_*","fl","subj*,ss*"))) { + doc = getRandClient(random()).getById(id, p); + msg = id + ": " + p + " => " + doc; + + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + // NOTE: 'subject' is diff between two docs + assertTrue(msg, doc.getFieldValue("ssto") instanceof String); // TODO: val_ss: List + assertEquals(msg, "X", doc.getFieldValue("ssto")); + } + } + } + + public void testGlobsAndExplicit() throws Exception { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl","val_*,id")); + assertEquals(5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(doc.toString(), 2, doc.size()); + assertTrue(doc.toString(), doc.getFieldValue("val_i") instanceof Integer); + assertTrue(doc.toString(), doc.getFieldValue("id") instanceof String); + } + + for (SolrParams p : Arrays.asList(params("q", "*:*", "rows", "10", "fl","val_*,subj*,id"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*","fl","id"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*,id"))) { + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... 
+ for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + } + } + } + + public void testGlobsAndExplicitRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + SolrDocument doc = getRandClient(random()).getById(id, params("fl","val_*,id")); + String msg = id + ": fl=val_*,id => " + doc; + assertEquals(msg, 2, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,id"), + params("fl","val_*","fl","subj*","fl","id"), + params("fl","val_*","fl","subj*,id"))) { + doc = getRandClient(random()).getById(id, p); + msg = id + ": " + p + " => " + doc; + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + } + } + } + + public void testGlobsAndScore() throws Exception { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl","val_*,score")); + assertEquals(5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(doc.toString(), 2, doc.size()); + assertTrue(doc.toString(), doc.getFieldValue("val_i") instanceof Integer); + assertTrue(doc.toString(), doc.getFieldValue("score") instanceof Float); + } + + for (SolrParams p : Arrays.asList(params("q", "*:*", "rows", "10", "fl","val_*,subj*,score"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*","fl","score"), + params("q", "*:*", "rows", "10", "fl","val_*","fl","subj*,score"))) { + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... 
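Glob patterns in fl expand by field-name pattern and may be spread across repeated fl parameters, mixed freely with the score pseudo-field, as the permutations in these tests verify. A sketch of an equivalent request (client and collection are placeholder assumptions):

    // Sketch only: repeated fl parameters accumulate; globs expand to matching stored fields.
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set("q", "*:*");
    p.add("fl", "val_*");       // expands to val_i
    p.add("fl", "subj*,score"); // expands to subject, plus the score pseudo-field
    QueryResponse rsp = client.query(p);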
+ for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + assertTrue(msg, doc.getFieldValue("score") instanceof Float); + } + } + } + + public void testGlobsAndScoreRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted, score should be ignored + for (String id : Arrays.asList("42","99")) { + SolrDocument doc = getRandClient(random()).getById(id, params("fl","val_*,score")); + String msg = id + ": fl=val_*,score => " + doc; + assertEquals(msg, 1, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,score"), + params("fl","val_*","fl","subj*","fl","score"), + params("fl","val_*","fl","subj*,score"))) { + doc = getRandClient(random()).getById(id, p); + msg = id + ": " + p + " => " + doc; + assertEquals(msg, 2, doc.size()); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + } + } + } + + public void testAugmenters() throws Exception { + SolrDocumentList docs = assertSearch(params("q", "*:*", "rows", "10", "fl","[docid]")); + assertEquals(5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + assertEquals(doc.toString(), 1, doc.size()); + assertTrue(doc.toString(), doc.getFieldValue("[docid]") instanceof Integer); + } + + for (SolrParams p : Arrays.asList(params("q","*:*", "fl","[docid],[shard],[explain],x_alias:[value v=10 t=int]"), + params("q","*:*", "fl","[docid],[shard]","fl","[explain],x_alias:[value v=10 t=int]"), + params("q","*:*", "fl","[docid]","fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 4, doc.size()); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("[shard]") instanceof String); + assertTrue(msg, doc.getFieldValue("[explain]") instanceof String); + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + } + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9289") + public void testDocIdAugmenterRTG() throws Exception { + // NOTE: once this test is fixed to pass, testAugmentersRTG should also be updated to test [docid] + + // TODO: in single node, [docid] is silently ignored for uncommited docs (see SOLR-9288) ... 
+ // here it is even more confusing: [docid] is silently ignored for both committed & uncommitted docs + + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + SolrDocument doc = getRandClient(random()).getById(id, params("fl","[docid]")); + String msg = id + ": fl=[docid] => " + doc; + assertEquals(msg, 1, doc.size()); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testAugmentersRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. + for (SolrParams p : Arrays.asList(params("fl","[shard],[explain],x_alias:[value v=10 t=int]"), + params("fl","[shard]","fl","[explain],x_alias:[value v=10 t=int]"), + params("fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + + SolrDocument doc = getRandClient(random()).getById(id, p); + String msg = id + ": " + p + " => " + doc; + + assertEquals(msg, 2, doc.size()); + // assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO + assertTrue(msg, doc.getFieldValue("[shard]") instanceof String); + // RTG: [explain] should be ignored + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + } + } + } + + public void testAugmentersAndExplicit() throws Exception { + for (SolrParams p : Arrays.asList(params("q", "*:*", "fl","id,[docid],[explain],x_alias:[value v=10 t=int]"), + params("q", "*:*", "fl","id","fl","[docid],[explain],x_alias:[value v=10 t=int]"), + params("q", "*:*", "fl","id","fl","[docid]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + SolrDocumentList docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 4, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("[explain]") instanceof String); + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + } + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testAugmentersAndExplicitRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
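The bracketed fl entries are document transformers ("augmenters") evaluated while the response is written: [docid] injects the internal Lucene document id, [shard] names the shard that served the document, [explain] attaches the scoring explanation, and x_alias:[value v=10 t=int] aliases a typed constant. A sketch combining them (client and collection are placeholder assumptions):

    // Sketch only: document transformers requested through fl.
    SolrQuery q = new SolrQuery("*:*");
    q.setFields("id", "[docid]", "[shard]", "x_alias:[value v=10 t=int]");
    for (SolrDocument d : client.query(q).getResults()) {
      Integer luceneDocId = (Integer) d.getFieldValue("[docid]");
      String servedBy = (String) d.getFieldValue("[shard]");
      Integer constant = (Integer) d.getFieldValue("x_alias"); // always 10
    }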
+ for (SolrParams p : Arrays.asList(params("fl","id,[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + SolrDocument doc = getRandClient(random()).getById(id, p); + String msg = id + ": " + p + " => " + doc; + + assertEquals(msg, 2, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + // assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO + // RTG: [explain] should be missing (ignored) + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + } + } + } + + public void testAugmentersAndScore() throws Exception { + SolrParams params = params("q","*:*", "fl","[docid],x_alias:[value v=10 t=int],score"); + SolrDocumentList docs = assertSearch(params); + assertEquals(params + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = params + " => " + doc; + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + assertTrue(msg, doc.getFieldValue("score") instanceof Float); + } + for (SolrParams p : Arrays.asList(params("q","*:*","fl","[docid],x_alias:[value v=10 t=int],[explain],score"), + params("q","*:*","fl","[docid]","fl","x_alias:[value v=10 t=int],[explain]","fl","score"), + params("q","*:*","fl","[docid]","fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) { + + docs = assertSearch(p); + assertEquals(p + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = p + " => " + doc; + assertEquals(msg, 4, doc.size()); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + assertTrue(msg, doc.getFieldValue("[explain]") instanceof String); + assertTrue(msg, doc.getFieldValue("score") instanceof Float); + } + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9286") + public void testAugmentersAndScoreRTG() throws Exception { + // if we use RTG (committed or otherwise) score should be ignored + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
+ SolrDocument doc = getRandClient(random()).getById(id, params("fl","x_alias:[value v=10 t=int],score")); + String msg = id + " => " + doc; + + assertEquals(msg, 1, doc.size()); + // assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + + for (SolrParams p : Arrays.asList(params("fl","x_alias:[value v=10 t=int],[explain],score"), + params("fl","x_alias:[value v=10 t=int],[explain]","fl","score"), + params("fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) { + + doc = getRandClient(random()).getById(id, p); + msg = id + ": " + p + " => " + doc; + + assertEquals(msg, 1, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + // assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO + assertTrue(msg, doc.getFieldValue("x_alias") instanceof Integer); + assertEquals(msg, 10, doc.getFieldValue("x_alias")); + // RTG: [explain] and score should be missing (ignored) + } + } + } + + public void testAugmentersGlobsExplicitAndScoreOhMy() throws Exception { + Random random = random(); + + // NOTE: 'ssto' is the missing one + final List<String> fl = Arrays.asList + ("id","[docid]","[explain]","score","val_*","subj*"); + + final int iters = atLeast(random, 10); + for (int i = 0; i < iters; i++) { + + Collections.shuffle(fl, random); + + final SolrParams singleFl = params("q","*:*", "rows", "1","fl",StringUtils.join(fl.toArray(),',')); + final ModifiableSolrParams multiFl = params("q","*:*", "rows", "1"); + for (String item : fl) { + multiFl.add("fl",item); + } + for (SolrParams params : Arrays.asList(singleFl, multiFl)) { + SolrDocumentList docs = assertSearch(params); + assertEquals(params + " => " + docs, 5, docs.getNumFound()); + // shouldn't matter what doc we pick... + for (SolrDocument doc : docs) { + String msg = params + " => " + doc; + assertEquals(msg, 6, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); + assertTrue(msg, doc.getFieldValue("[explain]") instanceof String); + assertTrue(msg, doc.getFieldValue("score") instanceof Float); + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + } + } + } + } + + public void testAugmentersGlobsExplicitAndScoreOhMyRTG() throws Exception { + Random random = random(); + + // NOTE: 'ssto' is the missing one + final List<String> fl = Arrays.asList + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
+ ("id","[explain]","score","val_*","subj*"); + + final int iters = atLeast(random, 10); + for (int i = 0; i< iters; i++) { + + Collections.shuffle(fl, random); + + final SolrParams singleFl = params("fl",StringUtils.join(fl.toArray(),',')); + final ModifiableSolrParams multiFl = params(); + for (String item : fl) { + multiFl.add("fl",item); + } + + // RTG behavior should be consistent, (committed or otherwise) + for (String id : Arrays.asList("42","99")) { + for (SolrParams params : Arrays.asList(singleFl, multiFl)) { + SolrDocument doc = getRandClient(random()).getById(id, params); + String msg = id + ": " + params + " => " + doc; + + assertEquals(msg, 3, doc.size()); + assertTrue(msg, doc.getFieldValue("id") instanceof String); + // assertTrue(msg, doc.getFieldValue("[docid]") instanceof Integer); // TODO + assertTrue(msg, doc.getFieldValue("val_i") instanceof Integer); + assertEquals(msg, 1, doc.getFieldValue("val_i")); + assertTrue(msg, doc.getFieldValue("subject") instanceof String); + // RTG: [explain] and score should be missing (ignored) + } + } + } + } + + + + /** + * Given a set of query params, executes as a Query against a random SolrClient and + * asserts that exactly one document is returned + */ + public static SolrDocument assertSearchOneDoc(SolrParams p) throws Exception { + SolrDocumentList docs = assertSearch(p); + assertEquals("does not match exactly one doc: " + p.toString() + " => " + docs.toString(), + 1, docs.getNumFound()); + assertEquals("does not contain exactly one doc: " + p.toString() + " => " + docs.toString(), + 1, docs.size()); + return docs.get(0); + } + + /** + * Given a set of query params, executes as a Query against a random SolrClient and + * asserts that at least 1 doc is matched and at least 1 doc is returned + */ + public static SolrDocumentList assertSearch(SolrParams p) throws Exception { + QueryResponse rsp = getRandClient(random()).query(p); + assertEquals("failed request: " + p.toString() + " => " + rsp.toString(), 0, rsp.getStatus()); + assertTrue("does not match at least one doc: " + p.toString() + " => " + rsp.toString(), + 1 <= rsp.getResults().getNumFound()); + assertTrue("rsp does not contain at least one doc: " + p.toString() + " => " + rsp.toString(), + 1 <= rsp.getResults().size()); + return rsp.getResults(); + } + + /** + * returns a random SolrClient -- either a CloudSolrClient, or an HttpSolrClient pointed + * at a node in our cluster + */ + public static SolrClient getRandClient(Random rand) { + int numClients = CLIENTS.size(); + int idx = TestUtil.nextInt(rand, 0, numClients); + return (idx == numClients) ? 
CLOUD_CLIENT : CLIENTS.get(idx); + } + + public static void waitForRecoveriesToFinish(CloudSolrClient client) throws Exception { + assert null != client.getDefaultCollection(); + AbstractDistribZkTestBase.waitForRecoveriesToFinish(client.getDefaultCollection(), + client.getZkStateReader(), + true, true, 330); + } + +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestHdfsCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/TestHdfsCloudBackupRestore.java new file mode 100644 index 00000000000..a09fc2f5692 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestHdfsCloudBackupRestore.java @@ -0,0 +1,148 @@ +package org.apache.solr.cloud; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.net.URI; +import java.net.URISyntaxException; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; +import org.apache.solr.cloud.hdfs.HdfsTestUtil; +import org.apache.solr.util.BadHdfsThreadsFilter; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class implements the tests for HDFS integration for Solr backup/restore capability. 
+ */ +@ThreadLeakFilters(defaultFilters = true, filters = { + BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) +}) +public class TestHdfsCloudBackupRestore extends AbstractCloudBackupRestoreTestCase { + public static final String SOLR_XML = "<solr>\n" + + "\n" + + "  <str name=\"shareSchema\">${shareSchema:false}</str>\n" + + "  <str name=\"configSetBaseDir\">${configSetBaseDir:configsets}</str>\n" + + "  <str name=\"coreRootDirectory\">${coreRootDirectory:.}</str>\n" + + "\n" + + "  <shardHandlerFactory name=\"shardHandlerFactory\" class=\"HttpShardHandlerFactory\">\n" + + "    <str name=\"urlScheme\">${urlScheme:}</str>\n" + + "    <int name=\"socketTimeout\">${socketTimeout:90000}</int>\n" + + "    <int name=\"connTimeout\">${connTimeout:15000}</int>\n" + + "  </shardHandlerFactory>\n" + + "\n" + + "  <solrcloud>\n" + + "    <str name=\"host\">127.0.0.1</str>\n" + + "    <int name=\"hostPort\">${hostPort:8983}</int>\n" + + "    <str name=\"hostContext\">${hostContext:solr}</str>\n" + + "    <int name=\"zkClientTimeout\">${solr.zkclienttimeout:30000}</int>\n" + + "    <bool name=\"genericCoreNodeNames\">${genericCoreNodeNames:true}</bool>\n" + + "    <int name=\"leaderVoteWait\">10000</int>\n" + + "    <int name=\"distribUpdateConnTimeout\">${distribUpdateConnTimeout:45000}</int>\n" + + "    <int name=\"distribUpdateSoTimeout\">${distribUpdateSoTimeout:340000}</int>\n" + + "  </solrcloud>\n" + + "  <backup>\n" + + "    <repository name=\"hdfs\" class=\"org.apache.solr.core.backup.repository.HdfsBackupRepository\">\n" + + "      <str name=\"location\">${solr.hdfs.default.backup.path}</str>\n" + + "      <str name=\"solr.hdfs.home\">${solr.hdfs.home:}</str>\n" + + "      <str name=\"solr.hdfs.confdir\">${solr.hdfs.confdir:}</str>\n" + + "    </repository>\n" + + "  </backup>\n" + + "  \n" + + "</solr>\n"; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static MiniDFSCluster dfsCluster; + private static String hdfsUri; + private static FileSystem fs; + + @BeforeClass + public static void setupClass() throws Exception { + dfsCluster = HdfsTestUtil.setupClass(createTempDir().toFile().getAbsolutePath()); + hdfsUri = HdfsTestUtil.getURI(dfsCluster); + try { + URI uri = new URI(hdfsUri); + Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster); + conf.setBoolean("fs.hdfs.impl.disable.cache", true); + fs = FileSystem.get(uri, conf); + + if (fs instanceof DistributedFileSystem) { + // Make sure dfs is not in safe mode + while (((DistributedFileSystem) fs).setSafeMode(SafeModeAction.SAFEMODE_GET, true)) { + log.warn("The NameNode is in SafeMode - Solr will wait 5 seconds and try again."); + try { + Thread.sleep(5000); + } catch (InterruptedException e) { + Thread.interrupted(); + // continue + } + } + } + + fs.mkdirs(new org.apache.hadoop.fs.Path("/backup")); + } catch (IOException | URISyntaxException e) { + throw new RuntimeException(e); + } + + System.setProperty("solr.hdfs.default.backup.path", "/backup"); + System.setProperty("solr.hdfs.home", hdfsUri + "/solr"); + useFactory("solr.StandardDirectoryFactory"); + + configureCluster(NUM_SHARDS)// nodes + .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) + .withSolrXml(SOLR_XML) + .configure(); + } + + @AfterClass + public static void teardownClass() throws Exception { + System.clearProperty("solr.hdfs.home"); + System.clearProperty("solr.hdfs.default.backup.path"); + System.clearProperty("test.build.data"); + System.clearProperty("test.cache.data"); + IOUtils.closeQuietly(fs); + fs = null; + HdfsTestUtil.teardownClass(dfsCluster); + dfsCluster = null; + } + + @Override + public String getCollectionName() { + return "hdfsbackuprestore"; + } + + @Override + public String getBackupRepoName() { + return "hdfs"; + } + + @Override + public String getBackupLocation() { + return null; + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/TestLocalFSCloudBackupRestore.java b/solr/core/src/test/org/apache/solr/cloud/TestLocalFSCloudBackupRestore.java new file mode 100644 index 00000000000..6f3e2bc8db0 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/TestLocalFSCloudBackupRestore.java @@ -0,0 +1,50 @@ +package org.apache.solr.cloud; + +import org.junit.BeforeClass; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This class implements the tests for local file-system integration for Solr backup/restore capability. + * Note that the Solr backup/restore still requires a "shared" file-system. It's just that in this case + * such a file-system is exposed via the local file-system API. + */ +public class TestLocalFSCloudBackupRestore extends AbstractCloudBackupRestoreTestCase { + + @BeforeClass + public static void setupClass() throws Exception { + configureCluster(NUM_SHARDS)// nodes + .addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) + .configure(); + } + + @Override + public String getCollectionName() { + return "backuprestore"; + } + + @Override + public String getBackupRepoName() { + return null; + } + + @Override + public String getBackupLocation() { + return createTempDir().toFile().getAbsolutePath(); + } +} diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java index 049d5e72145..75dbf0c65a7 100644 --- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java +++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java @@ -98,6 +98,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 { ++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler"); ++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler"); ++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler"); + ++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler"); ++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler"); ++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler"); ++ihCount; assertEquals(pathToClassMap.get("/schema"), "solr.SchemaHandler"); diff --git a/solr/core/src/test/org/apache/solr/core/TestBackupRepositoryFactory.java b/solr/core/src/test/org/apache/solr/core/TestBackupRepositoryFactory.java index 81d3c40cf67..a03d4c4d7bf 100644 --- a/solr/core/src/test/org/apache/solr/core/TestBackupRepositoryFactory.java +++ b/solr/core/src/test/org/apache/solr/core/TestBackupRepositoryFactory.java @@ -21,9 +21,9 @@ import java.io.File; import java.util.HashMap; import java.util.Map; +import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; -import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.core.backup.repository.BackupRepository; import org.apache.solr.core.backup.repository.BackupRepositoryFactory; @@ -37,8 +37,6 @@ import org.junit.rules.ExpectedException; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; -import 
com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule; - public class TestBackupRepositoryFactory extends SolrTestCaseJ4 { @Rule public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule()); @@ -129,7 +127,7 @@ public class TestBackupRepositoryFactory extends SolrTestCaseJ4 { attrs.put(CoreAdminParams.NAME, "repo1"); attrs.put(FieldType.CLASS_NAME, LocalFileSystemRepository.class.getName()); attrs.put("default" , "true"); - attrs.put(ZkStateReader.BACKUP_LOCATION, "/tmp"); + attrs.put("location", "/tmp"); plugins[0] = new PluginInfo("repository", attrs); } @@ -139,14 +137,14 @@ public class TestBackupRepositoryFactory extends SolrTestCaseJ4 { BackupRepository repo = f.newInstance(loader); assertTrue(repo instanceof LocalFileSystemRepository); - assertEquals("/tmp", repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION)); + assertEquals("/tmp", repo.getConfigProperty("location")); } { BackupRepository repo = f.newInstance(loader, "repo1"); assertTrue(repo instanceof LocalFileSystemRepository); - assertEquals("/tmp", repo.getConfigProperty(ZkStateReader.BACKUP_LOCATION)); + assertEquals("/tmp", repo.getConfigProperty("location")); } } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java index bcd2f258f39..951cd88cb7b 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java @@ -30,10 +30,10 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase @Test public void test() throws Exception { del("*:*"); - index(id, 18, "b_t", "snake spider shark snail slug seal"); - index(id, 19, "b_t", "snake spider shark snail slug"); - index(id, 20, "b_t", "snake spider shark snail"); - index(id, 21, "b_t", "snake spider shark"); + index(id, 18, "b_t", "snake spider shark snail slug seal", "foo_i", "1"); + index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2"); + index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3"); + index(id, 21, "b_t", "snake spider shark", "foo_i", "2"); index(id, 22, "b_t", "snake spider"); index(id, 23, "b_t", "snake"); index(id, 24, "b_t", "ant zebra"); @@ -49,5 +49,10 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index"); query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index"); query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index"); + query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra, ant, bad"); + query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1"); + query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1"); + + } } diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java index 934a6324e02..0974524fad0 100644 --- 
a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java @@ -32,9 +32,9 @@ public class TermsComponentTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeTest() throws Exception { System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_ - initCore("solrconfig.xml","schema12.xml"); + initCore("solrconfig.xml", "schema12.xml"); - assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i","1"))); + assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i", "1"))); assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa", "foo_i","1"))); assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa", "foo_i","2"))); assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "aaa", "standardfilt", "abbb"))); @@ -45,7 +45,10 @@ public class TermsComponentTest extends SolrTestCaseJ4 { assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "baa", "standardfilt", "cccc"))); assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "bbb", "standardfilt", "ccccc"))); + assertNull(h.validateUpdate(adoc("id", "10", "standardfilt", "ddddd"))); + + assertNull(h.validateUpdate(commit())); assertNull(h.validateUpdate(adoc("id", "11", "standardfilt", "ddddd"))); assertNull(h.validateUpdate(adoc("id", "12", "standardfilt", "ddddd"))); assertNull(h.validateUpdate(adoc("id", "13", "standardfilt", "ddddd"))); @@ -53,6 +56,8 @@ public class TermsComponentTest extends SolrTestCaseJ4 { assertNull(h.validateUpdate(adoc("id", "15", "standardfilt", "d"))); assertNull(h.validateUpdate(adoc("id", "16", "standardfilt", "d"))); + assertNull(h.validateUpdate(commit())); + assertNull(h.validateUpdate(adoc("id", "17", "standardfilt", "snake"))); assertNull(h.validateUpdate(adoc("id", "18", "standardfilt", "spider"))); assertNull(h.validateUpdate(adoc("id", "19", "standardfilt", "shark"))); @@ -137,13 +142,13 @@ public class TermsComponentTest extends SolrTestCaseJ4 { @Test public void testRegexpWithFlags() throws Exception { // TODO: there are no uppercase or mixed-case terms in the index! 
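The terms.list and terms.stats parameters exercised below are new in this patch: terms.list fetches document frequencies for an explicit set of terms (always returned in index order), and terms.stats additionally reports collection-level statistics under "indexstats". A hedged SolrJ sketch (client and collection are placeholder assumptions):

    // Sketch only: querying the /terms handler for an explicit term list.
    ModifiableSolrParams p = new ModifiableSolrParams();
    p.set("qt", "/terms");
    p.set("terms", "true");
    p.set("terms.fl", "standardfilt");
    p.set("terms.list", "spider, snake, shark"); // docFreq per listed term, index order
    p.set("terms.stats", "true");                // adds numDocs under "indexstats"
    QueryResponse rsp = client.query(p);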
- assertQ(req("indent","true", "qt","/terms", "terms","true", - "terms.fl","standardfilt", - "terms.lower","a", "terms.lower.incl","false", - "terms.upper","c", "terms.upper.incl","true", - "terms.regex","B.*", - "terms.regex.flag","case_insensitive") - ,"count(//lst[@name='standardfilt']/*)=3" + assertQ(req("indent", "true", "qt", "/terms", "terms", "true", + "terms.fl", "standardfilt", + "terms.lower", "a", "terms.lower.incl", "false", + "terms.upper", "c", "terms.upper.incl", "true", + "terms.regex", "B.*", + "terms.regex.flag", "case_insensitive") + , "count(//lst[@name='standardfilt']/*)=3" ); } @@ -162,6 +167,41 @@ public class TermsComponentTest extends SolrTestCaseJ4 { } + @Test + public void testTermsList() throws Exception { + //Terms list always returns in index order + assertQ(req("indent","true", "qt","/terms", "terms","true", + "terms.fl","standardfilt", + "terms.list","spider, snake, shark, ddddd, bad") + ,"count(//lst[@name='standardfilt']/*)=4" + ,"//lst[@name='standardfilt']/int[1][@name='ddddd'][.='4']" + ,"//lst[@name='standardfilt']/int[2][@name='shark'][.='2']" + ,"//lst[@name='standardfilt']/int[3][@name='snake'][.='3']" + ,"//lst[@name='standardfilt']/int[4][@name='spider'][.='1']" + ); + + + //Test with numeric terms + assertQ(req("indent","true", "qt","/terms", "terms","true", + "terms.fl","foo_i", + "terms.list","2, 1") + ,"count(//lst[@name='foo_i']/*)=2" + ,"//lst[@name='foo_i']/int[1][@name='1'][.='2']" + ,"//lst[@name='foo_i']/int[2][@name='2'][.='1']" + ); + } + + + @Test + public void testStats() throws Exception { + //Terms list always returns in index order + assertQ(req("indent", "true", "qt", "/terms", "terms", "true", + "terms.fl", "standardfilt","terms.stats", "true", + "terms.list", "spider, snake, shark, ddddd, bad") + , "//lst[@name='indexstats']/long[1][@name='numDocs'][.='23']" + ); + } + @Test public void testSortIndex() throws Exception { assertQ(req("indent","true", "qt","/terms", "terms","true", diff --git a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java index 451bc5f92fa..2b6848b7c02 100644 --- a/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java +++ b/solr/core/src/test/org/apache/solr/search/TestPseudoReturnFields.java @@ -16,20 +16,24 @@ */ package org.apache.solr.search; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.schema.SchemaField; - -import org.apache.commons.lang.StringUtils; - -import org.junit.BeforeClass; -import org.junit.Test; - import java.util.List; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Random; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.cloud.TestCloudPseudoReturnFields; +import org.apache.solr.schema.SchemaField; + +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.ModifiableSolrParams; + +import org.apache.commons.lang.StringUtils; + +import org.junit.BeforeClass; + + +/** @see TestCloudPseudoReturnFields */ public class TestPseudoReturnFields extends SolrTestCaseJ4 { // :TODO: datatypes produced by the functions used may change @@ -37,20 +41,18 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { /** * values of the fl param that mean all real fields */ - private static String[] ALL_REAL_FIELDS = new String[] { "", "*" }; + public static String[] ALL_REAL_FIELDS = new String[] { "", "*" }; /** * values of the fl param that mean all real fields and score */ - 
private static String[] SCORE_AND_REAL_FIELDS = new String[] { + public static String[] SCORE_AND_REAL_FIELDS = new String[] { "score,*", "*,score" }; @BeforeClass public static void beforeTests() throws Exception { - System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_ - initCore("solrconfig.xml","schema12.xml"); - + initCore("solrconfig-tlog.xml","schema-psuedo-fields.xml"); assertU(adoc("id", "42", "val_i", "1", "ssto", "X", "subject", "aaa")); assertU(adoc("id", "43", "val_i", "9", "ssto", "X", "subject", "bbb")); @@ -58,9 +60,12 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { assertU(adoc("id", "45", "val_i", "6", "ssto", "X", "subject", "aaa")); assertU(adoc("id", "46", "val_i", "3", "ssto", "X", "subject", "ggg")); assertU(commit()); + + // uncommitted doc in transaction log + assertU(adoc("id", "99", "val_i", "1", "ssto", "X", "subject", "uncommitted")); + } - @Test public void testMultiValued() throws Exception { // the response writers used to consult isMultiValued on the field // but this doesn't work when you alias a single valued field to @@ -88,17 +93,23 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { assertJQ(req("qt","/get", "id","42", "fl","val_ss:val_i, val2_ss:10") ,"/doc=={'val2_ss':10,'val_ss':1}" ); + } + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9285") + public void testMultiValuedRTG() throws Exception { + + // single value int using alias that matches multivalued dynamic field - via RTG + assertJQ(req("qt","/get", "id","42", "fl","val_ss:val_i, val2_ss:10, subject") + ,"/doc=={'val2_ss':10,'val_ss':1, 'subject':'aaa'}" + ); + // also check real-time-get from transaction log - assertU(adoc("id", "42", "val_i", "1", "ssto", "X", "subject", "aaa")); - - assertJQ(req("qt","/get", "id","42", "fl","val_ss:val_i, val2_ss:10") - ,"/doc=={'val2_ss':10,'val_ss':1}" + assertJQ(req("qt","/get", "id","99", "fl","val_ss:val_i, val2_ss:10, subject") + ,"/doc=={'val2_ss':10,'val_ss':1,'subject':'uncommitted'}" ); } - @Test public void testAllRealFields() throws Exception { for (String fl : ALL_REAL_FIELDS) { @@ -114,8 +125,26 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ); } } + + public void testAllRealFieldsRTG() throws Exception { + // shouldn't matter if we use RTG (committed or otherwise) + for (String fl : ALL_REAL_FIELDS) { + for (String id : Arrays.asList("42","99")) { + assertQ("id="+id+", fl="+fl+" ... all real fields", + req("qt","/get","id",id, "wt","xml","fl",fl) + ,"count(//doc)=1" + ,"//doc/str[@name='id']" + ,"//doc/int[@name='val_i']" + ,"//doc/str[@name='ssto']" + ,"//doc/str[@name='subject']" + ,"//doc[count(*)=4]" + ); + } + } + + + } - @Test public void testScoreAndAllRealFields() throws Exception { for (String fl : SCORE_AND_REAL_FIELDS) { @@ -132,10 +161,28 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ); } } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testScoreAndAllRealFieldsRTG() throws Exception { + + // if we use RTG (committed or otherwise) score should be ignored + for (String fl : SCORE_AND_REAL_FIELDS) { + for (String id : Arrays.asList("42","99")) { + assertQ("id="+id+", fl="+fl+" ... 
score real fields", + req("qt","/get","id",id, "wt","xml","fl",fl) + ,"count(//doc)=1" + ,"//doc/str[@name='id']" + ,"//doc/int[@name='val_i']" + ,"//doc/str[@name='ssto']" + ,"//doc/str[@name='subject']" + ,"//doc[count(*)=4]" + ); + } + } + } - @Test public void testScoreAndExplicitRealFields() throws Exception { - + assertQ("fl=score,val_i", req("q","*:*", "rows", "1", "fl","score,val_i") ,"//result[@numFound='5']" @@ -152,7 +199,7 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ,"//result/doc[count(*)=2]" ); - + assertQ("fl=val_i", req("q","*:*", "rows", "1", "fl","val_i") ,"//result[@numFound='5']" @@ -162,7 +209,19 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ); } - @Test + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testScoreAndExplicitRealFieldsRTG() throws Exception { + // if we use RTG (committed or otherwise) score should be ignored + for (String id : Arrays.asList("42","99")) { + assertQ("id="+id+", fl=score,val_i", + req("qt","/get","id",id, "wt","xml", "fl","score,val_i") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i']" + ,"//doc[count(*)=1]" + ); + } + } + public void testFunctions() throws Exception { assertQ("fl=log(val_i)", req("q","*:*", "rows", "1", "fl","log(val_i)") @@ -189,8 +248,26 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ,"//result/doc[count(*)=2]" ); } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9285") + public void testFunctionsRTG() throws Exception { + // if we use RTG (committed or otherwise) functions should behave the same + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("qt","/get","id",id,"wt","xml", + "fl","log(val_i),abs(val_i)"), + params("qt","/get","id",id,"wt","xml", + "fl","log(val_i)","fl", "abs(val_i)"))) { + assertQ("id="+id+", params="+p, req(p) + ,"count(//doc)=1" + // true for both these specific docs + ,"//doc/double[@name='log(val_i)'][.='0.0']" + ,"//doc/float[@name='abs(val_i)'][.='1.0']" + ,"//doc[count(*)=2]" + ); + } + } + } - @Test public void testFunctionsAndExplicit() throws Exception { assertQ("fl=log(val_i),val_i", req("q","*:*", "rows", "1", "fl","log(val_i),val_i") @@ -211,7 +288,24 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ); } - @Test + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9285") + public void testFunctionsAndExplicitRTG() throws Exception { + // shouldn't matter if we use RTG (committed or otherwise) + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("fl","log(val_i),val_i"), + params("fl","log(val_i)","fl","val_i"))) { + assertQ(id + " " + p, + req(p, "qt","/get", "wt","xml","id",id) + ,"count(//doc)=1" + // true for both these specific docs + ,"//doc/double[@name='log(val_i)'][.='0.0']" + ,"//doc/int[@name='val_i'][.='1']" + ,"//doc[count(*)=2]" + ); + } + } + } + public void testFunctionsAndScore() throws Exception { assertQ("fl=log(val_i),score", @@ -253,8 +347,27 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ); } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testFunctionsAndScoreRTG() throws Exception { + + // if we use RTG (committed or otherwise) score should be ignored + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(params("fl","score","fl","log(val_i)","fl","abs(val_i)"), + params("fl","score","fl","log(val_i),abs(val_i)"), + params("fl","score,log(val_i)","fl","abs(val_i)"), + 
params("fl","score,log(val_i),abs(val_i)"))) { + assertQ("id="+id+", p="+p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/double[@name='log(val_i)']" + ,"//doc/float[@name='abs(val_i)']" + ,"//doc[count(*)=2]" + ); + } + } + } - @Test public void testGlobs() throws Exception { assertQ("fl=val_*", req("q","*:*", "rows", "1", "fl","val_*") @@ -263,26 +376,45 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ,"//result/doc[count(*)=1]" ); + for (SolrParams p : Arrays.asList(params("q", "*:*", "rows", "1", "fl","val_*,subj*,ss*"), + params("q", "*:*", "rows", "1", "fl","val_*","fl","subj*,ss*"), + params("q", "*:*", "rows", "1", "fl","val_*","fl","subj*","fl","ss*"))) { - assertQ("fl=val_*,subj*", - req("q","*:*", "rows", "1", "fl","val_*,subj*") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" + assertQ(p.toString(), + req(p) + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/str[@name='ssto'][.='X']" - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=val_*&fl=subj*", - req("q","*:*", "rows", "1", "fl","val_*","fl","subj*") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=2]" - ); + ,"//result/doc[count(*)=3]" + ); + } + } + + public void testGlobsRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + assertQ(id + ": fl=val_*", + req("qt","/get","id",id, "wt","xml", "fl","val_*") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i'][.=1]" + ,"//doc[count(*)=1]" + ); + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,ss*"), + params("fl","val_*","fl","subj*,ss*"))) { + assertQ(id + ": " + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i'][.=1]" + ,"//doc/str[@name='subject']" // value differs between docs + ,"//doc/str[@name='ssto'][.='X']" + ,"//doc[count(*)=3]" + ); + } + } } - @Test public void testGlobsAndExplicit() throws Exception { assertQ("fl=val_*,id", req("q","*:*", "rows", "1", "fl","val_*,id") @@ -293,27 +425,49 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ,"//result/doc[count(*)=2]" ); - assertQ("fl=val_*,subj*,id", - req("q","*:*", "rows", "1", "fl","val_*,subj*,id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=val_*&fl=subj*&fl=id", - req("q","*:*", "rows", "1", "fl","val_*","fl","subj*","fl","id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,id"), + params("fl","val_*","fl","subj*","fl","id"), + params("fl","val_*","fl","subj*,id"))) { + assertQ("" + p, + req(p, "q","*:*", "rows", "1") + ,"//result[@numFound='5']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/str[@name='id']" + + ,"//result/doc[count(*)=3]" + ); + } + } + + public void testGlobsAndExplicitRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + assertQ(id + " + fl=val_*,id", + req("qt","/get","id",id, "wt","xml", "fl","val_*,id") + ,"count(//doc)=1" + 
,"//doc/int[@name='val_i'][.=1]" + ,"//doc/str[@name='id']" + + ,"//doc[count(*)=2]" + ); + + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,id"), + params("fl","val_*","fl","subj*","fl","id"), + params("fl","val_*","fl","subj*,id"))) { + assertQ(id + " + " + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i'][.=1]" + ,"//doc/str[@name='subject']" + ,"//doc/str[@name='id']" + + ,"//doc[count(*)=3]" + ); + } + } } - @Test public void testGlobsAndScore() throws Exception { assertQ("fl=val_*,score", req("q","*:*", "rows", "1", "fl","val_*,score", "indent", "true") @@ -323,127 +477,205 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { ,"//result/doc[count(*)=2]" ); - - assertQ("fl=val_*,subj*,score", - req("q","*:*", "rows", "1", "fl","val_*,subj*,score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=val_*&fl=subj*&fl=score", - req("q","*:*", "rows", "1", - "fl","val_*","fl","subj*","fl","score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/str[@name='subject']" - - ,"//result/doc[count(*)=3]" - ); - - + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,score"), + params("fl","val_*","fl","subj*","fl","score"), + params("fl","val_*","fl","subj*,score"))) { + assertQ("" + p, + req(p, "q","*:*", "rows", "1") + ,"//result[@numFound='5']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/str[@name='subject']" + + ,"//result/doc[count(*)=3]" + ); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testGlobsAndScoreRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted, score should be ignored + for (String id : Arrays.asList("42","99")) { + assertQ(id + ": fl=val_*,score", + req("qt","/get","id",id, "wt","xml", "fl","val_*,score") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i']" + ,"//doc[count(*)=1]" + ); + for (SolrParams p : Arrays.asList(params("fl","val_*,subj*,score"), + params("fl","val_*","fl","subj*","fl","score"), + params("fl","val_*","fl","subj*,score"))) { + assertQ("" + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/int[@name='val_i']" + ,"//doc/str[@name='subject']" + ,"//doc[count(*)=2]" + ); + } + } } - @Test public void testAugmenters() throws Exception { assertQ("fl=[docid]", req("q","*:*", "rows", "1", "fl","[docid]") ,"//result[@numFound='5']" ,"//result/doc/int[@name='[docid]']" - ,"//result/doc[count(*)=1]" ); - - assertQ("fl=[docid],[explain]", - req("q","*:*", "rows", "1", "fl","[docid],[explain]") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=2]" - ); - assertQ("fl=[docid]&fl=[explain]", - req("q","*:*", "rows", "1", "fl","[docid]","fl","[explain]") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=2]" - ); + for (SolrParams p : Arrays.asList(params("fl","[docid],[shard],[explain],x_alias:[value v=10 t=int]"), + params("fl","[docid],[shard]","fl","[explain],x_alias:[value v=10 t=int]"), + params("fl","[docid]","fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + assertQ("" + p, + req(p, "q","*:*", "rows", "1") + 
,"//result[@numFound='5']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[shard]'][.='[not a shard request]']" + ,"//result/doc/str[@name='[explain]']" + ,"//result/doc/int[@name='x_alias'][.=10]" + + ,"//result/doc[count(*)=4]" + ); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9288") + public void testDocIdAugmenterRTG() throws Exception { + // NOTE: once this test is fixed to pass, testAugmentersRTG should also be updated to test [docid] + + // TODO: behavior of fl=[docid] should be consistent regardless of wether doc is committed + // what should behavior be? + // right now, for an uncommited doc, [docid] is silently ignored and no value included in result + // perhaps it should be "null" or "-1" ? + + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + assertQ(id + ": fl=[docid]", + req("qt","/get","id",id, "wt","xml", "fl","[docid]") + ,"count(//doc)=1" + ,"//doc/int[@name='[docid]']" + ,"//doc[count(*)=1]" + ); + } + } + + public void testAugmentersRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. + for (SolrParams p : Arrays.asList(params("fl","[shard],[explain],x_alias:[value v=10 t=int]"), + params("fl","[shard]","fl","[explain],x_alias:[value v=10 t=int]"), + params("fl","[shard]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + assertQ(id + ": " + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + // ,"//doc/int[@name='[docid]']" // TODO + ,"//doc/str[@name='[shard]'][.='[not a shard request]']" + // RTG: [explain] should be missing (ignored) + ,"//doc/int[@name='x_alias'][.=10]" + + ,"//doc[count(*)=2]" + ); + } + } } - @Test public void testAugmentersAndExplicit() throws Exception { - assertQ("fl=[docid],id", - req("q","*:*", "rows", "1", - "fl","[docid],id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='id']" + for (SolrParams p : Arrays.asList(params("fl","id,[docid],[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[docid],[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[docid]","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + assertQ(p.toString(), + req(p, "q","*:*", "rows", "1") + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + ,"//result/doc/int[@name='x_alias'][.=10]" - ,"//result/doc[count(*)=2]" - ); - - assertQ("fl=[docid],[explain],id", - req("q","*:*", "rows", "1", - "fl","[docid],[explain],id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=[docid]&fl=[explain]&fl=id", - req("q","*:*", "rows", "1", - "fl","[docid]","fl","[explain]","fl","id") - ,"//result[@numFound='5']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - ,"//result/doc/str[@name='id']" - - ,"//result/doc[count(*)=3]" - ); + ,"//result/doc[count(*)=4]" + ); + } + } + + public void testAugmentersAndExplicitRTG() throws Exception { + // behavior shouldn't matter if we are committed or uncommitted + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
+ for (SolrParams p : Arrays.asList(params("fl","id,[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[explain],x_alias:[value v=10 t=int]"), + params("fl","id","fl","[explain]","fl","x_alias:[value v=10 t=int]"))) { + assertQ(id + ": " + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/str[@name='id']" + // ,"//doc/int[@name='[docid]']" // TODO + // RTG: [explain] should be missing (ignored) + ,"//doc/int[@name='x_alias'][.=10]" + + ,"//doc[count(*)=2]" + ); + } + } } - @Test public void testAugmentersAndScore() throws Exception { - assertQ("fl=[docid],score", - req("q","*:*", "rows", "1", - "fl","[docid],score") + assertQ(req("q","*:*", "rows", "1", + "fl","[docid],x_alias:[value v=10 t=int],score") ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/int[@name='x_alias'][.=10]" + ,"//result/doc/float[@name='score']" - ,"//result/doc[count(*)=2]" + ,"//result/doc[count(*)=3]" ); + for (SolrParams p : Arrays.asList(params("fl","[docid],x_alias:[value v=10 t=int],[explain],score"), + params("fl","[docid]","fl","x_alias:[value v=10 t=int],[explain]","fl","score"), + params("fl","[docid]","fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) { - assertQ("fl=[docid],[explain],score", - req("q","*:*", "rows", "1", - "fl","[docid],[explain],score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=3]" - ); - assertQ("fl=[docid]&fl=[explain]&fl=score", - req("q","*:*", "rows", "1", - "fl","[docid]","fl","[explain]","fl","score") - ,"//result[@numFound='5']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=3]" - ); + assertQ(p.toString(), + req(p, "q","*:*", "rows", "1") + ,"//result[@numFound='5']" + + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/int[@name='x_alias'][.=10]" + ,"//result/doc/str[@name='[explain]']" + ,"//result/doc/float[@name='score']" + + ,"//result/doc[count(*)=4]" + ); + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testAugmentersAndScoreRTG() throws Exception { + // if we use RTG (committed or otherwise) score should be ignored + for (String id : Arrays.asList("42","99")) { + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
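// (Background, not from the patch: score is only meaningful relative to a
//  query; an RTG fetch by id has no query context to score against, which is
//  why fl=score is expected to be silently ignored for /get, committed or not.)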
+ assertQ(id, + req("qt","/get","id",id, "wt","xml", + "fl","x_alias:[value v=10 t=int],score") + // ,"//doc/int[@name='[docid]']" // TODO + ,"//doc/int[@name='x_alias'][.=10]" + + ,"//doc[count(*)=1]" + ); + for (SolrParams p : Arrays.asList(params("fl","x_alias:[value v=10 t=int],[explain],score"), + params("fl","x_alias:[value v=10 t=int],[explain]","fl","score"), + params("fl","x_alias:[value v=10 t=int]","fl","[explain]","fl","score"))) { + + assertQ(p.toString(), + req(p, "qt","/get","id",id, "wt","xml") + + // ,"//doc/int[@name='[docid]']" // TODO + ,"//doc/int[@name='x_alias'][.=10]" + // RTG: [explain] and score should be missing (ignored) + + ,"//doc[count(*)=1]" + ); + } + } } - @Test public void testAugmentersGlobsExplicitAndScoreOhMy() throws Exception { Random random = random(); @@ -456,42 +688,63 @@ public class TestPseudoReturnFields extends SolrTestCaseJ4 { Collections.shuffle(fl, random); - final String singleFl = StringUtils.join(fl.toArray(),','); - assertQ("fl=" + singleFl, - req("q","*:*", "rows", "1","fl",singleFl) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=6]" - ); - - final List params = new ArrayList<>((fl.size()*2) + 4); - final StringBuilder info = new StringBuilder(); - params.addAll(Arrays.asList("q","*:*", "rows", "1")); + final SolrParams singleFl = params("q","*:*", "rows", "1","fl",StringUtils.join(fl.toArray(),',')); + final ModifiableSolrParams multiFl = params("q","*:*", "rows", "1"); for (String item : fl) { - params.add("fl"); - params.add(item); - info.append("&fl=").append(item); + multiFl.add("fl",item); } - - assertQ(info.toString(), - req((String[])params.toArray(new String[0])) - ,"//result[@numFound='5']" - ,"//result/doc/str[@name='id']" - ,"//result/doc/float[@name='score']" - ,"//result/doc/str[@name='subject']" - ,"//result/doc/int[@name='val_i']" - ,"//result/doc/int[@name='[docid]']" - ,"//result/doc/str[@name='[explain]']" - - ,"//result/doc[count(*)=6]" - ); + for (SolrParams p : Arrays.asList(singleFl, multiFl)) { + assertQ(p.toString(), + req(p) + ,"//result[@numFound='5']" + ,"//result/doc/str[@name='id']" + ,"//result/doc/float[@name='score']" + ,"//result/doc/str[@name='subject']" + ,"//result/doc/int[@name='val_i']" + ,"//result/doc/int[@name='[docid]']" + ,"//result/doc/str[@name='[explain]']" + + ,"//result/doc[count(*)=6]" + ); + } + } + } + + @AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-9287") + public void testAugmentersGlobsExplicitAndScoreOhMyRTG() throws Exception { + Random random = random(); + // NOTE: 'ssto' is the missing one + final List fl = Arrays.asList + // NOTE: once testDocIdAugmenterRTG can pass, [docid] should be tested here as well. 
+ ("id","[explain]","score","val_*","subj*"); + + final int iters = atLeast(random, 10); + for (int i = 0; i< iters; i++) { + + Collections.shuffle(fl, random); + + final SolrParams singleFl = params("fl",StringUtils.join(fl.toArray(),',')); + final ModifiableSolrParams multiFl = params(); + for (String item : fl) { + multiFl.add("fl",item); + } + + // RTG behavior should be consistent, (committed or otherwise) + for (String id : Arrays.asList("42","99")) { + for (SolrParams p : Arrays.asList(singleFl, multiFl)) { + assertQ(id + ": " + p, + req(p, "qt","/get","id",id, "wt","xml") + ,"count(//doc)=1" + ,"//doc/str[@name='id']" + // ,"//doc/int[@name='[docid]']" // TODO + // RTG: [explain] and score should be missing (ignored) + ,"//doc/int[@name='val_i'][.=1]" + ,"//doc/str[@name='subject']" + ,"//result/doc[count(*)=3]" + ); + } + } } } } diff --git a/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java b/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java index b471a754a5a..d2244b1272c 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java +++ b/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java @@ -16,6 +16,8 @@ */ package org.apache.solr.search; +import org.apache.lucene.util.TestUtil; + import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.request.SolrQueryRequest; @@ -44,14 +46,12 @@ public class TestRangeQuery extends SolrTestCaseJ4 { assertU(commit()); } - Random r = new Random(1); - void addInt(SolrInputDocument doc, int l, int u, String... fields) { int v=0; if (0==l && l==u) { - v=r.nextInt(); + v=random().nextInt(); } else { - v=r.nextInt(u-l)+l; + v=random().nextInt(u-l)+l; } for (String field : fields) { doc.addField(field, v); @@ -193,43 +193,74 @@ public class TestRangeQuery extends SolrTestCaseJ4 { assertQ(req("{!frange incl=false incu=false" + " l=" +v[3] +" u="+v[4]+"}"+f ), "*[count(//doc)=3]"); } + // now pick a random range to use to delete (some of) the docs... + + final boolean incl = random().nextBoolean(); + final boolean incu = random().nextBoolean(); + final int expected = 0 + (incl ? 0 : 1) + (incu ? 0 : 1); + String dbq = null; + if (random().nextBoolean()) { // regular range + String field = randomKey(norm_fields); + String[] values = norm_fields.get(field); + dbq = field + ":" + (incl ? "[" : "{") + values[0] + " TO " + values[2] + (incu ? 
"]" : "}"); + } else { // frange + String field = randomKey(frange_fields); + String[] values = frange_fields.get(field); + dbq = "{!frange incl=" + incl + " incu=" + incu + " l=" + values[0] + " u=" + values[2] + "}" + field; + } + if (random().nextBoolean()) { + // wrap in a BQ + String field = randomKey(norm_fields); + String value = norm_fields.get(field)[1]; + // wraping shouldn't affect expected + dbq = "("+field+":\""+value+"\" OR " + dbq + ")"; + } + + assertU(delQ(dbq)); + assertU(commit()); + assertQ(req("q","*:*","_trace_dbq",dbq), + "*[count(//doc)=" + expected + "]"); + } @Test public void testRandomRangeQueries() throws Exception { String handler=""; - final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d" - ,"foo_ti","foo_tl","foo_tf","foo_td" - }; - final int l=-5; - final int u=25; - - - createIndex(15, new DocProcessor() { - @Override - public void process(SolrInputDocument doc) { - // 10% of the docs have missing values - if (r.nextInt(10)!=0) addInt(doc, l,u, fields); - } - }); - assertU(commit()); + final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d", + "foo_ti","foo_tl","foo_tf","foo_td" }; + // NOTE: foo_s supports ranges, but for the arrays below we are only + // interested in fields that support *equivilent* ranges -- strings + // are not ordered the same as ints/longs, so we can't test the ranges + // for equivilence across diff fields. + // // fields that a normal range query will work correctly on - String[] norm_fields = { - "foo_i","foo_l","foo_f","foo_d" - ,"foo_ti","foo_tl","foo_tf","foo_td" - - }; - + String[] norm_fields = {"foo_i","foo_l","foo_f","foo_d", + "foo_ti","foo_tl","foo_tf","foo_td" }; // fields that a value source range query should work on String[] frange_fields = {"foo_i","foo_l","foo_f","foo_d"}; - for (int i=0; i<1000; i++) { - int lower = l + r.nextInt(u-l+10)-5; - int upper = lower + r.nextInt(u+5-lower); - boolean lowerMissing = r.nextInt(10)==1; - boolean upperMissing = r.nextInt(10)==1; - boolean inclusive = lowerMissing || upperMissing || r.nextBoolean(); + final int l= -1 * atLeast(50); + final int u= atLeast(250); + + // sometimes a very small index, sometimes a very large index + final int numDocs = random().nextBoolean() ? random().nextInt(50) : atLeast(1000); + createIndex(numDocs, new DocProcessor() { + @Override + public void process(SolrInputDocument doc) { + // 10% of the docs have missing values + if (random().nextInt(10)!=0) addInt(doc, l,u, fields); + } + }); + assertU(commit()); + + final int numIters = atLeast(1000); + for (int i=0; i < numIters; i++) { + int lower = TestUtil.nextInt(random(), 2 * l, u); + int upper = TestUtil.nextInt(random(), lower, 2 * u); + boolean lowerMissing = random().nextInt(10)==1; + boolean upperMissing = random().nextInt(10)==1; + boolean inclusive = lowerMissing || upperMissing || random().nextBoolean(); // lower=2; upper=2; inclusive=true; // inclusive=true; lowerMissing=true; upperMissing=true; @@ -252,33 +283,82 @@ public class TestRangeQuery extends SolrTestCaseJ4 { + "}"; qs.add(q); } - + String lastQ = null; SolrQueryResponse last=null; for (String q : qs) { // System.out.println("QUERY="+q); - SolrQueryRequest req = req("q",q,"rows","1000"); + SolrQueryRequest req = req("q",q,"rows",""+numDocs); SolrQueryResponse qr = h.queryAndResponse(handler, req); if (last != null) { // we only test if the same docs matched since some queries will include factors like idf, etc. 
DocList rA = ((ResultContext)qr.getResponse()).getDocList(); DocList rB = ((ResultContext)last.getResponse()).getDocList(); - sameDocs( rA, rB ); + sameDocs(q + " vs " + lastQ, rA, rB ); } req.close(); last = qr; + lastQ = q; + } + } + + // now build some random queries (against *any* field) and validate that using it in a DBQ changes + // the index by the expected number of docs + int numDocsLeftInIndex = numDocs; + final int numDBQs= atLeast(10); + for (int i=0; i < numDBQs; i++) { + int lower = TestUtil.nextInt(random(), 2 * l, u); + int upper = TestUtil.nextInt(random(), lower, 2 * u); + boolean lowerMissing = random().nextInt(10)==1; + boolean upperMissing = random().nextInt(10)==1; + boolean inclusive = lowerMissing || upperMissing || random().nextBoolean(); + + String dbq = null; + if (random().nextBoolean()) { // regular range + String field = fields[random().nextInt(fields.length)]; + dbq = field + ':' + (inclusive?'[':'{') + + (lowerMissing?"*":lower) + + " TO " + + (upperMissing?"*":upper) + + (inclusive?']':'}'); + } else { // frange + String field = frange_fields[random().nextInt(frange_fields.length)]; + dbq = "{!frange v="+field + + (lowerMissing?"":(" l="+lower)) + + (upperMissing?"":(" u="+upper)) + + (inclusive?"":" incl=false") + + (inclusive?"":" incu=false") + + "}"; + } + try (SolrQueryRequest req = req("q",dbq,"rows","0")) { + SolrQueryResponse qr = h.queryAndResponse(handler, req); + numDocsLeftInIndex -= ((ResultContext)qr.getResponse()).getDocList().matches(); + } + assertU(delQ(dbq)); + assertU(commit()); + try (SolrQueryRequest req = req("q","*:*","rows","0","_trace_after_dbq",dbq)) { + SolrQueryResponse qr = h.queryAndResponse(handler, req); + final int allDocsFound = ((ResultContext)qr.getResponse()).getDocList().matches(); + assertEquals(dbq, numDocsLeftInIndex, allDocsFound); + } } } - static boolean sameDocs(DocSet a, DocSet b) { + static boolean sameDocs(String msg, DocSet a, DocSet b) { DocIterator i = a.iterator(); // System.out.println("SIZES="+a.size() + "," + b.size()); - assertEquals(a.size(), b.size()); + assertEquals(msg, a.size(), b.size()); while (i.hasNext()) { int doc = i.nextDoc(); - assertTrue(b.exists(doc)); + assertTrue(msg, b.exists(doc)); // System.out.println("MATCH! " + doc); } return true; } + + private static <X extends Comparable<X>,Y> X randomKey(Map<X,Y> map) { + assert !
map.isEmpty(); + List sortedKeys = new ArrayList<>(map.keySet()); + Collections.sort(sortedKeys); + return sortedKeys.get(random().nextInt(sortedKeys.size())); + } } diff --git a/solr/core/src/test/org/apache/solr/security/BasicAuthIntegrationTest.java b/solr/core/src/test/org/apache/solr/security/BasicAuthIntegrationTest.java index 89e41402836..5cb322fc0f2 100644 --- a/solr/core/src/test/org/apache/solr/security/BasicAuthIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/security/BasicAuthIntegrationTest.java @@ -86,135 +86,138 @@ public class BasicAuthIntegrationTest extends TestMiniSolrCloudClusterBase { cloudSolrClient.setDefaultCollection(null); NamedList rsp; - HttpClient cl = HttpClientUtil.createClient(null); - String baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); - verifySecurityStatus(cl, baseUrl + authcPrefix, "/errorMessages", null, 20); - zkClient.setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true); - verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20); - - boolean found = false; - for (JettySolrRunner jettySolrRunner : miniCluster.getJettySolrRunners()) { - if(baseUrl.contains(String.valueOf(jettySolrRunner.getLocalPort()))){ - found = true; - jettySolrRunner.stop(); - jettySolrRunner.start(); - verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20); - break; - } - } - - assertTrue("No server found to restart , looking for : "+baseUrl , found); - - String command = "{\n" + - "'set-user': {'harry':'HarryIsCool'}\n" + - "}"; - - GenericSolrRequest genericReq = new GenericSolrRequest(SolrRequest.METHOD.POST, authcPrefix, new ModifiableSolrParams()); - genericReq.setContentStreams(Collections.singletonList(new ContentStreamBase.ByteArrayStream(command.getBytes(UTF_8), ""))); + HttpClient cl = null; try { - cloudSolrClient.request(genericReq); - fail("Should have failed with a 401"); - } catch (HttpSolrClient.RemoteSolrException e) { - } - command = "{\n" + - "'set-user': {'harry':'HarryIsUberCool'}\n" + - "}"; + cl = HttpClientUtil.createClient(null); + String baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); + verifySecurityStatus(cl, baseUrl + authcPrefix, "/errorMessages", null, 20); + zkClient.setData("/security.json", STD_CONF.replaceAll("'", "\"").getBytes(UTF_8), true); + verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20); - HttpPost httpPost = new HttpPost(baseUrl + authcPrefix); - setBasicAuthHeader(httpPost, "solr", "SolrRocks"); - httpPost.setEntity(new ByteArrayEntity(command.getBytes(UTF_8))); - httpPost.addHeader("Content-Type", "application/json; charset=UTF-8"); - verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication.enabled", "true", 20); - HttpResponse r = cl.execute(httpPost); - int statusCode = r.getStatusLine().getStatusCode(); - Utils.consumeFully(r.getEntity()); - assertEquals("proper_cred sent, but access denied", 200, statusCode); - baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); + boolean found = false; + for (JettySolrRunner jettySolrRunner : miniCluster.getJettySolrRunners()) { + if(baseUrl.contains(String.valueOf(jettySolrRunner.getLocalPort()))){ + found = true; + jettySolrRunner.stop(); + jettySolrRunner.start(); + verifySecurityStatus(cl, 
baseUrl + authcPrefix, "authentication/class", "solr.BasicAuthPlugin", 20); + break; + } + } - verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/credentials/harry", NOT_NULL_PREDICATE, 20); - command = "{\n" + - "'set-user-role': {'harry':'admin'}\n" + - "}"; + assertTrue("No server found to restart , looking for : "+baseUrl , found); - executeCommand(baseUrl + authzPrefix, cl,command, "solr", "SolrRocks"); + String command = "{\n" + + "'set-user': {'harry':'HarryIsCool'}\n" + + "}"; - baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); - verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/user-role/harry", NOT_NULL_PREDICATE, 20); - - executeCommand(baseUrl + authzPrefix, cl, Utils.toJSONString(singletonMap("set-permission", Utils.makeMap - ("collection", "x", - "path", "/update/*", - "role", "dev"))), "harry", "HarryIsUberCool" ); - - verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[1]/collection", "x", 20); - - executeCommand(baseUrl + authzPrefix, cl,Utils.toJSONString(singletonMap("set-permission", Utils.makeMap - ("name", "collection-admin-edit", "role", "admin"))), "harry", "HarryIsUberCool" ); - verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[2]/name", "collection-admin-edit", 20); - - CollectionAdminRequest.Reload reload = new CollectionAdminRequest.Reload(); - reload.setCollectionName(defaultCollName); - - try (HttpSolrClient solrClient = getHttpSolrClient(baseUrl)) { + GenericSolrRequest genericReq = new GenericSolrRequest(SolrRequest.METHOD.POST, authcPrefix, new ModifiableSolrParams()); + genericReq.setContentStreams(Collections.singletonList(new ContentStreamBase.ByteArrayStream(command.getBytes(UTF_8), ""))); try { - rsp = solrClient.request(reload); - fail("must have failed"); + cloudSolrClient.request(genericReq); + fail("Should have failed with a 401"); + } catch (HttpSolrClient.RemoteSolrException e) { + } + command = "{\n" + + "'set-user': {'harry':'HarryIsUberCool'}\n" + + "}"; + + HttpPost httpPost = new HttpPost(baseUrl + authcPrefix); + setBasicAuthHeader(httpPost, "solr", "SolrRocks"); + httpPost.setEntity(new ByteArrayEntity(command.getBytes(UTF_8))); + httpPost.addHeader("Content-Type", "application/json; charset=UTF-8"); + verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication.enabled", "true", 20); + HttpResponse r = cl.execute(httpPost); + int statusCode = r.getStatusLine().getStatusCode(); + Utils.consumeFully(r.getEntity()); + assertEquals("proper_cred sent, but access denied", 200, statusCode); + baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); + + verifySecurityStatus(cl, baseUrl + authcPrefix, "authentication/credentials/harry", NOT_NULL_PREDICATE, 20); + command = "{\n" + + "'set-user-role': {'harry':'admin'}\n" + + "}"; + + executeCommand(baseUrl + authzPrefix, cl,command, "solr", "SolrRocks"); + + baseUrl = getRandomReplica(zkStateReader.getClusterState().getCollection(defaultCollName), random()).getStr(BASE_URL_PROP); + verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/user-role/harry", NOT_NULL_PREDICATE, 20); + + executeCommand(baseUrl + authzPrefix, cl, Utils.toJSONString(singletonMap("set-permission", Utils.makeMap + ("collection", "x", + "path", "/update/*", + "role", "dev"))), "harry", "HarryIsUberCool" ); + + verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[1]/collection", 
"x", 20); + + executeCommand(baseUrl + authzPrefix, cl,Utils.toJSONString(singletonMap("set-permission", Utils.makeMap + ("name", "collection-admin-edit", "role", "admin"))), "harry", "HarryIsUberCool" ); + verifySecurityStatus(cl, baseUrl + authzPrefix, "authorization/permissions[2]/name", "collection-admin-edit", 20); + + CollectionAdminRequest.Reload reload = CollectionAdminRequest.reloadCollection(defaultCollName); + + try (HttpSolrClient solrClient = getHttpSolrClient(baseUrl)) { + try { + rsp = solrClient.request(reload); + fail("must have failed"); + } catch (HttpSolrClient.RemoteSolrException e) { + + } + reload.setMethod(SolrRequest.METHOD.POST); + try { + rsp = solrClient.request(reload); + fail("must have failed"); + } catch (HttpSolrClient.RemoteSolrException e) { + + } + } + cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName) + .setBasicAuthCredentials("harry", "HarryIsUberCool")); + + try { + cloudSolrClient.request(CollectionAdminRequest.reloadCollection(defaultCollName) + .setBasicAuthCredentials("harry", "Cool12345")); + fail("This should not succeed"); } catch (HttpSolrClient.RemoteSolrException e) { } - reload.setMethod(SolrRequest.METHOD.POST); - try { - rsp = solrClient.request(reload); - fail("must have failed"); - } catch (HttpSolrClient.RemoteSolrException e) { + cloudSolrClient.setDefaultCollection(old); + executeCommand(baseUrl + authzPrefix, cl,"{set-permission : { name : update , role : admin}}", "harry", "HarryIsUberCool"); + + SolrInputDocument doc = new SolrInputDocument(); + doc.setField("id","4"); + UpdateRequest update = new UpdateRequest(); + update.setBasicAuthCredentials("harry","HarryIsUberCool"); + update.add(doc); + update.setCommitWithin(100); + cloudSolrClient.request(update); + + + executeCommand(baseUrl + authzPrefix, cl, "{set-property : { blockUnknown: true}}", "harry", "HarryIsUberCool"); + String[] toolArgs = new String[]{ + "status", "-solr", baseUrl}; + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + PrintStream stdoutSim = new PrintStream(baos, true, StandardCharsets.UTF_8.name()); + SolrCLI.StatusTool tool = new SolrCLI.StatusTool(stdoutSim); + try { + System.setProperty("basicauth", "harry:HarryIsUberCool"); + tool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(tool.getOptions()), toolArgs)); + Map obj = (Map) Utils.fromJSON(new ByteArrayInputStream(baos.toByteArray())); + assertTrue(obj.containsKey("version")); + assertTrue(obj.containsKey("startTime")); + assertTrue(obj.containsKey("uptime")); + assertTrue(obj.containsKey("memory")); + } catch (Exception e) { + log.error("RunExampleTool failed due to: " + e + + "; stdout from tool prior to failure: " + baos.toString(StandardCharsets.UTF_8.name())); + } + executeCommand(baseUrl + authzPrefix, cl, "{set-property : { blockUnknown: false}}", "harry", "HarryIsUberCool"); + } finally { + if (cl != null) { + HttpClientUtil.close(cl); } } - cloudSolrClient.request(new CollectionAdminRequest.Reload() - .setCollectionName(defaultCollName) - .setBasicAuthCredentials("harry", "HarryIsUberCool")); - - try { - cloudSolrClient.request(new CollectionAdminRequest.Reload() - .setCollectionName(defaultCollName) - .setBasicAuthCredentials("harry", "Cool12345")); - fail("This should not succeed"); - } catch (HttpSolrClient.RemoteSolrException e) { - - } - - cloudSolrClient.setDefaultCollection(old); - executeCommand(baseUrl + authzPrefix, cl,"{set-permission : { name : update , role : admin}}", "harry", "HarryIsUberCool"); - - 
SolrInputDocument doc = new SolrInputDocument(); - doc.setField("id","4"); - UpdateRequest update = new UpdateRequest(); - update.setBasicAuthCredentials("harry","HarryIsUberCool"); - update.add(doc); - update.setCommitWithin(100); - cloudSolrClient.request(update); - - - executeCommand(baseUrl + authzPrefix, cl, "{set-property : { blockUnknown: true}}", "harry", "HarryIsUberCool"); - String[] toolArgs = new String[]{ - "status", "-solr", baseUrl}; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - PrintStream stdoutSim = new PrintStream(baos, true, StandardCharsets.UTF_8.name()); - SolrCLI.StatusTool tool = new SolrCLI.StatusTool(stdoutSim); - try { - System.setProperty("basicauth", "harry:HarryIsUberCool"); - tool.runTool(SolrCLI.processCommandLineArgs(SolrCLI.joinCommonAndToolOptions(tool.getOptions()), toolArgs)); - Map obj = (Map) Utils.fromJSON(new ByteArrayInputStream(baos.toByteArray())); - assertTrue(obj.containsKey("version")); - assertTrue(obj.containsKey("startTime")); - assertTrue(obj.containsKey("uptime")); - assertTrue(obj.containsKey("memory")); - } catch (Exception e) { - log.error("RunExampleTool failed due to: " + e + - "; stdout from tool prior to failure: " + baos.toString(StandardCharsets.UTF_8.name())); - } - executeCommand(baseUrl + authzPrefix, cl, "{set-property : { blockUnknown: false}}", "harry", "HarryIsUberCool"); - HttpClientUtil.close(cl); } public static void executeCommand(String url, HttpClient cl, String payload, String user, String pwd) throws IOException { diff --git a/solr/example/example-DIH/solr/db/conf/elevate.xml b/solr/example/example-DIH/solr/db/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/example/example-DIH/solr/db/conf/elevate.xml +++ b/solr/example/example-DIH/solr/db/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/example/example-DIH/solr/mail/conf/elevate.xml b/solr/example/example-DIH/solr/mail/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/example/example-DIH/solr/mail/conf/elevate.xml +++ b/solr/example/example-DIH/solr/mail/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/example/example-DIH/solr/rss/conf/elevate.xml b/solr/example/example-DIH/solr/rss/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/example/example-DIH/solr/rss/conf/elevate.xml +++ b/solr/example/example-DIH/solr/rss/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/example/example-DIH/solr/solr/conf/elevate.xml b/solr/example/example-DIH/solr/solr/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/example/example-DIH/solr/solr/conf/elevate.xml +++ b/solr/example/example-DIH/solr/solr/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/example/files/conf/elevate.xml b/solr/example/files/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/example/files/conf/elevate.xml +++ b/solr/example/files/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml b/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml +++ b/solr/server/solr/configsets/data_driven_schema_configs/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml 
b/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml index 25d5cebe4fb..2c09ebed669 100644 --- a/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml +++ b/solr/server/solr/configsets/sample_techproducts_configs/conf/elevate.xml @@ -24,15 +24,19 @@ --> - - - - - - - - - - + + diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java new file mode 100644 index 00000000000..6c8247f746f --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java @@ -0,0 +1,256 @@ +package org.apache.solr.client.solrj.io.stream; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.HashMap; + +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.io.SolrClientCache; +import org.apache.solr.client.solrj.io.Tuple; +import org.apache.solr.client.solrj.io.comp.StreamComparator; +import org.apache.solr.client.solrj.io.stream.expr.Explanation; +import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType; +import org.apache.solr.client.solrj.io.stream.expr.Expressible; +import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.TermsParams; +import org.apache.solr.common.util.NamedList; + +/** + * Iterates over a gatherNodes() expression and scores the Tuples based on tf-idf. + * + * Expression Syntax: + * + * Default function call uses the "count(*)" field for termFreq. 
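+ * + * An illustrative default call (sketch, not from the patch): + * scoreNodes(gatherNodes(collection1, walk="product1->product_s", gather="basket_s", count(*)))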
+ * + * You can use a different value for termFreq by providing the termFreq param + * scoreNodes(gatherNodes(...), termFreq="min(weight)") + * + **/ + +public class ScoreNodesStream extends TupleStream implements Expressible +{ + + private static final long serialVersionUID = 1; + + protected String zkHost; + private TupleStream stream; + private transient SolrClientCache clientCache; + private Map nodes = new HashMap(); + private Iterator tuples; + private String termFreq; + + public ScoreNodesStream(TupleStream tupleStream, String nodeFreqField) throws IOException { + init(tupleStream, nodeFreqField); + } + + public ScoreNodesStream(StreamExpression expression, StreamFactory factory) throws IOException { + // grab all parameters out + List streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class); + StreamExpressionNamedParameter nodeFreqParam = factory.getNamedOperand(expression, "termFreq"); + + String docFreqField = "count(*)"; + if(nodeFreqParam != null) { + docFreqField = nodeFreqParam.getParameter().toString(); + } + + if(1 != streamExpressions.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a single stream but found %d",expression, streamExpressions.size())); + } + + zkHost = factory.getDefaultZkHost(); + + if(null == zkHost){ + throw new IOException("zkHost not found"); + } + + TupleStream stream = factory.constructStream(streamExpressions.get(0)); + + init(stream, docFreqField); + } + + private void init(TupleStream tupleStream, String termFreq) throws IOException{ + this.stream = tupleStream; + this.termFreq = termFreq; + } + + @Override + public StreamExpression toExpression(StreamFactory factory) throws IOException{ + return toExpression(factory, true); + } + + private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException { + // function name + StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass())); + + // nodeFreqField + expression.addParameter(new StreamExpressionNamedParameter("termFreq", termFreq)); + + if(includeStreams){ + // stream + if(stream instanceof Expressible){ + expression.addParameter(((Expressible)stream).toExpression(factory)); + } + else{ + throw new IOException("This ScoreNodesStream contains a non-expressible TupleStream - it cannot be converted to an expression"); + } + } + else{ + expression.addParameter(""); + } + + return expression; + } + + @Override + public Explanation toExplanation(StreamFactory factory) throws IOException { + + return new StreamExplanation(getStreamNodeId().toString()) + .withChildren(new Explanation[]{ + stream.toExplanation(factory) + }) + .withFunctionName(factory.getFunctionName(this.getClass())) + .withImplementingClass(this.getClass().getName()) + .withExpressionType(ExpressionType.STREAM_DECORATOR) + .withExpression(toExpression(factory, false).toString()); + } + + public void setStreamContext(StreamContext context) { + this.clientCache = context.getSolrClientCache(); + this.stream.setStreamContext(context); + } + + public List children() { + List l = new ArrayList(); + l.add(stream); + return l; + } + + public void open() throws IOException { + stream.open(); + Tuple node = null; + StringBuilder builder = new StringBuilder(); + String field = null; + String collection = null; + while(true) { + node = stream.read(); + if(node.EOF) { + break; + } + + if(!node.fields.containsKey("node")) { + throw new IOException("node field not present in 
the Tuple"); + } + + String nodeId = node.getString("node"); + + + nodes.put(nodeId, node); + if(builder.length() > 0) { + builder.append(","); + field = node.getString("field"); + collection = node.getString("collection"); + } + builder.append(nodeId); + } + + CloudSolrClient client = clientCache.getCloudSolrClient(zkHost); + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(CommonParams.QT, "/terms"); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, field); + params.add(TermsParams.TERMS_STATS, "true"); + params.add(TermsParams.TERMS_LIST, builder.toString()); + params.add(TermsParams.TERMS_LIMIT, Integer.toString(nodes.size())); + params.add("distrib", "true"); + + QueryRequest request = new QueryRequest(params); + + + try { + + //Get the response from the terms component + NamedList response = client.request(request, collection); + NamedList stats = (NamedList)response.get("indexstats"); + long numDocs = stats.get("numDocs").longValue(); + NamedList> fields = (NamedList>)response.get("terms"); + + int size = fields.size(); + for(int i=0; i terms = fields.get(fieldName); + int tsize = terms.size(); + for(int t=0; t // BACKUP request public static class Backup extends AsyncCollectionSpecificAdminRequest { protected final String name; + protected Optional repositoryName; protected String location; public Backup(String collection, String name) { @@ -625,12 +627,24 @@ public abstract class CollectionAdminRequest return this; } + public Optional getRepositoryName() { + return repositoryName; + } + + public Backup setRepositoryName(String repositoryName) { + this.repositoryName = Optional.ofNullable(repositoryName); + return this; + } + @Override public SolrParams getParams() { ModifiableSolrParams params = (ModifiableSolrParams) super.getParams(); params.set(CoreAdminParams.COLLECTION, collection); params.set(CoreAdminParams.NAME, name); - params.set("location", location); //note: optional + params.set(CoreAdminParams.BACKUP_LOCATION, location); //note: optional + if (repositoryName.isPresent()) { + params.set(CoreAdminParams.BACKUP_REPOSITORY, repositoryName.get()); + } return params; } @@ -643,6 +657,7 @@ public abstract class CollectionAdminRequest // RESTORE request public static class Restore extends AsyncCollectionSpecificAdminRequest { protected final String backupName; + protected Optional repositoryName; protected String location; // in common with collection creation: @@ -678,6 +693,15 @@ public abstract class CollectionAdminRequest return this; } + public Optional getRepositoryName() { + return repositoryName; + } + + public Restore setRepositoryName(String repositoryName) { + this.repositoryName = Optional.ofNullable(repositoryName); + return this; + } + // Collection creation params in common: public Restore setConfigName(String config) { this.configName = config; return this; } public String getConfigName() { return configName; } @@ -703,7 +727,7 @@ public abstract class CollectionAdminRequest ModifiableSolrParams params = (ModifiableSolrParams) super.getParams(); params.set(CoreAdminParams.COLLECTION, collection); params.set(CoreAdminParams.NAME, backupName); - params.set("location", location); //note: optional + params.set(CoreAdminParams.BACKUP_LOCATION, location); //note: optional params.set("collection.configName", configName); //note: optional if (maxShardsPerNode != null) { params.set( "maxShardsPerNode", maxShardsPerNode); @@ -717,6 +741,10 @@ public abstract class CollectionAdminRequest if (properties != null) { 
addProperties(params, properties); } + if (repositoryName.isPresent()) { + params.set(CoreAdminParams.BACKUP_REPOSITORY, repositoryName.get()); + } + return params; } diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java index 16813198320..f106b9c8612 100644 --- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java +++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java @@ -44,6 +44,7 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.solr.common.Callable; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.Pair; import org.apache.solr.common.util.Utils; @@ -106,7 +107,6 @@ public class ZkStateReader implements Closeable { public static final String URL_SCHEME = "urlScheme"; - public static final String BACKUP_LOCATION = "location"; /** A view of the current state of all collections; combines all the different state sources into a single view. */ protected volatile ClusterState clusterState; @@ -160,7 +160,7 @@ LEGACY_CLOUD, URL_SCHEME, AUTO_ADD_REPLICAS, - BACKUP_LOCATION, + CoreAdminParams.BACKUP_LOCATION, MAX_CORES_PER_NODE))); /** @@ -398,6 +398,7 @@ final Stat stat = new Stat(); final byte[] data = zkClient.getData(ALIASES, thisWatch, stat, true); ZkStateReader.this.aliases = ClusterState.load(data); + LOG.info("New alias definition is: " + ZkStateReader.this.aliases.toString()); } } catch (KeeperException.ConnectionLossException | KeeperException.SessionExpiredException e) { LOG.warn("ZooKeeper watch triggered, but Solr cannot talk to ZK: [{}]", e.getMessage()); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java index 716dfee0c88..7455cbf1071 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java @@ -108,6 +108,16 @@ // Node to create a replica on for ADDREPLICA at least. public static final String NODE = "node"; + /** + * A parameter to specify the name of the backup repository to be used. + */ + public static final String BACKUP_REPOSITORY = "repository"; + + /** + * A parameter to specify the location where the backup should be stored. + */ + public static final String BACKUP_LOCATION = "location"; + public enum CoreAdminAction { STATUS(true), UNLOAD, diff --git a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java index ff1be5f8d65..d719500d096 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java @@ -38,6 +38,18 @@ */ public static final String TERMS_FIELD = TERMS_PREFIX + "fl"; + /** + * Optional. The list of terms to be retrieved. + * + */ + public static final String TERMS_LIST = TERMS_PREFIX + "list"; + + /** + * Optional. If true, also return top-level index statistics (e.g. numDocs) with the terms. + * + */ + public static final String TERMS_STATS = TERMS_PREFIX + "stats"; + /** * Optional. The lower bound term to start at.
The TermEnum will start at the next term after this term in the dictionary. * diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java index 79579d16732..a141b7341ab 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java @@ -40,6 +40,8 @@ import org.apache.solr.client.solrj.io.comp.ComparatorOrder; import org.apache.solr.client.solrj.io.comp.FieldComparator; import org.apache.solr.client.solrj.io.stream.CloudSolrStream; import org.apache.solr.client.solrj.io.stream.HashJoinStream; +import org.apache.solr.client.solrj.io.stream.ScoreNodesStream; +import org.apache.solr.client.solrj.io.stream.SortStream; import org.apache.solr.client.solrj.io.stream.StreamContext; import org.apache.solr.client.solrj.io.stream.TupleStream; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; @@ -384,6 +386,130 @@ public class GraphExpressionTest extends SolrCloudTestCase { } + + @Test + public void testScoreNodesStream() throws Exception { + + + new UpdateRequest() + .add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "1") + .add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "1") + .add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "100") + .add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "1") + .add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "1") + .add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "1") + .add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "1") + .add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "1") + .add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "1") + .add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "1") + .add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "1") + .add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "1") + .add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "1") + .add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "1") + .add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "1") + .add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "1") + .commit(cluster.getSolrClient(), COLLECTION); + + List tuples = null; + TupleStream stream = null; + StreamContext context = new StreamContext(); + SolrClientCache cache = new SolrClientCache(); + context.setSolrClientCache(cache); + + StreamFactory factory = new StreamFactory() + .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()) + .withDefaultZkHost(cluster.getZkServer().getZkAddress()) + .withFunctionName("gatherNodes", GatherNodesStream.class) + .withFunctionName("scoreNodes", ScoreNodesStream.class) + .withFunctionName("search", CloudSolrStream.class) + .withFunctionName("sort", SortStream.class) + .withFunctionName("count", CountMetric.class) + .withFunctionName("avg", MeanMetric.class) + .withFunctionName("sum", SumMetric.class) + .withFunctionName("min", MinMetric.class) + .withFunctionName("max", MaxMetric.class); + + String expr = "gatherNodes(collection1, " + + "walk=\"product3->product_s\"," + + "gather=\"basket_s\")"; + + + String expr2 = "sort(by=\"nodeScore desc\", " + + 
"scoreNodes(gatherNodes(collection1, " + + expr+","+ + "walk=\"node->basket_s\"," + + "gather=\"product_s\", " + + "count(*), " + + "avg(price_f), " + + "sum(price_f), " + + "min(price_f), " + + "max(price_f))))"; + + stream = factory.constructStream(expr2); + + context = new StreamContext(); + context.setSolrClientCache(cache); + + stream.setStreamContext(context); + + tuples = getTuples(stream); + + Tuple tuple0 = tuples.get(0); + assert(tuple0.getString("node").equals("product4")); + assert(tuple0.getLong("docFreq") == 2); + assert(tuple0.getLong("count(*)") == 2); + + Tuple tuple1 = tuples.get(1); + assert(tuple1.getString("node").equals("product1")); + assert(tuple1.getLong("docFreq") == 8); + assert(tuple1.getLong("count(*)") == 3); + + Tuple tuple2 = tuples.get(2); + assert(tuple2.getString("node").equals("product5")); + assert(tuple2.getLong("docFreq") == 1); + assert(tuple2.getLong("count(*)") == 1); + + + //Test using a different termFreq field then the default count(*) + expr2 = "sort(by=\"nodeScore desc\", " + + "scoreNodes(termFreq=\"avg(price_f)\",gatherNodes(collection1, " + + expr+","+ + "walk=\"node->basket_s\"," + + "gather=\"product_s\", " + + "count(*), " + + "avg(price_f), " + + "sum(price_f), " + + "min(price_f), " + + "max(price_f))))"; + + stream = factory.constructStream(expr2); + + context = new StreamContext(); + context.setSolrClientCache(cache); + + stream.setStreamContext(context); + + tuples = getTuples(stream); + + tuple0 = tuples.get(0); + assert(tuple0.getString("node").equals("product5")); + assert(tuple0.getLong("docFreq") == 1); + assert(tuple0.getDouble("avg(price_f)") == 100); + + tuple1 = tuples.get(1); + assert(tuple1.getString("node").equals("product4")); + assert(tuple1.getLong("docFreq") == 2); + assert(tuple1.getDouble("avg(price_f)") == 1); + + tuple2 = tuples.get(2); + assert(tuple2.getString("node").equals("product1")); + assert(tuple2.getLong("docFreq") == 8); + assert(tuple2.getDouble("avg(price_f)") == 1); + + cache.close(); + } + + @Test public void testGatherNodesFriendsStream() throws Exception { @@ -707,6 +833,11 @@ public class GraphExpressionTest extends SolrCloudTestCase { client.close(); } + + + + + private String readString(InputStreamReader reader) throws Exception{ StringBuilder builder = new StringBuilder(); int c = 0; diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java index a0bb84ec03f..d5ff1c79cab 100644 --- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java +++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java @@ -230,6 +230,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { System.setProperty("enable.update.log", usually() ? "true" : "false"); System.setProperty("tests.shardhandler.randomSeed", Long.toString(random().nextLong())); System.setProperty("solr.clustering.enabled", "false"); + System.setProperty("solr.peerSync.useRangeVersions", String.valueOf(random().nextBoolean())); startTrackingSearchers(); ignoreException("ignore_exception"); newRandomConfig(); @@ -277,6 +278,7 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase { System.clearProperty("enable.update.log"); System.clearProperty("useCompoundFile"); System.clearProperty("urlScheme"); + System.clearProperty("solr.peerSync.useRangeVersions"); HttpClientUtil.setConfigurer(new HttpClientConfigurer());