diff --git a/build.xml b/build.xml index 5335957bea0..9db8312f383 100755 --- a/build.xml +++ b/build.xml @@ -186,20 +186,26 @@ } def checkLicenseHeaderPrecedes = { f, description, contentPattern, commentPattern, text, ratDocument -> - def contentMatcher = contentPattern.matcher(text); - if (contentMatcher.find()) { - def contentStartPos = contentMatcher.start(); - def commentMatcher = commentPattern.matcher(text); - while (commentMatcher.find()) { - if (isLicense(commentMatcher, ratDocument)) { - if (commentMatcher.start() < contentStartPos) { - break; // This file is all good, so break loop: license header precedes 'description' definition - } else { - reportViolation(f, description+' declaration precedes license header'); - } + def contentMatcher = contentPattern.matcher(text); + if (contentMatcher.find()) { + def contentStartPos = contentMatcher.start(); + def commentMatcher = commentPattern.matcher(text); + while (commentMatcher.find()) { + if (isLicense(commentMatcher, ratDocument)) { + if (commentMatcher.start() < contentStartPos) { + break; // This file is all good, so break loop: license header precedes 'description' definition + } else { + reportViolation(f, description+' declaration precedes license header'); } } } + } + } + + def checkMockitoAssume = { f, text -> + if (text.contains("mockito") && !text.contains("assumeWorkingMockito()")) { + reportViolation(f, 'File uses Mockito but has no assumeWorkingMockito() call'); + } } def checkForUnescapedSymbolSubstitutions = { f, text -> @@ -265,18 +271,21 @@ ratDocument.getMetaData().value(MetaData.RAT_URL_LICENSE_FAMILY_NAME))); } } - if (f.toString().endsWith('.java')) { + if (f.name.endsWith('.java')) { if (text.contains('org.slf4j.LoggerFactory')) { if (!validLoggerPattern.matcher(text).find()) { reportViolation(f, 'invalid logging pattern [not private static final, uses static class name]'); } } checkLicenseHeaderPrecedes(f, 'package', packagePattern, javaCommentPattern, text, ratDocument); + if (f.name.contains("Test")) { + checkMockitoAssume(f, text); + } } - if (f.toString().endsWith('.xml') || f.toString().endsWith('.xml.template')) { + if (f.name.endsWith('.xml') || f.name.endsWith('.xml.template')) { checkLicenseHeaderPrecedes(f, '', xmlTagPattern, xmlCommentPattern, text, ratDocument); } - if (f.toString().endsWith('.adoc')) { + if (f.name.endsWith('.adoc')) { checkForUnescapedSymbolSubstitutions(f, text); } }; diff --git a/dev-tools/idea/lucene/facet/facet.iml b/dev-tools/idea/lucene/facet/facet.iml index 91b8f91701b..d5f93275707 100644 --- a/dev-tools/idea/lucene/facet/facet.iml +++ b/dev-tools/idea/lucene/facet/facet.iml @@ -12,6 +12,16 @@ + + + + + + + + + + diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a69c8176b91..87ddccab2e8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -5,6 +5,14 @@ http://s.apache.org/luceneversions ======================= Lucene 8.0.0 ======================= +API Changes + +* LUCENE-8007: Index statistics Terms.getSumDocFreq(), Terms.getDocCount() are + now required to be stored by codecs. Additionally, TermsEnum.totalTermFreq() + and Terms.getSumTotalTermFreq() are now required: if frequencies are not + stored they are equal to TermsEnum.docFreq() and Terms.getSumDocFreq(), + respectively, because all freq() values equal 1. 
(Adrien Grand, Robert Muir) + Changes in Runtime Behavior * LUCENE-7837: Indices that were created before the previous major version @@ -25,6 +33,11 @@ Improvements ======================= Lucene 7.2.0 ======================= +API Changes + +* LUCENE-8017: Weight now exposes a getCacheHelper() method to help query caches + determine whether or not a query can be cached. (Alan Woodward) + Bug Fixes * LUCENE-7991: KNearestNeighborDocumentClassifier.knnSearch no longer applies @@ -49,6 +62,16 @@ Optimizations * LUCENE-7994: Use int/int scatter map to gather facet counts when the number of hits is small relative to the number of unique facet labels (Dawid Weiss, Robert Muir, Mike McCandless) + +Tests + +* LUCENE-8035: Run tests with JDK-specific options: --illegal-access=deny + on Java 9+. (Uwe Schindler) + +Build + +* LUCENE-6144: Upgrade Ivy to 2.4.0; 'ant ivy-bootstrap' now removes old Ivy + jars in ~/.ant/lib/. (Shawn Heisey, Steve Rowe) ======================= Lucene 7.1.0 ======================= diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java index b8d23bdb3d6..17024d8a9f1 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/BlockTermsReader.java @@ -139,8 +139,9 @@ public class BlockTermsReader extends FieldsProducer { assert numTerms >= 0; final long termsStartPointer = in.readVLong(); final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); - final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong(); - final long sumDocFreq = in.readVLong(); + final long sumTotalTermFreq = in.readVLong(); + // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value + final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
sumTotalTermFreq : in.readVLong(); final int docCount = in.readVInt(); final int longsSize = in.readVInt(); if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs @@ -149,7 +150,7 @@ public class BlockTermsReader extends FieldsProducer { if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in); } - if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize)); @@ -810,7 +811,9 @@ public class BlockTermsReader extends FieldsProducer { // docFreq, totalTermFreq state.docFreq = freqReader.readVInt(); //System.out.println(" dF=" + state.docFreq); - if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; // all postings have tf=1 + } else { state.totalTermFreq = state.docFreq + freqReader.readVLong(); //System.out.println(" totTF=" + state.totalTermFreq); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java index afdaf5aad17..e07cee099de 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsReader.java @@ -126,8 +126,9 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer { final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); assert fieldInfo != null: "field=" + field; assert numTerms <= Integer.MAX_VALUE; - final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong(); - final long sumDocFreq = in.readVLong(); + final long sumTotalTermFreq = in.readVLong(); + // when frequencies are omitted, sumDocFreq=totalTermFreq and we only write one value + final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
sumTotalTermFreq : in.readVLong(); final int docCount = in.readVInt(); final int longsSize = in.readVInt(); // System.out.println(" longsSize=" + longsSize); @@ -140,7 +141,7 @@ public final class OrdsBlockTreeTermsReader extends FieldsProducer { if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in); } - if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } final long indexStartFP = indexIn.readVLong(); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java index 9312ff9ed3a..a34f0fda1d0 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsIntersectTermsEnumFrame.java @@ -292,7 +292,9 @@ final class OrdsIntersectTermsEnumFrame { // stats termState.docFreq = statsReader.readVInt(); //if (DEBUG) System.out.println(" dF=" + state.docFreq); - if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + termState.totalTermFreq = termState.docFreq; // all tf values are 1 + } else { termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java index 76a30c4c11e..ee3782f29cd 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsSegmentTermsEnumFrame.java @@ -499,7 +499,9 @@ final class OrdsSegmentTermsEnumFrame { // stats state.docFreq = statsReader.readVInt(); //if (DEBUG) System.out.println(" dF=" + state.docFreq); - if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; // all tf values are 1 + } else { state.totalTermFreq = state.docFreq + statsReader.readVLong(); //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java index 97bbea3ddef..5ba4c5ff0a3 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java @@ -19,7 +19,6 @@ package org.apache.lucene.codecs.memory; import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; import java.util.BitSet; import java.util.Collection; import java.util.Collections; @@ -111,8 +110,9 @@ public class FSTOrdTermsReader extends FieldsProducer { FieldInfo fieldInfo = fieldInfos.fieldInfo(blockIn.readVInt()); boolean hasFreq = fieldInfo.getIndexOptions() != IndexOptions.DOCS; long numTerms = blockIn.readVLong(); - long sumTotalTermFreq = hasFreq ? 
blockIn.readVLong() : -1; - long sumDocFreq = blockIn.readVLong(); + long sumTotalTermFreq = blockIn.readVLong(); + // if freqs are omitted, sumDocFreq=sumTotalTermFreq and we only write one value + long sumDocFreq = hasFreq ? blockIn.readVLong() : sumTotalTermFreq; int docCount = blockIn.readVInt(); int longsSize = blockIn.readVInt(); FST index = new FST<>(indexIn, PositiveIntOutputs.getSingleton()); @@ -146,7 +146,7 @@ public class FSTOrdTermsReader extends FieldsProducer { throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount + " (blockIn=" + blockIn + ")", indexIn); } // #positions must be >= #postings - if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) { + if (field.sumTotalTermFreq < field.sumDocFreq) { throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq + " (blockIn=" + blockIn + ")", indexIn); } if (previous != null) { @@ -343,9 +343,6 @@ public class FSTOrdTermsReader extends FieldsProducer { this.totalTermFreq = new long[INTERVAL]; this.statsBlockOrd = -1; this.metaBlockOrd = -1; - if (!hasFreqs()) { - Arrays.fill(totalTermFreq, -1); - } } /** Decodes stats data into term state */ @@ -388,6 +385,7 @@ public class FSTOrdTermsReader extends FieldsProducer { } } else { docFreq[i] = code; + totalTermFreq[i] = code; } } } diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java index b120656688c..8dda05c3805 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java @@ -94,8 +94,9 @@ public class FSTTermsReader extends FieldsProducer { int fieldNumber = in.readVInt(); FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); long numTerms = in.readVLong(); - long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : in.readVLong(); - long sumDocFreq = in.readVLong(); + long sumTotalTermFreq = in.readVLong(); + // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value + long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong(); int docCount = in.readVInt(); int longsSize = in.readVInt(); TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize); @@ -126,7 +127,7 @@ public class FSTTermsReader extends FieldsProducer { throw new CorruptIndexException("invalid sumDocFreq: " + field.sumDocFreq + " docCount: " + field.docCount, in); } // #positions must be >= #postings - if (field.sumTotalTermFreq != -1 && field.sumTotalTermFreq < field.sumDocFreq) { + if (field.sumTotalTermFreq < field.sumDocFreq) { throw new CorruptIndexException("invalid sumTotalTermFreq: " + field.sumTotalTermFreq + " sumDocFreq: " + field.sumDocFreq, in); } if (previous != null) { @@ -288,7 +289,7 @@ public class FSTTermsReader extends FieldsProducer { @Override public long totalTermFreq() throws IOException { - return state.totalTermFreq; + return state.totalTermFreq == -1 ? 
state.docFreq : state.totalTermFreq; } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index dff445eb114..21983c6429b 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -733,10 +733,10 @@ public final class MemoryPostingsFormat extends PostingsFormat { if (!didDecode) { buffer.reset(current.output.bytes, current.output.offset, current.output.length); docFreq = buffer.readVInt(); - if (field.getIndexOptions() != IndexOptions.DOCS) { - totalTermFreq = docFreq + buffer.readVLong(); + if (field.getIndexOptions() == IndexOptions.DOCS) { + totalTermFreq = docFreq; } else { - totalTermFreq = -1; + totalTermFreq = docFreq + buffer.readVLong(); } postingsSpare.bytes = current.output.bytes; postingsSpare.offset = buffer.getPosition(); @@ -873,12 +873,15 @@ public final class MemoryPostingsFormat extends PostingsFormat { field = fieldInfos.fieldInfo(fieldNumber); if (field == null) { throw new CorruptIndexException("invalid field number: " + fieldNumber, in); - } else if (field.getIndexOptions() != IndexOptions.DOCS) { - sumTotalTermFreq = in.readVLong(); } else { - sumTotalTermFreq = -1; + sumTotalTermFreq = in.readVLong(); + } + // if frequencies are omitted, sumDocFreq = sumTotalTermFreq and we only write one value. + if (field.getIndexOptions() == IndexOptions.DOCS) { + sumDocFreq = sumTotalTermFreq; + } else { + sumDocFreq = in.readVLong(); } - sumDocFreq = in.readVLong(); docCount = in.readVInt(); fst = new FST<>(in, outputs); diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index faab7885f6c..f5504b3151e 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -202,7 +202,7 @@ class SimpleTextFieldsReader extends FieldsProducer { @Override public long totalTermFreq() { - return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq; + return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq; } @Override @@ -568,12 +568,13 @@ class SimpleTextFieldsReader extends FieldsProducer { } else if (StringHelper.startsWith(scratch.get(), DOC)) { docFreq++; sumDocFreq++; + totalTermFreq++; scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length); int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); visitedDocs.set(docID); } else if (StringHelper.startsWith(scratch.get(), FREQ)) { scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length); - totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()); + totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1; } else if (StringHelper.startsWith(scratch.get(), TERM)) { if (lastDocsStart != -1) { b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart, @@ -637,7 +638,7 @@ class SimpleTextFieldsReader extends FieldsProducer { @Override public long getSumTotalTermFreq() { - return fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
-1 : sumTotalTermFreq; + return sumTotalTermFreq; } @Override diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 873aaeffe5b..25f2a4d0c08 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -288,7 +288,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { @Override public long getSumTotalTermFreq() throws IOException { - return -1; + // TODO: make it constant-time + long ttf = 0; + TermsEnum iterator = iterator(); + for (BytesRef b = iterator.next(); b != null; b = iterator.next()) { + ttf += iterator.totalTermFreq(); + } + return ttf; } @Override diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 2c70813227f..663e733f6b0 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -80,13 +80,15 @@ - + + + - + @@ -413,12 +415,12 @@ - + - + @@ -482,19 +484,20 @@ Ivy is not available - + - + - + - + + + + + + + + + @@ -948,6 +959,12 @@ + + + + + + @@ -1029,6 +1046,7 @@ + diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java index 0e14bf74182..0ef21292c04 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java @@ -180,8 +180,9 @@ public final class BlockTreeTermsReader extends FieldsProducer { if (fieldInfo == null) { throw new CorruptIndexException("invalid field number: " + field, termsIn); } - final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1 : termsIn.readVLong(); - final long sumDocFreq = termsIn.readVLong(); + final long sumTotalTermFreq = termsIn.readVLong(); + // when frequencies are omitted, sumDocFreq=sumTotalTermFreq and only one value is written. + final long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? 
sumTotalTermFreq : termsIn.readVLong(); final int docCount = termsIn.readVInt(); final int longsSize = termsIn.readVInt(); if (longsSize < 0) { @@ -195,7 +196,7 @@ public final class BlockTreeTermsReader extends FieldsProducer { if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn); } - if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, termsIn); } final long indexStartFP = indexIn.readVLong(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java index 578e1453007..236e77a7f67 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java @@ -288,7 +288,9 @@ final class IntersectTermsEnumFrame { // stats termState.docFreq = statsReader.readVInt(); - if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + if (ite.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + termState.totalTermFreq = termState.docFreq; // all postings have freq=1 + } else { termState.totalTermFreq = termState.docFreq + statsReader.readVLong(); } // metadata diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java index 0860b30cbf5..a32bdac427c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java @@ -417,7 +417,9 @@ final class SegmentTermsEnumFrame { // stats state.docFreq = statsReader.readVInt(); //if (DEBUG) System.out.println(" dF=" + state.docFreq); - if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + if (ste.fr.fieldInfo.getIndexOptions() == IndexOptions.DOCS) { + state.totalTermFreq = state.docFreq; // all postings have freq=1 + } else { state.totalTermFreq = state.docFreq + statsReader.readVLong(); //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq); } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java index 2e8ed630307..f5318baffe9 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java @@ -745,6 +745,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem private static class TVTerms extends Terms { private final int numTerms, flags; + private final long totalTermFreq; private final int[] prefixLengths, suffixLengths, termFreqs, positionIndex, positions, startOffsets, lengths, payloadIndex; private final BytesRef termBytes, payloadBytes; @@ -764,6 +765,11 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem this.payloadIndex = payloadIndex; this.payloadBytes = payloadBytes; this.termBytes = termBytes; + long ttf = 0; + for (int tf : termFreqs) { + ttf += tf; + } + 
this.totalTermFreq = ttf; } @Override @@ -782,7 +788,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem @Override public long getSumTotalTermFreq() throws IOException { - return -1L; + return totalTermFreq; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java index c43b708bcd4..e996d286f8d 100644 --- a/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/RangeFieldQuery.java @@ -21,6 +21,7 @@ import java.util.Arrays; import java.util.Objects; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -356,6 +357,11 @@ abstract class RangeFieldQuery extends Query { } return scorerSupplier.get(Long.MAX_VALUE); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java index 5da0733da17..c5391d3c2ec 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedNumericDocValuesRangeQuery.java @@ -138,6 +138,11 @@ abstract class SortedNumericDocValuesRangeQuery extends Query { } return new ConstantScoreScorer(this, score(), iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java index 860679adf14..712e17e4383 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesRangeQuery.java @@ -181,6 +181,11 @@ abstract class SortedSetDocValuesRangeQuery extends Query { } return new ConstantScoreScorer(this, score(), iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java index 3256107d426..8a19fe14120 100644 --- a/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java @@ -123,7 +123,10 @@ public abstract class BaseCompositeReader extends Composi ensureOpen(); int total = 0; // sum freqs in subreaders for (int i = 0; i < subReaders.length; i++) { - total += subReaders[i].docFreq(term); + int sub = subReaders[i].docFreq(term); + assert sub >= 0; + assert sub <= subReaders[i].getDocCount(term.field()); + total += sub; } return total; } @@ -134,9 +137,8 @@ public abstract class BaseCompositeReader extends Composi long total = 0; // sum freqs in subreaders for (int i = 0; i < subReaders.length; i++) { long sub = subReaders[i].totalTermFreq(term); - if (sub == -1) { - return -1; - } + assert sub >= 0; + assert sub <= 
subReaders[i].getSumTotalTermFreq(term.field()); total += sub; } return total; @@ -148,9 +150,8 @@ public abstract class BaseCompositeReader extends Composi long total = 0; // sum doc freqs in subreaders for (R reader : subReaders) { long sub = reader.getSumDocFreq(field); - if (sub == -1) { - return -1; // if any of the subs doesn't support it, return -1 - } + assert sub >= 0; + assert sub <= reader.getSumTotalTermFreq(field); total += sub; } return total; @@ -162,9 +163,8 @@ public abstract class BaseCompositeReader extends Composi int total = 0; // sum doc counts in subreaders for (R reader : subReaders) { int sub = reader.getDocCount(field); - if (sub == -1) { - return -1; // if any of the subs doesn't support it, return -1 - } + assert sub >= 0; + assert sub <= reader.maxDoc(); total += sub; } return total; @@ -176,9 +176,8 @@ public abstract class BaseCompositeReader extends Composi long total = 0; // sum doc total term freqs in subreaders for (R reader : subReaders) { long sub = reader.getSumTotalTermFreq(field); - if (sub == -1) { - return -1; // if any of the subs doesn't support it, return -1 - } + assert sub >= 0; + assert sub >= reader.getSumDocFreq(field); total += sub; } return total; diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 552e83d42d3..028da241fb3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -1253,6 +1253,10 @@ public final class CheckIndex implements Closeable { continue; } + if (terms.getDocCount() > maxDoc) { + throw new RuntimeException("docCount > maxDoc for field: " + field + ", docCount=" + terms.getDocCount() + ", maxDoc=" + maxDoc); + } + final boolean hasFreqs = terms.hasFreqs(); final boolean hasPositions = terms.hasPositions(); final boolean hasPayloads = terms.hasPayloads(); @@ -1295,12 +1299,6 @@ public final class CheckIndex implements Closeable { throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs); } - if (hasFreqs == false) { - if (terms.getSumTotalTermFreq() != -1) { - throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)"); - } - } - if (!isVectors) { final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; if (hasPositions != expectedHasPositions) { @@ -1375,8 +1373,8 @@ public final class CheckIndex implements Closeable { postings = termsEnum.postings(postings, PostingsEnum.ALL); if (hasFreqs == false) { - if (termsEnum.totalTermFreq() != -1) { - throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)"); + if (termsEnum.totalTermFreq() != termsEnum.docFreq()) { + throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be " + termsEnum.docFreq() + ")"); } } @@ -1406,14 +1404,11 @@ public final class CheckIndex implements Closeable { break; } visitedDocs.set(doc); - int freq = -1; - if (hasFreqs) { - freq = postings.freq(); - if (freq <= 0) { - throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); - } - totalTermFreq += freq; - } else { + int freq = postings.freq(); + if (freq <= 0) { + throw new 
RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } + if (hasFreqs == false) { // When a field didn't index freq, it must // consistently "lie" and pretend that freq was // 1: @@ -1421,6 +1416,8 @@ public final class CheckIndex implements Closeable { throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false"); } } + totalTermFreq += freq; + if (liveDocs == null || liveDocs.get(doc)) { hasNonDeletedDocs = true; status.totFreq++; @@ -1490,19 +1487,25 @@ public final class CheckIndex implements Closeable { } final long totalTermFreq2 = termsEnum.totalTermFreq(); - final boolean hasTotalTermFreq = hasFreqs && totalTermFreq2 != -1; if (docCount != docFreq) { throw new RuntimeException("term " + term + " docFreq=" + docFreq + " != tot docs w/o deletions " + docCount); } - if (hasTotalTermFreq) { - if (totalTermFreq2 <= 0) { - throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds"); - } - sumTotalTermFreq += totalTermFreq; - if (totalTermFreq != totalTermFreq2) { - throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq); - } + if (docFreq > terms.getDocCount()) { + throw new RuntimeException("term " + term + " docFreq=" + docFreq + " > docCount=" + terms.getDocCount()); + } + if (totalTermFreq2 <= 0) { + throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds"); + } + sumTotalTermFreq += totalTermFreq; + if (totalTermFreq != totalTermFreq2) { + throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq); + } + if (totalTermFreq2 < docFreq) { + throw new RuntimeException("totalTermFreq: " + totalTermFreq2 + " is out of bounds, docFreq=" + docFreq); + } + if (hasFreqs == false && totalTermFreq != docFreq) { + throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq + " != docFreq=" + docFreq); } // Test skipping @@ -1626,22 +1629,22 @@ public final class CheckIndex implements Closeable { } status.blockTreeStats.put(field, stats); - if (sumTotalTermFreq != 0) { - final long v = fields.terms(field).getSumTotalTermFreq(); - if (v != -1 && sumTotalTermFreq != v) { - throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq); - } + final long actualSumDocFreq = fields.terms(field).getSumDocFreq(); + if (sumDocFreq != actualSumDocFreq) { + throw new RuntimeException("sumDocFreq for field " + field + "=" + actualSumDocFreq + " != recomputed sumDocFreq=" + sumDocFreq); } + + final long actualSumTotalTermFreq = fields.terms(field).getSumTotalTermFreq(); + if (sumTotalTermFreq != actualSumTotalTermFreq) { + throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + actualSumTotalTermFreq + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq); + } - if (sumDocFreq != 0) { - final long v = fields.terms(field).getSumDocFreq(); - if (v != -1 && sumDocFreq != v) { - throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq); - } + if (hasFreqs == false && sumTotalTermFreq != sumDocFreq) { + throw new RuntimeException("sumTotalTermFreq for field " + field + " should be " + sumDocFreq + ", got sumTotalTermFreq=" + sumTotalTermFreq); } final int v = fieldTerms.getDocCount(); - if (v != -1 && visitedDocs.cardinality() != v) { + if (visitedDocs.cardinality() 
!= v) { throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality()); } diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java index 3efd5874e62..8b9a83b27d3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java @@ -450,25 +450,25 @@ public abstract class IndexReader implements Closeable { /** * Returns the total number of occurrences of {@code term} across all - * documents (the sum of the freq() for each doc that has this term). This - * will be -1 if the codec doesn't support this measure. Note that, like other - * term measures, this measure does not take deleted documents into account. + * documents (the sum of the freq() for each doc that has this term). + * Note that, like other term measures, this measure does not take + * deleted documents into account. */ public abstract long totalTermFreq(Term term) throws IOException; /** - * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field, - * or -1 if this measure isn't stored by the codec. Note that, just like other - * term measures, this measure does not take deleted documents into account. + * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field. + * Note that, just like other term measures, this measure does not take deleted + * documents into account. * * @see Terms#getSumDocFreq() */ public abstract long getSumDocFreq(String field) throws IOException; /** - * Returns the number of documents that have at least one term for this field, - * or -1 if this measure isn't stored by the codec. Note that, just like other - * term measures, this measure does not take deleted documents into account. + * Returns the number of documents that have at least one term for this field. + * Note that, just like other term measures, this measure does not take deleted + * documents into account. * * @see Terms#getDocCount() */ @@ -476,9 +476,8 @@ public abstract class IndexReader implements Closeable { /** * Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this - * field, or -1 if this measure isn't stored by the codec (or if this fields - * omits term freq and positions). Note that, just like other term measures, - * this measure does not take deleted documents into account. + * field. Note that, just like other term measures, this measure does not take + * deleted documents into account. 
* * @see Terms#getSumTotalTermFreq() */ diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java b/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java index 79e11c45eee..203e366d28e 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java @@ -149,9 +149,7 @@ public final class MultiTerms extends Terms { long sum = 0; for(Terms terms : subs) { final long v = terms.getSumTotalTermFreq(); - if (v == -1) { - return -1; - } + assert v != -1; sum += v; } return sum; @@ -162,9 +160,7 @@ public final class MultiTerms extends Terms { long sum = 0; for(Terms terms : subs) { final long v = terms.getSumDocFreq(); - if (v == -1) { - return -1; - } + assert v != -1; sum += v; } return sum; @@ -175,9 +171,7 @@ public final class MultiTerms extends Terms { int sum = 0; for(Terms terms : subs) { final int v = terms.getDocCount(); - if (v == -1) { - return -1; - } + assert v != -1; sum += v; } return sum; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java index 51f495817fd..7db838b91a4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -326,9 +326,7 @@ public final class MultiTermsEnum extends TermsEnum { long sum = 0; for(int i=0;i= 0; + assert totalTermFreq >= 0; + assert docFreq <= totalTermFreq; this.docFreq += docFreq; - if (this.totalTermFreq >= 0 && totalTermFreq >= 0) - this.totalTermFreq += totalTermFreq; - else - this.totalTermFreq = -1; + this.totalTermFreq += totalTermFreq; } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java index 7197e25e549..dca8a276bbf 100644 --- a/lucene/core/src/java/org/apache/lucene/index/Terms.java +++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java @@ -99,25 +99,21 @@ public abstract class Terms { * other term measures, this measure does not take deleted * documents into account. */ public abstract long size() throws IOException; - + /** Returns the sum of {@link TermsEnum#totalTermFreq} for - * all terms in this field, or -1 if this measure isn't - * stored by the codec (or if this fields omits term freq - * and positions). Note that, just like other term + * all terms in this field. Note that, just like other term * measures, this measure does not take deleted documents * into account. */ public abstract long getSumTotalTermFreq() throws IOException; /** Returns the sum of {@link TermsEnum#docFreq()} for - * all terms in this field, or -1 if this measure isn't - * stored by the codec. Note that, just like other term + * all terms in this field. Note that, just like other term * measures, this measure does not take deleted documents * into account. */ public abstract long getSumDocFreq() throws IOException; /** Returns the number of documents that have at least one - * term for this field, or -1 if this measure isn't - * stored by the codec. Note that, just like other term + * term for this field. Note that, just like other term * measures, this measure does not take deleted documents * into account. 
*/ public abstract int getDocCount() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index c4b1017b079..4b5755a330d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -131,8 +131,7 @@ public abstract class TermsEnum implements BytesRefIterator { /** Returns the total number of occurrences of this term * across all documents (the sum of the freq() for each - * doc that has this term). This will be -1 if the - * codec doesn't support this measure. Note that, like + * doc that has this term). Note that, like * other term measures, this measure does not take * deleted documents into account. */ public abstract long totalTermFreq() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/index/package-info.java b/lucene/core/src/java/org/apache/lucene/index/package-info.java index f5a86d1cb37..d7d337cc78a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/package-info.java +++ b/lucene/core/src/java/org/apache/lucene/index/package-info.java @@ -148,12 +148,8 @@ * deleted documents, when segments are merged the statistic is updated as * those deleted documents are merged away. *
 *   <li> {@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number
- *       of occurrences of this term across all documents. Note that this statistic
- *       is unavailable (returns -1) if term frequencies were omitted
- *       from the index
- *       ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
- *       for the field. Like docFreq(), it will also count occurrences that appear in
- *       deleted documents.
+ *       of occurrences of this term across all documents. Like docFreq(), it will
+ *       also count occurrences that appear in deleted documents.
 *   </li>
 * </ul>
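To make the new contract concrete: totalTermFreq() is never -1 anymore, and for a field indexed with IndexOptions.DOCS it simply equals docFreq(), since every freq() is then 1. A minimal sketch (not part of the patch; the demo class, field name and use of MultiFields.getTerms are illustrative assumptions):

```java
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermStatsDemo {
  // Walks every term of a field and prints its stats under the new contract:
  // totalTermFreq() is always present and always >= docFreq(); the two are
  // equal when the field omits frequencies (IndexOptions.DOCS).
  static void dumpTermStats(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return; // field is absent from the index
    }
    TermsEnum termsEnum = terms.iterator();
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      long ttf = termsEnum.totalTermFreq(); // never -1 after this change
      System.out.println(term.utf8ToString() + " df=" + termsEnum.docFreq() + " ttf=" + ttf);
    }
  }
}
```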
@@ -180,10 +176,7 @@
 *       of tokens for the field. This can be thought of as the sum of
 *       {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the
 *       field, and like totalTermFreq() it will also count occurrences that appear in
- *       deleted documents, and will be unavailable (returns -1) if term
- *       frequencies were omitted from the index
- *       ({@link org.apache.lucene.index.IndexOptions#DOCS DOCS})
- *       for the field.
+ *       deleted documents.
 *   </li>
 * </ul>
 *
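Because these sums can no longer be -1, consumers can drop their fallback paths. As one small illustration (a sketch, not from this patch; the class and method names are made up), average field length can now be derived from CollectionStatistics without guarding against missing statistics:

```java
import org.apache.lucene.search.CollectionStatistics;

public class AvgFieldLength {
  // sumTotalTermFreq() and docCount() are now guaranteed positive, so no
  // -1 checks are needed; cast to double because both are 64-bit counts.
  static double averageFieldLength(CollectionStatistics stats) {
    return (double) stats.sumTotalTermFreq() / stats.docCount();
  }
}
```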
    diff --git a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java index 3a0cdc5a1ef..219d4535827 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java @@ -277,11 +277,7 @@ public final class BlendedTermQuery extends Query { long ttf = 0; for (TermContext ctx : contexts) { df = Math.max(df, ctx.docFreq()); - if (ctx.totalTermFreq() == -1L) { - ttf = -1L; - } else if (ttf != -1L) { - ttf += ctx.totalTermFreq(); - } + ttf += ctx.totalTermFreq(); } for (int i = 0; i < contexts.length; ++i) { diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java index 778cb639f2c..cb1c1947b2a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java @@ -26,6 +26,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; @@ -299,6 +300,11 @@ final class BooleanWeight extends Weight { return scorerSupplier.get(Long.MAX_VALUE); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, weights); + } + @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); diff --git a/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java b/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java index a6a1e190c25..2dac05985d6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java +++ b/lucene/core/src/java/org/apache/lucene/search/CollectionStatistics.java @@ -23,7 +23,27 @@ import org.apache.lucene.index.Terms; // javadocs /** - * Contains statistics for a collection (field) + * Contains statistics for a collection (field). + *

+ * This class holds statistics across all documents for scoring purposes:
+ * <ul>
+ *   <li> {@link #maxDoc()}: number of documents.
+ *   <li> {@link #docCount()}: number of documents that contain this field.
+ *   <li> {@link #sumDocFreq()}: number of postings-list entries.
+ *   <li> {@link #sumTotalTermFreq()}: number of tokens.
+ * </ul>
+ * <p>
+ * The following conditions are always true:
+ * <ul>
+ *   <li> All statistics are positive integers: never zero or negative.
+ *   <li> {@code docCount} &lt;= {@code maxDoc}
+ *   <li> {@code docCount} &lt;= {@code sumDocFreq} &lt;= {@code sumTotalTermFreq}
+ * </ul>
+ * <p>
+ * Values may include statistics on deleted documents that have not yet been merged away.
+ * <p>
    + * Be careful when performing calculations on these values because they are represented + * as 64-bit integer values, you may need to cast to {@code double} for your use. * @lucene.experimental */ public class CollectionStatistics { @@ -51,33 +71,23 @@ public class CollectionStatistics { if (maxDoc <= 0) { throw new IllegalArgumentException("maxDoc must be positive, maxDoc: " + maxDoc); } - if (docCount != -1) { - if (docCount <= 0) { - throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount); - } - if (docCount > maxDoc) { - throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc); - } + if (docCount <= 0) { + throw new IllegalArgumentException("docCount must be positive, docCount: " + docCount); } - if (sumDocFreq != -1) { - if (sumDocFreq <= 0) { - throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq); - } - if (docCount != -1) { - if (sumDocFreq < docCount) { - throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount); - } - } + if (docCount > maxDoc) { + throw new IllegalArgumentException("docCount must not exceed maxDoc, docCount: " + docCount + ", maxDoc: " + maxDoc); } - if (sumTotalTermFreq != -1) { - if (sumTotalTermFreq <= 0) { - throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq); - } - if (sumDocFreq != -1) { - if (sumTotalTermFreq < sumDocFreq) { - throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq); - } - } + if (sumDocFreq <= 0) { + throw new IllegalArgumentException("sumDocFreq must be positive, sumDocFreq: " + sumDocFreq); + } + if (sumDocFreq < docCount) { + throw new IllegalArgumentException("sumDocFreq must be at least docCount, sumDocFreq: " + sumDocFreq + ", docCount: " + docCount); + } + if (sumTotalTermFreq <= 0) { + throw new IllegalArgumentException("sumTotalTermFreq must be positive, sumTotalTermFreq: " + sumTotalTermFreq); + } + if (sumTotalTermFreq < sumDocFreq) { + throw new IllegalArgumentException("sumTotalTermFreq must be at least sumDocFreq, sumTotalTermFreq: " + sumTotalTermFreq + ", sumDocFreq: " + sumDocFreq); } this.field = field; this.maxDoc = maxDoc; @@ -86,33 +96,65 @@ public class CollectionStatistics { this.sumDocFreq = sumDocFreq; } - /** returns the field name */ + /** + * The field's name. + *

+   * This value is never {@code null}.
+   * @return field's name, not {@code null}
+   */
   public final String field() {
     return field;
   }
 
-  /** returns the total number of documents, regardless of
-   *  whether they all contain values for this field.
-   * @see IndexReader#maxDoc() */
+  /**
+   * The total number of documents, regardless of
+   * whether they all contain values for this field.
+   * <p>
+   * This value is always a positive number.
+   * @return total number of documents, in the range [1 .. {@link Long#MAX_VALUE}]
+   * @see IndexReader#maxDoc()
+   */
   public final long maxDoc() {
     return maxDoc;
   }
 
-  /** returns the total number of documents that
-   *  have at least one term for this field.
-   * @see Terms#getDocCount() */
+  /**
+   * The total number of documents that have at least
+   * one term for this field.
+   * <p>
+   * This value is always a positive number, and never
+   * exceeds {@link #maxDoc()}.
+   * @return total number of documents containing this field, in the range [1 .. {@link #maxDoc()}]
+   * @see Terms#getDocCount()
+   */
   public final long docCount() {
     return docCount;
   }
 
-  /** returns the total number of tokens for this field
-   * @see Terms#getSumTotalTermFreq() */
+  /**
+   * The total number of tokens for this field.
+   * This is the "word count" for this field across all documents.
+   * It is the sum of {@link TermStatistics#totalTermFreq()} across all terms.
+   * It is also the sum of each document's field length across all documents.
+   * <p>
+   * This value is always a positive number, and always at least {@link #sumDocFreq()}.
+   * @return total number of tokens in the field, in the range [{@link #sumDocFreq()} .. {@link Long#MAX_VALUE}]
+   * @see Terms#getSumTotalTermFreq()
+   */
   public final long sumTotalTermFreq() {
     return sumTotalTermFreq;
   }
 
-  /** returns the total number of postings for this field
-   * @see Terms#getSumDocFreq() */
+  /**
+   * The total number of posting list entries for this field.
+   * This is the sum of term-document pairs: the sum of {@link TermStatistics#docFreq()} across all terms.
+   * It is also the sum of each document's unique term count for this field across all documents.
+   * <p>
    + * This value is always a positive number, always at least {@link #docCount()}, and never + * exceeds {@link #sumTotalTermFreq()}. + * @return number of posting list entries, in the range [{@link #docCount()} .. {@link #sumTotalTermFreq()}] + * @see Terms#getSumDocFreq() + */ public final long sumDocFreq() { return sumDocFreq; } diff --git a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java index 72dc442d7cc..2646fb4df73 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -167,6 +167,11 @@ public final class ConstantScoreQuery extends Query { return scorerSupplier.get(Long.MAX_VALUE); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return innerWeight.getCacheHelper(context); + } + }; } else { return innerWeight; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java index 4ea29d6b8ed..38f74ad5615 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java @@ -137,6 +137,11 @@ public final class DisjunctionMaxQuery extends Query implements Iterable } } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, weights); + } + /** Explain the score we computed for doc */ @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java index cbb5e043d7b..3702814ca70 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesFieldExistsQuery.java @@ -23,6 +23,7 @@ import java.util.Objects; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -97,6 +98,11 @@ public final class DocValuesFieldExistsQuery extends Query { return new ConstantScoreScorer(this, score(), iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java index 20266781d03..96d45d6719d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java +++ b/lucene/core/src/java/org/apache/lucene/search/DocValuesRewriteMethod.java @@ -86,17 +86,17 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod { @Override public long getSumTotalTermFreq() { - return -1; + throw new UnsupportedOperationException(); } @Override public long getSumDocFreq() { - return -1; + throw new UnsupportedOperationException(); } @Override public int getDocCount() { - return -1; + throw new UnsupportedOperationException(); } @Override @@ -158,6 +158,11 @@ public final class DocValuesRewriteMethod extends MultiTermQuery.RewriteMethod { } 
}); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(query.field, context); + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java index 2053067242f..08f6c70772b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterWeight.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -55,6 +56,11 @@ public abstract class FilterWeight extends Weight { this.in = weight; } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return in.getCacheHelper(context); + } + @Override public void extractTerms(Set terms) { in.extractTerms(terms); diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java index be14815ab3a..7a912a229c7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndexOrDocValuesQuery.java @@ -169,6 +169,13 @@ public final class IndexOrDocValuesQuery extends Query { } return scorerSupplier.get(Long.MAX_VALUE); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + // Both index and dv query should return the same values, so we can use + // the index query's cachehelper here + return indexWeight.getCacheHelper(context); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java index a682852fda6..e00ca9b4e5b 100644 --- a/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java +++ b/lucene/core/src/java/org/apache/lucene/search/LRUQueryCache.java @@ -722,8 +722,7 @@ public class LRUQueryCache implements QueryCache, Accountable { policy.onUse(getQuery()); } - // TODO: should it be pluggable, eg. for queries that run on doc values? - final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); + final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context); if (cacheHelper == null) { // this segment is not suitable for caching return in.scorerSupplier(context); @@ -788,14 +787,18 @@ public class LRUQueryCache implements QueryCache, Accountable { return scorerSupplier.get(Long.MAX_VALUE); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return in.getCacheHelper(context); + } + @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { if (used.compareAndSet(false, true)) { policy.onUse(getQuery()); } - // TODO: should it be pluggable, eg. for queries that run on doc values? 
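The cache now asks the Weight itself for a cache key (LUCENE-8017) instead of assuming the segment's core cache helper, which is what lets doc-values queries participate safely. A hedged sketch of the caller side (the demo class and method are hypothetical, mirroring the new LRUQueryCache logic in the hunk around this point):

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Weight;

public class CacheHelperDemo {
  // Ask the Weight, not the raw segment reader, whether this segment's
  // results may be cached. A null helper means the query's matches on this
  // segment are not safe to cache (for example, updatable doc values).
  static boolean isCacheable(Weight weight, LeafReaderContext context) {
    IndexReader.CacheHelper cacheHelper = weight.getCacheHelper(context);
    return cacheHelper != null;
  }
}
```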
- final IndexReader.CacheHelper cacheHelper = context.reader().getCoreCacheHelper(); + final IndexReader.CacheHelper cacheHelper = in.getCacheHelper(context); if (cacheHelper == null) { // this segment is not suitable for caching return in.bulkScorer(context); diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java index 296f502f95c..e60e4c5717d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java @@ -19,6 +19,7 @@ package org.apache.lucene.search; import java.io.IOException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.Bits; @@ -39,6 +40,12 @@ public final class MatchAllDocsQuery extends Query { public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc())); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { final float score = score(); diff --git a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java index e2d4f9858d3..427ef10fbea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MatchNoDocsQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -58,6 +59,11 @@ public class MatchNoDocsQuery extends Query { return null; } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index d39fdc65d33..6ad41224a0f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -291,6 +291,11 @@ public class MultiPhraseQuery extends Query { } } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Scorer scorer = scorer(context); diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java index 54c6d484a0f..eae489c5e37 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreWrapper.java @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Objects; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; @@ -211,6 +212,11 @@ final class MultiTermQueryConstantScoreWrapper extends return 
scorer(weightOrBitSet.set); } } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java index 10a6ffacd71..419c5a2588e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/NormsFieldExistsQuery.java @@ -23,6 +23,7 @@ import java.util.Objects; import org.apache.lucene.document.StringField; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -75,6 +76,11 @@ public final class NormsFieldExistsQuery extends Query { DocIdSetIterator iterator = reader.getNormValues(field); return new ConstantScoreScorer(this, score(), iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 79703f5908a..5b452ebd88d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -441,7 +441,12 @@ public class PhraseQuery extends Query { needsScores, totalMatchCost); } } - + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + // only called from assert private boolean termNotInReader(LeafReader reader, Term term) throws IOException { return reader.docFreq(term) == 0; @@ -492,14 +497,13 @@ public class PhraseQuery extends Query { * of processing the occurrences of a term * in a document that contains the term. * This is for use by {@link TwoPhaseIterator#matchCost} implementations. - *
    This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available. * @param termsEnum The term is the term at which this TermsEnum is positioned. */ static float termPositionsCost(TermsEnum termsEnum) throws IOException { int docFreq = termsEnum.docFreq(); assert docFreq > 0; - long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available - float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); + long totalTermFreq = termsEnum.totalTermFreq(); + float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java index f37d21d07ef..4fffedb5ea2 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.NoSuchElementException; import org.apache.lucene.document.IntPoint; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues.IntersectVisitor; @@ -150,6 +151,11 @@ public abstract class PointInSetQuery extends Query { return new ConstantScoreScorer(this, score(), result.build().iterator()); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java index 4f1076d4e9e..2bc8c3dd030 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Objects; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PointValues; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; @@ -321,6 +322,11 @@ public abstract class PointRangeQuery extends Query { } return scorerSupplier.get(Long.MAX_VALUE); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index e9e663683df..10564f3d829 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -140,11 +140,7 @@ public final class SynonymQuery extends Query { TermStatistics termStats = searcher.termStatistics(terms[i], termContexts[i]); if (termStats != null) { docFreq = Math.max(termStats.docFreq(), docFreq); - if (termStats.totalTermFreq() == -1) { - totalTermFreq = -1; - } else if (totalTermFreq != -1) { - totalTermFreq += termStats.totalTermFreq(); - } + totalTermFreq += termStats.totalTermFreq(); } } this.similarity = searcher.getSimilarity(true); @@ -217,6 +213,11 @@ public final class SynonymQuery extends Query { return new SynonymScorer(simScorer, this, subScorers); } } + + @Override + public IndexReader.CacheHelper 
getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } } static class SynonymScorer extends DisjunctionScorer { diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 9b64d379174..de8b6c68b37 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -315,6 +315,11 @@ public class TermInSetQuery extends Query implements Accountable { return scorer(weightOrBitSet.set); } } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java index 587c5138355..8d0845031ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Objects; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -65,9 +66,9 @@ public class TermQuery extends Query { collectionStats = searcher.collectionStatistics(term.field()); termStats = searcher.termStatistics(term, termStates); } else { - // we do not need the actual stats, use fake stats with docFreq=maxDoc=1 and ttf=-1 - collectionStats = new CollectionStatistics(term.field(), 1, -1, -1, -1); - termStats = new TermStatistics(term.bytes(), 1, -1); + // we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1 + collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1); + termStats = new TermStatistics(term.bytes(), 1, 1); } if (termStats == null) { @@ -99,6 +100,11 @@ public class TermQuery extends Query { return new TermScorer(this, docs, similarity.simScorer(stats, context)); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + /** * Returns a {@link TermsEnum} positioned at this weights Term or null if * the term does not exist in the given context diff --git a/lucene/core/src/java/org/apache/lucene/search/TermStatistics.java b/lucene/core/src/java/org/apache/lucene/search/TermStatistics.java index 7d4f03a7d34..be9669feef0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermStatistics.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermStatistics.java @@ -24,8 +24,29 @@ import org.apache.lucene.index.TermsEnum; // javadocs import org.apache.lucene.util.BytesRef; /** * Contains statistics for a specific term + *
<p>
+ * This class holds statistics for this term across all documents for scoring purposes:
+ * <ul>
+ *   <li> {@link #docFreq}: number of documents this term occurs in.
+ *   <li> {@link #totalTermFreq}: number of tokens for this term.
+ * </ul>
+ * <p>
+ * The following conditions are always true:
+ * <ul>
+ *   <li> All statistics are positive integers: never zero or negative.
+ *   <li> {@code docFreq} <= {@code totalTermFreq}
+ *   <li> {@code docFreq} <= {@code sumDocFreq} of the collection
+ *   <li> {@code totalTermFreq} <= {@code sumTotalTermFreq} of the collection
+ * </ul>
+ * <p>
+ * Values may include statistics on deleted documents that have not yet been merged away.
+ * <p>
+ * Be careful when performing calculations on these values because they are represented
+ * as 64-bit integer values, you may need to cast to {@code double} for your use.
  * @lucene.experimental
  */
+// TODO: actually add missing cross-checks to guarantee TermStatistics is in bounds of CollectionStatistics,
+// otherwise many similarity functions will implode.
 public class TermStatistics {
   private final BytesRef term;
   private final long docFreq;
@@ -45,29 +66,52 @@ public class TermStatistics {
     if (docFreq <= 0) {
       throw new IllegalArgumentException("docFreq must be positive, docFreq: " + docFreq);
     }
-    if (totalTermFreq != -1) {
-      if (totalTermFreq < docFreq) {
-        throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
-      }
+    if (totalTermFreq <= 0) {
+      throw new IllegalArgumentException("totalTermFreq must be positive, totalTermFreq: " + totalTermFreq);
+    }
+    if (totalTermFreq < docFreq) {
+      throw new IllegalArgumentException("totalTermFreq must be at least docFreq, totalTermFreq: " + totalTermFreq + ", docFreq: " + docFreq);
     }
     this.term = term;
     this.docFreq = docFreq;
     this.totalTermFreq = totalTermFreq;
   }
-  /** returns the term text */
+  /**
+   * The term text.
+   * <p>
+   * This value is never {@code null}.
+   * @return term's text, not {@code null}
+   */
   public final BytesRef term() {
     return term;
   }
-  /** returns the number of documents this term occurs in
-   * @see TermsEnum#docFreq() */
+  /**
+   * The number of documents this term occurs in.
+   * <p>
+   * This is the document-frequency for the term: the count of documents
+   * where the term appears at least one time.
+   * <p>
+   * This value is always a positive number, and never
+   * exceeds {@link #totalTermFreq}. It also cannot exceed {@link CollectionStatistics#sumDocFreq()}.
+   * @return document frequency, in the range [1 .. {@link #totalTermFreq()}]
+   * @see TermsEnum#docFreq()
+   */
   public final long docFreq() {
     return docFreq;
   }
-  /** returns the total number of occurrences of this term
-   * @see TermsEnum#totalTermFreq() */
+  /**
+   * The total number of occurrences of this term.
+   * <p>
+   * This is the token count for the term: the number of times it appears in the field across all documents.
+   * <p>
+   * This value is always a positive number, always at least {@link #docFreq()},
+   * and never exceeds {@link CollectionStatistics#sumTotalTermFreq()}.
+   * @return number of occurrences, in the range [{@link #docFreq()} .. {@link CollectionStatistics#sumTotalTermFreq()}]
+   * @see TermsEnum#totalTermFreq()
+   */
   public final long totalTermFreq() {
     return totalTermFreq;
   }
 }
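The constraints documented above retire the old "-1 means the statistic is unavailable" convention. A minimal sketch of the new contract (not part of this patch; the term and numbers are invented): for a field indexed with IndexOptions.DOCS every freq() is 1, so totalTermFreq simply equals docFreq, and the old -1 sentinel is now rejected by the constructor.

    import org.apache.lucene.search.TermStatistics;
    import org.apache.lucene.util.BytesRef;

    public class TermStatsSketch {
      public static void main(String[] args) {
        // freqs omitted (IndexOptions.DOCS): totalTermFreq == docFreq, never -1
        TermStatistics stats = new TermStatistics(new BytesRef("lucene"), 42, 42);
        System.out.println(stats.totalTermFreq()); // 42

        try {
          new TermStatistics(new BytesRef("lucene"), 42, -1); // old sentinel
        } catch (IllegalArgumentException expected) {
          // "totalTermFreq must be positive" under the new validation above
        }
      }
    }
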
diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java
index af329ec7ba1..e87dd3f2ecd 100644
--- a/lucene/core/src/java/org/apache/lucene/search/Weight.java
+++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -18,8 +18,11 @@ package org.apache.lucene.search;

 import java.io.IOException;
+import java.util.List;
 import java.util.Set;

+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
@@ -102,6 +105,55 @@ public abstract class Weight {
    */
   public abstract Scorer scorer(LeafReaderContext context) throws IOException;

+  /**
+   * Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} to cache this query against.
+   *
+   * Weights that rely only on Terms or Points can return {@code context.reader().getCoreCacheHelper()}.
+   * Weights that use DocValues should call {@link #getDocValuesCacheHelper(String, LeafReaderContext)}.
+   * Weights that should not be cached at all should return {@code null}.
+   *
+   * @param context the {@link LeafReaderContext} to cache against
+   * @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
+   */
+  public abstract IndexReader.CacheHelper getCacheHelper(LeafReaderContext context);
+
+  /**
+   * Given a collection of Weights, return an {@link org.apache.lucene.index.IndexReader.CacheHelper} that will satisfy
+   * the requirements of them all.
+   * @param context the {@link LeafReaderContext} to cache against
+   * @param weights the list of {@link Weight}s to be cached
+   * @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
+   */
+  protected static IndexReader.CacheHelper getCacheHelper(LeafReaderContext context, List<Weight> weights) {
+    if (weights.size() == 0)
+      return null;
+    IndexReader.CacheHelper helper = weights.get(0).getCacheHelper(context);
+    if (helper == null)
+      return null;
+    for (int i = 1; i < weights.size(); i++) {
+      IndexReader.CacheHelper nextHelper = weights.get(i).getCacheHelper(context);
+      if (nextHelper == null || nextHelper != helper)
+        return null;
+    }
+    return helper;
+  }
+
+  /**
+   * Returns an {@link org.apache.lucene.index.IndexReader.CacheHelper} for a Weight using doc values.
+   *
+   * This will return the core cache helper if the field has no doc-values updates;
+   * if its doc values have been updated, caching is unsafe and {@code null} is returned.
+   *
+   * @param field the docvalues field
+   * @param ctx the {@link LeafReaderContext} to cache against
+   * @return an {@link org.apache.lucene.index.IndexReader.CacheHelper} indicating the cache level
+   */
+  public static IndexReader.CacheHelper getDocValuesCacheHelper(String field, LeafReaderContext ctx) {
+    FieldInfo fi = ctx.reader().getFieldInfos().fieldInfo(field);
+    if (fi == null || fi.getDocValuesGen() == -1)
+      return ctx.reader().getCoreCacheHelper();
+    return null;
+  }
+
   /**
    * Optional method.
* Get a {@link ScorerSupplier}, which allows to know the cost of the {@link Scorer} diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java index 47561e43ceb..812f9cece4e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java @@ -85,19 +85,7 @@ public class BM25Similarity extends Similarity { /** The default implementation computes the average as sumTotalTermFreq / docCount */ protected float avgFieldLength(CollectionStatistics collectionStats) { - final long sumTotalTermFreq; - if (collectionStats.sumTotalTermFreq() == -1) { - // frequencies are omitted (tf=1), its # of postings - if (collectionStats.sumDocFreq() == -1) { - // theoretical case only: remove! - return 1f; - } - sumTotalTermFreq = collectionStats.sumDocFreq(); - } else { - sumTotalTermFreq = collectionStats.sumTotalTermFreq(); - } - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); - return (float) (sumTotalTermFreq / (double) docCount); + return (float) (collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount()); } /** @@ -161,7 +149,7 @@ public class BM25Similarity extends Similarity { */ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { final long df = termStats.docFreq(); - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); + final long docCount = collectionStats.docCount(); final float idf = idf(df, docCount); return Explanation.match(idf, "idf, computed as log(1 + (N - n + 0.5) / (n + 0.5)) from:", Explanation.match(df, "n, number of documents containing term"), diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java index f33abdbf774..c7edf706ec7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java @@ -62,7 +62,7 @@ public class ClassicSimilarity extends TFIDFSimilarity { @Override public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) { final long df = termStats.docFreq(); - final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); + final long docCount = collectionStats.docCount(); final float idf = idf(df, docCount); return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:", Explanation.match(df, "docFreq, number of documents containing term"), diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java index babef8f5e03..9407b5cd122 100644 --- a/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java +++ b/lucene/core/src/java/org/apache/lucene/search/similarities/SimilarityBase.java @@ -100,42 +100,16 @@ public abstract class SimilarityBase extends Similarity { /** Fills all member fields defined in {@code BasicStats} in {@code stats}. * Subclasses can override this method to fill additional stats. 
 */
  protected void fillBasicStats(BasicStats stats, CollectionStatistics collectionStats, TermStatistics termStats) {
-    // #positions(field) must be >= #positions(term)
-    assert collectionStats.sumTotalTermFreq() == -1 || collectionStats.sumTotalTermFreq() >= termStats.totalTermFreq();
-    long numberOfDocuments = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
-
-    long docFreq = termStats.docFreq();
-    long totalTermFreq = termStats.totalTermFreq();
-
-    // frequencies are omitted, all postings have tf=1, so totalTermFreq = docFreq
-    if (totalTermFreq == -1) {
-      totalTermFreq = docFreq;
-    }
-
-    final long numberOfFieldTokens;
-    final double avgFieldLength;
-
-    if (collectionStats.sumTotalTermFreq() == -1) {
-      // frequencies are omitted, so sumTotalTermFreq = # postings
-      if (collectionStats.sumDocFreq() == -1) {
-        // theoretical case only: remove!
-        numberOfFieldTokens = docFreq;
-        avgFieldLength = 1f;
-      } else {
-        numberOfFieldTokens = collectionStats.sumDocFreq();
-        avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
-      }
-    } else {
-      numberOfFieldTokens = collectionStats.sumTotalTermFreq();
-      avgFieldLength = (float) (collectionStats.sumTotalTermFreq() / (double)numberOfDocuments);
-    }
+    // TODO: validate this for real, somewhere else
+    assert termStats.totalTermFreq() <= collectionStats.sumTotalTermFreq();
+    assert termStats.docFreq() <= collectionStats.sumDocFreq();

     // TODO: add sumDocFreq for field (numberOfFieldPostings)
-    stats.setNumberOfDocuments(numberOfDocuments);
-    stats.setNumberOfFieldTokens(numberOfFieldTokens);
-    stats.setAvgFieldLength(avgFieldLength);
-    stats.setDocFreq(docFreq);
-    stats.setTotalTermFreq(totalTermFreq);
+    stats.setNumberOfDocuments(collectionStats.docCount());
+    stats.setNumberOfFieldTokens(collectionStats.sumTotalTermFreq());
+    stats.setAvgFieldLength(collectionStats.sumTotalTermFreq() / (double) collectionStats.docCount());
+    stats.setDocFreq(termStats.docFreq());
+    stats.setTotalTermFreq(termStats.totalTermFreq());
   }

   /**
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
index 5765e6bfae3..54744cecb2f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@@ -448,7 +448,7 @@ public abstract class TFIDFSimilarity extends Similarity {
    */
   public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
     final long df = termStats.docFreq();
-    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
+    final long docCount = collectionStats.docCount();
     final float idf = idf(df, docCount);
     return Explanation.match(idf, "idf(docFreq, docCount)",
         Explanation.match(df, "docFreq, number of documents containing term"),
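As the BM25Similarity, ClassicSimilarity, TFIDFSimilarity and SimilarityBase hunks above show, guaranteeing docCount() and sumTotalTermFreq() lets every similarity drop its -1 fallback branches. A rough sketch of the resulting arithmetic (the field name and statistics are invented, mirroring the simplified avgFieldLength computation above):

    import org.apache.lucene.search.CollectionStatistics;

    public class AvgFieldLengthSketch {
      static double avgFieldLength(CollectionStatistics stats) {
        // no sumDocFreq()/maxDoc() fallbacks needed any more
        return stats.sumTotalTermFreq() / (double) stats.docCount();
      }

      public static void main(String[] args) {
        // arguments: field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq
        CollectionStatistics stats = new CollectionStatistics("body", 100, 95, 5000, 1200);
        System.out.println(avgFieldLength(stats)); // 5000 tokens / 95 docs = ~52.6
      }
    }
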
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
index 1ed9669147c..34a014badd5 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/package-info.java
@@ -32,13 +32,13 @@
 *
 * <h3>Summary of the Ranking Methods</h3>
 *
- * <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
- * scoring function. It is based on a highly optimized
- * <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
- * information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
- *
 * <p>{@link org.apache.lucene.search.similarities.BM25Similarity} is an optimized
 * implementation of the successful Okapi BM25 model.
+ *
+ * <p>{@link org.apache.lucene.search.similarities.ClassicSimilarity} is the original Lucene
+ * scoring function. It is based on the
+ * <a href="http://en.wikipedia.org/wiki/Vector_Space_Model">Vector Space Model</a>. For more
+ * information, see {@link org.apache.lucene.search.similarities.TFIDFSimilarity}.
 *
 * <p>
    {@link org.apache.lucene.search.similarities.SimilarityBase} provides a basic * implementation of the Similarity contract and exposes a highly simplified diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java index 0ce70931e15..0af481f1cdf 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanContainingQuery.java @@ -19,8 +19,10 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Map; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -115,5 +117,10 @@ public final class SpanContainingQuery extends SpanContainQuery { } }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, Arrays.asList(bigWeight, littleWeight)); + } } } \ No newline at end of file diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 7958f4758b0..a7c42d681c8 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -229,6 +229,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { w.extractTerms(terms); } } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, subWeights); + } } @Override @@ -319,6 +324,11 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { public void extractTerms(Set terms) { } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 00bcc4c1ac7..154ddfe2582 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; import java.io.IOException; +import java.util.Arrays; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -191,6 +192,11 @@ public final class SpanNotQuery extends SpanQuery { public void extractTerms(Set terms) { includeWeight.extractTerms(terms); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, Arrays.asList(includeWeight, excludeWeight)); + } } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 15abc7ddb27..653fc70f5fa 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -138,6 +138,11 @@ public final class SpanOrQuery extends SpanQuery { } } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, subWeights); + } + @Override public void extractTermContexts(Map contexts) { for 
(SpanWeight w : subWeights) { diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 9613ceb73ff..376ceeb4f72 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -86,6 +86,11 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea matchWeight.extractTerms(terms); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return matchWeight.getCacheHelper(context); + } + @Override public void extractTermContexts(Map contexts) { matchWeight.extractTermContexts(contexts); diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 3e13be7ecb1..1d1b21f837e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; @@ -91,6 +92,11 @@ public class SpanTermQuery extends SpanQuery { terms.add(term); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public void extractTermContexts(Map contexts) { contexts.put(term, termContext); @@ -135,7 +141,6 @@ public class SpanTermQuery extends SpanQuery { /** Returns an expected cost in simple operations * of processing the occurrences of a term * in a document that contains the term. - *
This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
 *  @param termsEnum The term is the term at which this TermsEnum is positioned.
 *  <p>
    * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost(). @@ -146,8 +151,9 @@ public class SpanTermQuery extends SpanQuery { static float termPositionsCost(TermsEnum termsEnum) throws IOException { int docFreq = termsEnum.docFreq(); assert docFreq > 0; - long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available - float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); + long totalTermFreq = termsEnum.totalTermFreq(); + assert totalTermFreq > 0; + float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq; return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; } diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java index 1bb0f501349..d29b4dd2c6f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanWithinQuery.java @@ -19,8 +19,10 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Map; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -116,6 +118,11 @@ public final class SpanWithinQuery extends SpanContainQuery { } }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, Arrays.asList(littleWeight, bigWeight)); + } } } \ No newline at end of file diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat3.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat3.java index 1b3b9affdf3..3b99ee17105 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat3.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestBlockPostingsFormat3.java @@ -172,10 +172,11 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different + boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs(); boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions(); TermsEnum leftTermsEnum = leftTerms.iterator(); TermsEnum rightTermsEnum = rightTerms.iterator(); - assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHavePositions); + assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions); assertTermsSeeking(leftTerms, rightTerms); @@ -188,7 +189,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { // TODO: test start term too TermsEnum leftIntersection = leftTerms.intersect(automaton, null); TermsEnum rightIntersection = rightTerms.intersect(automaton, null); - assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHavePositions); + assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions); } } } @@ -263,13 +264,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { * checks collection-level statistics on Terms */ public void assertTermsStatistics(Terms leftTerms, Terms rightTerms) throws Exception { - if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) { - assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount()); - } - if 
(leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) { - assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq()); - } - if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) { + assertEquals(leftTerms.getDocCount(), rightTerms.getDocCount()); + assertEquals(leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq()); + if (leftTerms.hasFreqs() && rightTerms.hasFreqs()) { assertEquals(leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq()); } if (leftTerms.size() != -1 && rightTerms.size() != -1) { @@ -281,7 +278,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { * checks the terms enum sequentially * if deep is false, it does a 'shallow' test that doesnt go down to the docsenums */ - public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasPositions) throws Exception { + public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep, boolean hasFreqs, boolean hasPositions) throws Exception { BytesRef term; PostingsEnum leftPositions = null; PostingsEnum rightPositions = null; @@ -290,7 +287,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { while ((term = leftTermsEnum.next()) != null) { assertEquals(term, rightTermsEnum.next()); - assertTermStats(leftTermsEnum, rightTermsEnum); + assertTermStats(leftTermsEnum, rightTermsEnum, hasFreqs); if (deep) { if (hasPositions) { // with payloads + off @@ -350,9 +347,9 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase { /** * checks term-level statistics */ - public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws Exception { + public void assertTermStats(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean bothHaveFreqs) throws Exception { assertEquals(leftTermsEnum.docFreq(), rightTermsEnum.docFreq()); - if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) { + if (bothHaveFreqs) { assertEquals(leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq()); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiTermsEnum.java index a265c9c174e..dbd685ac77e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiTermsEnum.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiTermsEnum.java @@ -148,26 +148,22 @@ public class TestMultiTermsEnum extends LuceneTestCase { @Override public long size() throws IOException { - // Docs say we can return -1 if we don't know. - return -1; + throw new UnsupportedOperationException(); } @Override public long getSumTotalTermFreq() throws IOException { - // Docs say we can return -1 if we don't know. - return -1; + throw new UnsupportedOperationException(); } @Override public long getSumDocFreq() throws IOException { - // Docs say we can return -1 if we don't know. - return -1; + throw new UnsupportedOperationException(); } @Override public int getDocCount() throws IOException { - // Docs say we can return -1 if we don't know. 
- return -1; + throw new UnsupportedOperationException(); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/core/src/test/org/apache/lucene/index/TestOmitTf.java index 8af744f78c3..e0f618b9a07 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOmitTf.java @@ -445,7 +445,7 @@ public class TestOmitTf extends LuceneTestCase { } } - /** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are -1 */ + /** test that when freqs are omitted, that totalTermFreq and sumTotalTermFreq are docFreq, and sumDocFreq */ public void testStats() throws Exception { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, @@ -459,8 +459,8 @@ public class TestOmitTf extends LuceneTestCase { iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); - assertEquals(-1, ir.totalTermFreq(new Term("foo", new BytesRef("bar")))); - assertEquals(-1, ir.getSumTotalTermFreq("foo")); + assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar")))); + assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo")); ir.close(); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 151d475b9af..c81ad6f0058 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; @@ -262,6 +263,11 @@ final class JustCompileSearch { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java index 065def193f2..fc3d1d37778 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java @@ -94,6 +94,11 @@ public class TestBooleanScorer extends LuceneTestCase { throw new UnsupportedOperationException(); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public BulkScorer bulkScorer(LeafReaderContext context) { return new BulkScorer() { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java index 85d2bf91d6c..094318a42f4 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestLRUQueryCache.java @@ -29,6 +29,7 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -36,15 +37,16 @@ import java.util.concurrent.atomic.AtomicLong; import 
java.util.concurrent.atomic.AtomicReference; import com.carrotsearch.randomizedtesting.generators.RandomPicks; - import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FilterDirectoryReader; import org.apache.lucene.index.FilterLeafReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -358,6 +360,11 @@ public class TestLRUQueryCache extends LuceneTestCase { public Scorer scorer(LeafReaderContext context) throws IOException { return null; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } @@ -947,6 +954,11 @@ public class TestLRUQueryCache extends LuceneTestCase { public Scorer scorer(LeafReaderContext context) throws IOException { return null; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } @@ -1276,6 +1288,78 @@ public class TestLRUQueryCache extends LuceneTestCase { dir.close(); } + // A query that returns null from Weight.getCacheHelper + private static class NoCacheQuery extends Query { + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + return new Weight(this) { + @Override + public void extractTerms(Set terms) { + + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + return null; + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + return null; + } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + }; + } + + @Override + public String toString(String field) { + return "NoCacheQuery"; + } + + @Override + public boolean equals(Object obj) { + return sameClassAs(obj); + } + + @Override + public int hashCode() { + return 0; + } + } + + public void testQueryNotSuitedForCaching() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + w.addDocument(new Document()); + DirectoryReader reader = w.getReader(); + IndexSearcher searcher = newSearcher(reader); + searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE); + + LRUQueryCache cache = new LRUQueryCache(2, 10000, context -> true); + searcher.setQueryCache(cache); + + assertEquals(0, searcher.count(new NoCacheQuery())); + assertEquals(0, cache.getCacheCount()); + + // BooleanQuery wrapping an uncacheable query should also not be cached + BooleanQuery bq = new BooleanQuery.Builder() + .add(new NoCacheQuery(), Occur.MUST) + .add(new TermQuery(new Term("field", "term")), Occur.MUST).build(); + assertEquals(0, searcher.count(bq)); + assertEquals(0, cache.getCacheCount()); + + reader.close(); + w.close(); + dir.close(); + + } + private static class DummyQuery2 extends Query { private final AtomicBoolean scorerCreated; @@ -1291,6 +1375,12 @@ public class TestLRUQueryCache extends LuceneTestCase { public Scorer 
scorer(LeafReaderContext context) throws IOException { return scorerSupplier(context).get(Long.MAX_VALUE); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { final Weight weight = this; @@ -1351,4 +1441,110 @@ public class TestLRUQueryCache extends LuceneTestCase { w.close(); dir.close(); } + + static class DVCacheQuery extends Query { + + final String field; + + AtomicInteger scorerCreatedCount = new AtomicInteger(0); + + DVCacheQuery(String field) { + this.field = field; + } + + @Override + public String toString(String field) { + return "DVCacheQuery"; + } + + @Override + public boolean equals(Object obj) { + return sameClassAs(obj); + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + return new ConstantScoreWeight(this, 1) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + scorerCreatedCount.incrementAndGet(); + return new ConstantScoreScorer(this, 1, DocIdSetIterator.all(context.reader().maxDoc())); + } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } + + }; + } + } + + public void testDocValuesUpdatesDontBreakCache() throws IOException { + Directory dir = newDirectory(); + IndexWriterConfig iwc = newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE); + //RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc); + IndexWriter w = new IndexWriter(dir, iwc); + w.addDocument(new Document()); + w.commit(); + DirectoryReader reader = DirectoryReader.open(w); + IndexSearcher searcher = newSearcher(reader); + searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE); + + LRUQueryCache cache = new LRUQueryCache(1, 1000, context -> true); + searcher.setQueryCache(cache); + + DVCacheQuery query = new DVCacheQuery("field"); + assertEquals(1, searcher.count(query)); + assertEquals(1, query.scorerCreatedCount.get()); + assertEquals(1, searcher.count(query)); + assertEquals(1, query.scorerCreatedCount.get()); // should be cached + + Document doc = new Document(); + doc.add(new NumericDocValuesField("field", 1)); + doc.add(newTextField("text", "text", Store.NO)); + w.addDocument(doc); + reader.close(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE); + searcher.setQueryCache(cache); + + assertEquals(2, searcher.count(query)); + assertEquals(2, query.scorerCreatedCount.get()); // first segment cached + + reader.close(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE); + searcher.setQueryCache(cache); + + assertEquals(2, searcher.count(query)); + assertEquals(2, query.scorerCreatedCount.get()); // both segments cached + + + w.updateNumericDocValue(new Term("text", "text"), "field", 2l); + reader.close(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + searcher.setQueryCachingPolicy(QueryCachingPolicy.ALWAYS_CACHE); + searcher.setQueryCache(cache); + + assertEquals(2, searcher.count(query)); + assertEquals(3, query.scorerCreatedCount.get()); // second segment no longer cached due to DV update + + assertEquals(2, searcher.count(query)); + 
assertEquals(4, query.scorerCreatedCount.get()); // still no caching + + reader.close(); + w.close(); + dir.close(); + + } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java index f8fe82c41c4..3f8b59f36f1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestNeedsScores.java @@ -18,7 +18,6 @@ package org.apache.lucene.search; import java.io.IOException; -import java.util.Set; import java.util.Objects; import org.apache.lucene.document.Document; @@ -103,17 +102,7 @@ public class TestNeedsScores extends LuceneTestCase { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { final Weight w = in.createWeight(searcher, needsScores, boost); - return new Weight(AssertNeedsScores.this) { - @Override - public void extractTerms(Set terms) { - w.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return w.explain(context, doc); - } - + return new FilterWeight(w) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { assertEquals("query=" + in, value, needsScores); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java index ab44297c50e..4829c982dad 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestQueryRescorer.java @@ -487,6 +487,11 @@ public class TestQueryRescorer extends LuceneTestCase { }; } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { return null; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java index 562f2da9e2d..7c86d3bb54b 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -155,6 +155,11 @@ public class TestScorerPerf extends LuceneTestCase { public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), new BitSetIterator(docs, docs.approximateCardinality())); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java index 5c06a74ed74..6107c0242c5 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java @@ -249,6 +249,11 @@ public class TestSortRandom extends LuceneTestCase { return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality())); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java index a6e046a8972..2f25e642917 100644 --- 
a/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java @@ -124,6 +124,11 @@ public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase { public Scorer scorer(LeafReaderContext context) throws IOException { return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(1)); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java index e52c9742f65..8a6227c9552 100644 --- a/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java +++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestSimilarityBase.java @@ -40,6 +40,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.Version; /** @@ -183,7 +184,17 @@ public class TestSimilarityBase extends LuceneTestCase { } private CollectionStatistics toCollectionStats(BasicStats stats) { - return new CollectionStatistics(stats.field, stats.getNumberOfDocuments(), -1, stats.getNumberOfFieldTokens(), -1); + long sumTtf = stats.getNumberOfFieldTokens(); + long sumDf; + if (sumTtf == -1) { + sumDf = TestUtil.nextLong(random(), stats.getNumberOfDocuments(), 2L * stats.getNumberOfDocuments()); + } else { + sumDf = TestUtil.nextLong(random(), Math.min(stats.getNumberOfDocuments(), sumTtf), sumTtf); + } + int docCount = Math.toIntExact(Math.min(sumDf, stats.getNumberOfDocuments())); + int maxDoc = TestUtil.nextInt(random(), docCount, docCount + 10); + + return new CollectionStatistics(stats.field, maxDoc, docCount, sumTtf, sumDf); } private TermStatistics toTermStats(BasicStats stats) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java index bb9db7ae07a..822eeb4210e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java @@ -17,8 +17,10 @@ package org.apache.lucene.facet; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; +import java.util.List; import java.util.Objects; import java.util.Set; @@ -101,6 +103,14 @@ class DrillSidewaysQuery extends Query { throw new UnsupportedOperationException(); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + List weights = new ArrayList<>(); + weights.add(baseWeight); + weights.addAll(Arrays.asList(drillDowns)); + return getCacheHelper(context, weights); + } + @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { Scorer baseScorer = baseWeight.scorer(context); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java index 0c45a82994a..6ce33d47b10 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/DoubleRange.java @@ -171,6 +171,11 @@ public final class DoubleRange 
extends Range { }; return new ConstantScoreScorer(this, score(), twoPhase); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to LongValuesSource? + } }; } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java index c762342e7f1..e7c3b942d22 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/LongRange.java @@ -163,6 +163,11 @@ public final class LongRange extends Range { }; return new ConstantScoreScorer(this, score(), twoPhase); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to LongValuesSource? + } }; } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java index ff3e2fc302f..846b9b20ca6 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java @@ -16,6 +16,16 @@ */ package org.apache.lucene.facet; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -58,16 +68,6 @@ import org.apache.lucene.util.InPlaceMergeSorter; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.TestUtil; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - public class TestDrillSideways extends FacetTestCase { protected DrillSideways getNewDrillSideways(IndexSearcher searcher, FacetsConfig config, @@ -740,6 +740,11 @@ public class TestDrillSideways extends FacetTestCase { }); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + }; } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java index 233738ceff0..1aff43be91b 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeFacetCounts.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.document.Document; @@ -29,8 +28,8 @@ import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.facet.DrillDownQuery; -import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.DrillSideways; +import org.apache.lucene.facet.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.FacetField; import org.apache.lucene.facet.FacetResult; import org.apache.lucene.facet.FacetTestCase; @@ -46,10 +45,10 @@ import org.apache.lucene.index.IndexReader; 
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; import org.apache.lucene.search.DoubleValues; import org.apache.lucene.search.DoubleValuesSource; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FilterWeight; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.LongValuesSource; import org.apache.lucene.search.MatchAllDocsQuery; @@ -717,24 +716,12 @@ public class TestRangeFacetCounts extends FacetTestCase { @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { final Weight in = this.in.createWeight(searcher, needsScores, boost); - return new Weight(in.getQuery()) { - - @Override - public void extractTerms(Set terms) { - in.extractTerms(terms); - } - - @Override - public Explanation explain(LeafReaderContext context, int doc) throws IOException { - return in.explain(context, doc); - } - + return new FilterWeight(in) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { used.set(true); return in.scorer(context); } - }; } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermVector.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermVector.java index 346ecba013d..135bfe88e2b 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermVector.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermVector.java @@ -238,13 +238,8 @@ public final class TokenStreamFromTermVector extends TokenStream { // Estimate the number of position slots we need from term stats. We use some estimation factors taken from // Wikipedia that reduce the likelihood of needing to expand the array. int sumTotalTermFreq = (int) vector.getSumTotalTermFreq(); - if (sumTotalTermFreq == -1) {//unfortunately term vectors seem to not have this stat - int size = (int) vector.size(); - if (size == -1) {//doesn't happen with term vectors, it seems, but pick a default any way - size = 128; - } - sumTotalTermFreq = (int)(size * 2.4); - } + assert sumTotalTermFreq != -1; + final int originalPositionEstimate = (int) (sumTotalTermFreq * 1.5);//less than 1 in 10 docs exceed this // This estimate is based on maxStartOffset. Err on the side of this being larger than needed. 
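Every getCacheHelper() implementation added in the hunks above follows one of three patterns: return the segment's core cache helper when matches depend only on the terms or points index, delegate to getDocValuesCacheHelper(field, context) when a doc-values field is read, or return null when caching is never safe. Compound weights (the span queries, DrillSidewaysQuery) combine their children through the static Weight.getCacheHelper(context, weights) overload, which only returns a helper when all sub-weights agree on one. A minimal sketch of the doc-values case under the new API (the class, query and field names are hypothetical, not part of this patch):

    import java.io.IOException;
    import java.util.Set;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.ConstantScoreScorer;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.search.Explanation;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.Weight;

    final class SketchDocValuesWeight extends Weight {
      private final String dvField; // hypothetical doc-values field

      SketchDocValuesWeight(Query query, String dvField) {
        super(query);
        this.dvField = dvField;
      }

      @Override
      public void extractTerms(Set<Term> terms) {}

      @Override
      public Explanation explain(LeafReaderContext context, int doc) throws IOException {
        return Explanation.noMatch("sketch only");
      }

      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        // stand-in scorer; a real implementation would build its iterator from dvField
        return new ConstantScoreScorer(this, 1f, DocIdSetIterator.all(context.reader().maxDoc()));
      }

      @Override
      public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) {
        // cacheable only while dvField has no doc-values updates; null once it does,
        // which is exactly what testDocValuesUpdatesDontBreakCache above exercises
        return getDocValuesCacheHelper(dvField, context);
      }
    }
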
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties index b06adac5d3c..2478f85be69 100644 --- a/lucene/ivy-versions.properties +++ b/lucene/ivy-versions.properties @@ -153,7 +153,7 @@ org.apache.hadoop.version = 2.7.4 /org.apache.httpcomponents/httpcore = 4.4.6 /org.apache.httpcomponents/httpmime = 4.5.3 -/org.apache.ivy/ivy = 2.3.0 +/org.apache.ivy/ivy = 2.4.0 org.apache.james.apache.mime4j.version = 0.7.2 /org.apache.james/apache-mime4j-core = ${org.apache.james.apache.mime4j.version} diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java index 9ddc5eeda10..40c45456d97 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; @@ -154,6 +155,11 @@ final class GlobalOrdinalsQuery extends Query { } } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(joinField, context); + } + } final static class OrdinalMapScorer extends BaseGlobalOrdinalScorer { diff --git a/lucene/join/src/java/org/apache/lucene/search/join/ParentChildrenBlockJoinQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/ParentChildrenBlockJoinQuery.java index a739294d0d9..ef3eb2cb6eb 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/ParentChildrenBlockJoinQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/ParentChildrenBlockJoinQuery.java @@ -194,6 +194,11 @@ public class ParentChildrenBlockJoinQuery extends Query { } }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to BitSetProducer? 
+ } }; } } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/PointInSetIncludingScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/PointInSetIncludingScoreQuery.java index 3130ae65db8..a1c16fa9c77 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/PointInSetIncludingScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/PointInSetIncludingScoreQuery.java @@ -29,6 +29,7 @@ import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -186,6 +187,11 @@ abstract class PointInSetIncludingScoreQuery extends Query { }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java index cd3beaf646a..98bdbe034f3 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java @@ -21,6 +21,7 @@ import java.util.Locale; import java.util.Objects; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.Term; @@ -140,6 +141,11 @@ class TermsIncludingScoreQuery extends Query { } } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + }; } diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index 12fefd50579..6e1c2cdd5c6 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -562,6 +562,11 @@ public class TestJoinUtil extends LuceneTestCase { } }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } }; } diff --git a/lucene/licenses/ivy-2.3.0.jar.sha1 b/lucene/licenses/ivy-2.3.0.jar.sha1 deleted file mode 100644 index f4b036fe046..00000000000 --- a/lucene/licenses/ivy-2.3.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -c5ebf1c253ad4959a29f4acfe696ee48cdd9f473 diff --git a/lucene/licenses/ivy-2.4.0.jar.sha1 b/lucene/licenses/ivy-2.4.0.jar.sha1 new file mode 100644 index 00000000000..3863b25583f --- /dev/null +++ b/lucene/licenses/ivy-2.4.0.jar.sha1 @@ -0,0 +1 @@ +5abe4c24bbe992a9ac07ca563d5bd3e8d569e9ed diff --git a/lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java index 41479f2ff5d..1864882f26a 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java @@ -17,6 +17,7 @@ package org.apache.lucene.queries; import java.io.IOException; +import java.util.Arrays; import java.util.Objects; import java.util.Set; @@ -121,6 +122,11 @@ public class BoostingQuery extends Query { } }; } + + @Override + public 
IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getCacheHelper(context, Arrays.asList(matchWeight, contextWeight)); + } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java index 632bcc7b74d..1fb0d97fac0 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java @@ -17,9 +17,11 @@ package org.apache.lucene.queries; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.List; import java.util.Set; import org.apache.lucene.index.IndexReader; @@ -207,6 +209,14 @@ public class CustomScoreQuery extends Query implements Cloneable { return new CustomScorer(CustomScoreQuery.this.getCustomScoreProvider(context), this, queryWeight, subQueryScorer, valSrcScorers); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + List weights = new ArrayList<>(); + weights.add(subQueryWeight); + weights.addAll(Arrays.asList(valSrcWeights)); + return getCacheHelper(context, weights); + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { Explanation explain = doExplain(context, doc); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java index c8b94233ef3..095d6f52839 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java @@ -88,6 +88,11 @@ public final class BoostedQuery extends Query { return new BoostedQuery.CustomScorer(context, this, subQueryScorer, boostVal); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public Explanation explain(LeafReaderContext readerContext, int doc) throws IOException { Explanation subQueryExpl = qWeight.explain(readerContext,doc); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java index 4a9c7099088..dfcd20c93c6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionMatchQuery.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Objects; import java.util.function.DoublePredicate; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; @@ -80,6 +81,11 @@ public final class FunctionMatchQuery extends Query { }; return new ConstantScoreScorer(this, score(), twoPhase); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to DoubleValuesSource? 
+ } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java index 4acb563a9c4..b712bf08ac2 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java @@ -74,6 +74,11 @@ public class FunctionQuery extends Query { return new AllScorer(context, this, boost); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { return ((AllScorer)scorer(context)).explain(doc); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java index 41572a54c2f..dbe8d41bb7b 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java @@ -21,6 +21,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation; @@ -153,5 +154,10 @@ public class FunctionRangeQuery extends Query { // getRangeScorer takes String args and parses them. Weird. return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java index aa02c19dad3..6e4eb433b19 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionScoreQuery.java @@ -137,5 +137,10 @@ public final class FunctionScoreQuery extends Query { } }; } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to DoubleValuesSource + } } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java index 746ad8e9d18..716d3045397 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SumTotalTermFreqValueSource.java @@ -29,8 +29,6 @@ import java.util.Map; /** * SumTotalTermFreqValueSource returns the number of tokens. * (sum of term freqs across all documents, across all terms). - * Returns -1 if frequencies were omitted for the field, or if - * the codec doesn't support this statistic. 
* @lucene.internal */ public class SumTotalTermFreqValueSource extends ValueSource { @@ -61,12 +59,8 @@ public class SumTotalTermFreqValueSource extends ValueSource { Terms terms = readerContext.reader().terms(indexedField); if (terms == null) continue; long v = terms.getSumTotalTermFreq(); - if (v == -1) { - sumTotalTermFreq = -1; - break; - } else { - sumTotalTermFreq += v; - } + assert v != -1; + sumTotalTermFreq += v; } final long ttf = sumTotalTermFreq; context.put(this, new LongDocValues(this) { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java index aabe45f6d8f..5a69e42fcd2 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/TotalTermFreqValueSource.java @@ -30,8 +30,6 @@ import java.util.Map; /** * TotalTermFreqValueSource returns the total term freq * (sum of term freqs across all documents). - * Returns -1 if frequencies were omitted for the field, or if - * the codec doesn't support this statistic. * @lucene.internal */ public class TotalTermFreqValueSource extends ValueSource { @@ -66,12 +64,8 @@ public class TotalTermFreqValueSource extends ValueSource { long totalTermFreq = 0; for (LeafReaderContext readerContext : searcher.getTopReaderContext().leaves()) { long val = readerContext.reader().totalTermFreq(new Term(indexedField, indexedBytes)); - if (val == -1) { - totalTermFreq = -1; - break; - } else { - totalTermFreq += val; - } + assert val != -1; + totalTermFreq += val; } final long ttf = totalTermFreq; context.put(this, new LongDocValues(this) { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java index 3db80fbe547..5df04c8ebb8 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/PayloadScoreQuery.java @@ -161,6 +161,11 @@ public class PayloadScoreQuery extends SpanQuery { return new PayloadSpanScorer(this, payloadSpans, docScorer); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return innerWeight.getCacheHelper(context); + } + @Override public void extractTerms(Set terms) { innerWeight.extractTerms(terms); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java index 29f3b4a951d..6175adddc16 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/payloads/SpanPayloadCheckQuery.java @@ -129,6 +129,11 @@ public class SpanPayloadCheckQuery extends SpanQuery { final Similarity.SimScorer docScorer = getSimScorer(context); return new SpanScorer(this, spans, docScorer); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return matchWeight.getCacheHelper(context); + } } private class PayloadChecker implements SpanCollector { diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java index 
167bb4808e4..b33b258f562 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsReader.java @@ -137,7 +137,7 @@ public final class VersionBlockTreeTermsReader extends FieldsProducer { if (sumDocFreq < docCount) { // #postings must be >= #docs with field throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, in); } - if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings + if (sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq, in); } final long indexStartFP = indexIn.readVLong(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesBoxQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesBoxQuery.java index 50ddf1a14d9..1bcea5379f6 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesBoxQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesBoxQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoUtils; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ConstantScoreScorer; @@ -139,6 +140,11 @@ final class LatLonDocValuesBoxQuery extends Query { }; return new ConstantScoreScorer(this, boost, iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java index e38d9feceeb..8a2d73c0215 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonDocValuesDistanceQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoUtils; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ConstantScoreScorer; @@ -126,6 +127,11 @@ final class LatLonDocValuesDistanceQuery extends Query { }; return new ConstantScoreScorer(this, boost, iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java index b16efe3d34f..dd3be275379 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointDistanceQuery.java @@ -22,6 +22,7 @@ import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.GeoUtils; import org.apache.lucene.geo.Rectangle; import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; @@ -117,6 +118,11 @@ final class LatLonPointDistanceQuery extends Query { return scorerSupplier.get(Long.MAX_VALUE); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { LeafReader reader = context.reader(); diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java index c272b4d4c48..32a3f780b2a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/document/LatLonPointInPolygonQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Arrays; import org.apache.lucene.geo.Rectangle; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.PointValues.IntersectVisitor; import org.apache.lucene.index.PointValues.Relation; import org.apache.lucene.search.ConstantScoreScorer; @@ -159,6 +160,11 @@ final class LatLonPointInPolygonQuery extends Query { return new ConstantScoreScorer(this, score(), result.build().iterator()); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java index 288e05b05bf..4a9918bcdd5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/CoveringQuery.java @@ -175,6 +175,11 @@ public final class CoveringQuery extends Query { } return new CoveringScorer(this, scorers, minimumNumberMatch.getValues(context, null), context.reader().maxDoc()); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to LongValuesSource? 
+ } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java index 772570372f4..de3b2ad8a1e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java @@ -26,6 +26,7 @@ import java.util.Set; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; @@ -121,6 +122,11 @@ public class DocValuesNumbersQuery extends Query { } }); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java index 6e30baed9cd..7f0d3e162dd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java @@ -203,6 +203,11 @@ public class DocValuesTermsQuery extends Query { }); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return getDocValuesCacheHelper(field, context); + } }; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java index 67d80272afb..346d519f7bd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonQuery.java @@ -402,7 +402,12 @@ public class TermAutomatonQuery extends Query { return null; } } - + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { // TODO diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java index 5a1d50668bb..18661bf4700 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java +++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestTermAutomatonQuery.java @@ -650,6 +650,11 @@ public class TestTermAutomatonQuery extends LuceneTestCase { } return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality())); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } }; } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java index 36a9eff3da8..6c1fa21c158 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/CompositeVerifyQuery.java @@ -96,6 +96,11 @@ public class CompositeVerifyQuery extends Query { final TwoPhaseIterator predFuncValues = predicateValueSource.iterator(context, indexQueryScorer.iterator()); return new 
ConstantScoreScorer(this, score(), predFuncValues); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to PredicateValueSource? + } }; } } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java index a6ea3a3586a..d13d603efc5 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/composite/IntersectsRPTVerifyQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.spatial.composite; import java.io.IOException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; @@ -133,6 +134,11 @@ public class IntersectsRPTVerifyQuery extends Query { return new ConstantScoreScorer(this, score(), twoPhaseIterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to PredicateValueSource? + } }; } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeQuery.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeQuery.java index dbe643e4b99..a496160c347 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeQuery.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/prefix/AbstractPrefixTreeQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.spatial.prefix; import java.io.IOException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PostingsEnum; @@ -90,6 +91,11 @@ public abstract class AbstractPrefixTreeQuery extends Query { } return new ConstantScoreScorer(this, score(), disi); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java index c036d345846..4795a564cd5 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/serialized/SerializedDVStrategy.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; @@ -141,6 +142,11 @@ public class SerializedDVStrategy extends SpatialStrategy { TwoPhaseIterator it = predicateValueSource.iterator(context, approximation); return new ConstantScoreScorer(this, score(), it); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to PredicateValueSource + } }; } diff --git 
a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java index 868897d3630..a89694b5e3b 100644 --- a/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java +++ b/lucene/spatial-extras/src/java/org/apache/lucene/spatial/vector/PointVectorStrategy.java @@ -288,6 +288,11 @@ public class PointVectorStrategy extends SpatialStrategy { }; return new ConstantScoreScorer(this, score(), twoPhase); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; // TODO delegate to DoubleValuesSource? + } }; } diff --git a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java index 56cb45d92d4..534458ab07b 100644 --- a/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java +++ b/lucene/spatial3d/src/java/org/apache/lucene/spatial3d/PointInGeo3DShapeQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.spatial3d; import java.io.IOException; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.spatial3d.geom.BasePlanetObject; import org.apache.lucene.spatial3d.geom.GeoShape; import org.apache.lucene.spatial3d.geom.PlanetModel; @@ -103,6 +104,11 @@ final class PointInGeo3DShapeQuery extends Query { return new ConstantScoreScorer(this, score(), result.build().iterator()); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionWeight.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionWeight.java index d74e56f2ecf..78965039aa5 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionWeight.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionWeight.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest.document; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -132,6 +133,11 @@ public class CompletionWeight extends Weight { throw new UnsupportedOperationException(); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public void extractTerms(Set terms) { // no-op diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java index 42ddb0e1aa8..c87697bcc11 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java @@ -117,7 +117,7 @@ public class AssertingLeafReader extends FilterLeafReader { TermsEnum termsEnum = in.intersect(automaton, bytes); assert termsEnum != null; assert bytes == null || bytes.isValid(); - return new AssertingTermsEnum(termsEnum); + return new AssertingTermsEnum(termsEnum, hasFreqs()); } @Override @@ -134,11 +134,35 @@ public class AssertingLeafReader extends FilterLeafReader { return v; } + @Override + public int getDocCount() throws 
IOException { + final int docCount = in.getDocCount(); + assert docCount > 0; + return docCount; + } + + @Override + public long getSumDocFreq() throws IOException { + final long sumDf = in.getSumDocFreq(); + assert sumDf >= getDocCount(); + return sumDf; + } + + @Override + public long getSumTotalTermFreq() throws IOException { + final long sumTtf = in.getSumTotalTermFreq(); + if (hasFreqs() == false) { + assert sumTtf == in.getSumDocFreq(); + } + assert sumTtf >= getSumDocFreq(); + return sumTtf; + } + @Override public TermsEnum iterator() throws IOException { TermsEnum termsEnum = super.iterator(); assert termsEnum != null; - return new AssertingTermsEnum(termsEnum); + return new AssertingTermsEnum(termsEnum, hasFreqs()); } @Override @@ -154,10 +178,12 @@ public class AssertingLeafReader extends FilterLeafReader { private enum State {INITIAL, POSITIONED, UNPOSITIONED}; private State state = State.INITIAL; private final boolean delegateOverridesSeekExact; + private final boolean hasFreqs; - public AssertingTermsEnum(TermsEnum in) { + public AssertingTermsEnum(TermsEnum in, boolean hasFreqs) { super(in); delegateOverridesSeekExact = SEEK_EXACT.isOverriddenAsOf(in.getClass()); + this.hasFreqs = hasFreqs; } @Override @@ -210,14 +236,22 @@ public class AssertingLeafReader extends FilterLeafReader { public int docFreq() throws IOException { assertThread("Terms enums", creationThread); assert state == State.POSITIONED : "docFreq() called on unpositioned TermsEnum"; - return super.docFreq(); + final int df = super.docFreq(); + assert df > 0; + return df; } @Override public long totalTermFreq() throws IOException { assertThread("Terms enums", creationThread); assert state == State.POSITIONED : "totalTermFreq() called on unpositioned TermsEnum"; - return super.totalTermFreq(); + final long ttf = super.totalTermFreq(); + if (hasFreqs) { + assert ttf >= docFreq(); + } else { + assert ttf == docFreq(); + } + return ttf; } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java index a6b44b0a92f..918811f0db7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java @@ -281,18 +281,10 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } long nodeDocFreq = subStats.docFreq(); - if (docFreq >= 0 && nodeDocFreq >= 0) { - docFreq += nodeDocFreq; - } else { - docFreq = -1; - } + docFreq += nodeDocFreq; long nodeTotalTermFreq = subStats.totalTermFreq(); - if (totalTermFreq >= 0 && nodeTotalTermFreq >= 0) { - totalTermFreq += nodeTotalTermFreq; - } else { - totalTermFreq = -1; - } + totalTermFreq += nodeTotalTermFreq; } if (docFreq == 0) { @@ -325,25 +317,13 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase { } long nodeDocCount = nodeStats.docCount(); - if (docCount >= 0 && nodeDocCount >= 0) { - docCount += nodeDocCount; - } else { - docCount = -1; - } + docCount += nodeDocCount; long nodeSumTotalTermFreq = nodeStats.sumTotalTermFreq(); - if (sumTotalTermFreq >= 0 && nodeSumTotalTermFreq >= 0) { - sumTotalTermFreq += nodeSumTotalTermFreq; - } else { - sumTotalTermFreq = -1; - } + sumTotalTermFreq += nodeSumTotalTermFreq; long nodeSumDocFreq = nodeStats.sumDocFreq(); - if (sumDocFreq >= 0 && nodeSumDocFreq >= 0) { - sumDocFreq += nodeSumDocFreq; - } else { - sumDocFreq = -1; - } + sumDocFreq += 
nodeSumDocFreq; assert nodeStats.maxDoc() >= 0; maxDoc += nodeStats.maxDoc(); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java index d93594d884c..a0f2ece9a43 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java @@ -200,7 +200,6 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { // yuge collection maxDoc = TestUtil.nextLong(random, 1, MAXDOC_FORTESTING); } - // TODO: make this a mandatory statistic, or test it with -1 final long docCount; if (random.nextBoolean()) { // sparse field @@ -216,7 +215,6 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { } catch (ArithmeticException overflow) { upperBound = MAXTOKENS_FORTESTING; } - // TODO: make this a mandatory statistic, or test it with -1 final long sumDocFreq; if (random.nextBoolean()) { // shortest possible docs @@ -228,8 +226,8 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { final long sumTotalTermFreq; switch (random.nextInt(3)) { case 0: - // unsupported (e.g. omitTF) - sumTotalTermFreq = -1; + // term frequencies were omitted + sumTotalTermFreq = sumDocFreq; break; case 1: // no repetition of terms (except to satisfy this norm) @@ -259,9 +257,9 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { docFreq = TestUtil.nextLong(random, 1, corpus.docCount()); } final long totalTermFreq; - if (corpus.sumTotalTermFreq() == -1) { + if (corpus.sumTotalTermFreq() == corpus.sumDocFreq()) { // omitTF - totalTermFreq = -1; + totalTermFreq = docFreq; } else if (random.nextBoolean()) { // no repetition totalTermFreq = docFreq; @@ -307,7 +305,7 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { for (int l = 0; l < 10; l++) { TermStatistics term = newTerm(random, corpus); final float freq; - if (term.totalTermFreq() == -1) { + if (term.totalTermFreq() == term.docFreq()) { // omit TF freq = 1; } else if (term.docFreq() == 1) { @@ -427,14 +425,8 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase { } // check score(term-1), given the same freq/norm it should be >= score(term) [scores non-decreasing as terms get rarer] - if (term.docFreq() > 1 && (term.totalTermFreq() == -1 || freq < term.totalTermFreq())) { - final long prevTotalTermFreq; - if (term.totalTermFreq() == -1) { - prevTotalTermFreq = -1; - } else { - prevTotalTermFreq = term.totalTermFreq() - 1; - } - TermStatistics prevTerm = new TermStatistics(term.term(), term.docFreq() - 1, prevTotalTermFreq); + if (term.docFreq() > 1 && freq < term.totalTermFreq()) { + TermStatistics prevTerm = new TermStatistics(term.term(), term.docFreq() - 1, term.totalTermFreq() - 1); SimWeight prevWeight = similarity.computeWeight(boost, corpus, term); SimScorer prevTermScorer = similarity.simScorer(prevWeight, NORM_VALUES.get(norm).getContext()); float prevTermScore = prevTermScorer.score(0, freq); diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java index a0f4f7bf9a2..be79d60eb1a 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/search/spans/AssertingSpanWeight.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Map; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; @@ -72,6 +73,11 @@ public class AssertingSpanWeight extends SpanWeight { return in.scorer(context); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return in.getCacheHelper(context); + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { return in.explain(context, doc); diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java index 3a87c1e24c4..ecdd6111b67 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/LuceneTestCase.java @@ -2025,15 +2025,9 @@ public abstract class LuceneTestCase extends Assert { * checks collection-level statistics on Terms */ public void assertTermsStatisticsEquals(String info, Terms leftTerms, Terms rightTerms) throws IOException { - if (leftTerms.getDocCount() != -1 && rightTerms.getDocCount() != -1) { - assertEquals(info, leftTerms.getDocCount(), rightTerms.getDocCount()); - } - if (leftTerms.getSumDocFreq() != -1 && rightTerms.getSumDocFreq() != -1) { - assertEquals(info, leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq()); - } - if (leftTerms.getSumTotalTermFreq() != -1 && rightTerms.getSumTotalTermFreq() != -1) { - assertEquals(info, leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq()); - } + assertEquals(info, leftTerms.getDocCount(), rightTerms.getDocCount()); + assertEquals(info, leftTerms.getSumDocFreq(), rightTerms.getSumDocFreq()); + assertEquals(info, leftTerms.getSumTotalTermFreq(), rightTerms.getSumTotalTermFreq()); if (leftTerms.size() != -1 && rightTerms.size() != -1) { assertEquals(info, leftTerms.size(), rightTerms.size()); } @@ -2312,9 +2306,7 @@ public abstract class LuceneTestCase extends Assert { */ public void assertTermStatsEquals(String info, TermsEnum leftTermsEnum, TermsEnum rightTermsEnum) throws IOException { assertEquals(info, leftTermsEnum.docFreq(), rightTermsEnum.docFreq()); - if (leftTermsEnum.totalTermFreq() != -1 && rightTermsEnum.totalTermFreq() != -1) { - assertEquals(info, leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq()); - } + assertEquals(info, leftTermsEnum.totalTermFreq(), rightTermsEnum.totalTermFreq()); } /** diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index e889594383a..343bd5b09ad 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -64,6 +64,9 @@ New Features * SOLR-11438: Solr should return rf when min_rf is specified for deletes as well as adds (Erick Erickson) +* SOLR-11003: Support bi-directional syncing of cdcr clusters. We still only support active indexing in one cluster, + but have the ability to switch indexing clusters and cdcr will replicate correctly. (Amrit Sarkar, Varun Thacker) + Bug Fixes ---------------------- @@ -98,6 +101,8 @@ Bug Fixes * SOLR-11586: Restored collection should use stateFormat=2 instead of 1. 
(Varun Thacker) +* SOLR-11503: Collections created with legacyCloud=true cannot be opened if legacyCloud=false (Erick Erickson) + Optimizations ---------------------- * SOLR-11285: Refactor autoscaling framework to avoid direct references to Zookeeper and Solr @@ -132,6 +137,15 @@ Other Changes * SOLR-11380: SolrJ must stream docs to server instead of writing to a buffer first (noble) +* SOLR-11603: Remove unused (public) LTRScoringModel.hasParams() method. (Christine Poerschke) + +* SOLR-11606: Disable tests automatically if Mockito does not work with Java runtime (Java 10). + (Uwe Schindler) + +* SOLR-11618: Tone down verbosity of BackupManager logging (Varun Thacker) + +* SOLR-11621: Fix spurious failures of TriggerIntegrationTest due to timing issues. (shalin) + ================== 7.1.0 ================== Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java index e9908f983af..e44e7f934b2 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestJdbcDataSource.java @@ -40,6 +40,7 @@ import org.apache.solr.handler.dataimport.JdbcDataSource.ResultSetIterator; import static org.mockito.Mockito.*; import org.junit.After; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -68,6 +69,11 @@ public class TestJdbcDataSource extends AbstractDataImportHandlerTestCase { String sysProp = System.getProperty("java.naming.factory.initial"); + @BeforeClass + public static void beforeClass() { + assumeWorkingMockito(); + } + @Override @Before public void setUp() throws Exception { diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java index 6ecbb6f66fc..b1a32331bbe 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java @@ -31,6 +31,7 @@ import java.util.concurrent.FutureTask; import java.util.concurrent.RunnableFuture; import java.util.concurrent.Semaphore; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.DisiPriorityQueue; @@ -479,6 +480,11 @@ public class LTRScoringQuery extends Query { } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + public class ModelScorer extends Scorer { final private DocInfo docInfo; final private Scorer featureTraversalScorer; diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java index 48e89423ca1..36a8ef5242a 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java @@ -21,6 +21,7 @@ import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; @@ -228,6 +229,11 @@ public abstract class Feature extends 
Query { public abstract FeatureScorer scorer(LeafReaderContext context) throws IOException; + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/ValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/ValueFeature.java index 61aa9e5fb7d..f423ad9124a 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/ValueFeature.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/ValueFeature.java @@ -140,9 +140,6 @@ public class ValueFeature extends Feature { } - - - } } diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LTRScoringModel.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LTRScoringModel.java index 2b4f575be7e..7eb3436be95 100644 --- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LTRScoringModel.java +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/model/LTRScoringModel.java @@ -229,10 +229,6 @@ public abstract class LTRScoringModel { return true; } - public boolean hasParams() { - return !((params == null) || params.isEmpty()); - } - public Collection getAllFeatures() { return allFeatures; } diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java index e5801e0fa1c..cc4a5905b2c 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java @@ -1643,9 +1643,19 @@ public class ZkController { private void checkStateInZk(CoreDescriptor cd) throws InterruptedException { if (!Overseer.isLegacy(zkStateReader)) { CloudDescriptor cloudDesc = cd.getCloudDescriptor(); - String coreNodeName = cloudDesc.getCoreNodeName(); - if (coreNodeName == null) - throw new SolrException(ErrorCode.SERVER_ERROR, "No coreNodeName for " + cd); + String nodeName = cloudDesc.getCoreNodeName(); + if (nodeName == null) { + if (cc.repairCoreProperty(cd, CoreDescriptor.CORE_NODE_NAME) == false) { + throw new SolrException(ErrorCode.SERVER_ERROR, "No coreNodeName for " + cd); + } + nodeName = cloudDesc.getCoreNodeName(); + // verify that the repair worked. + if (nodeName == null) { + throw new SolrException(ErrorCode.SERVER_ERROR, "No coreNodeName for " + cd); + } + } + final String coreNodeName = nodeName; + if (cloudDesc.getShardId() == null) { throw new SolrException(ErrorCode.SERVER_ERROR, "No shard id for " + cd); } diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java index fee74750cad..3a7c7502723 100644 --- a/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java +++ b/solr/core/src/java/org/apache/solr/cloud/ZkDistributedQueue.java @@ -312,7 +312,7 @@ public class ZkDistributedQueue implements DistributedQueue { } // Allow this client to push up to 1% of the remaining queue capacity without rechecking. 
- offerPermits.set(maxQueueSize - stat.getNumChildren() / 100); + offerPermits.set(remainingCapacity / 100); } } diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java index 98f4927d579..e61536bd5f0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java @@ -153,6 +153,8 @@ public class AutoScaling { return new NodeAddedTrigger(name, props, loader, dataProvider); case NODELOST: return new NodeLostTrigger(name, props, loader, dataProvider); + case SEARCHRATE: + return new SearchRateTrigger(name, props, loader, dataProvider); default: throw new IllegalArgumentException("Unknown event type: " + type + " in trigger: " + name); } diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java index 908a9610d76..163183eda1d 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java @@ -28,6 +28,7 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.NoSuchElementException; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; @@ -365,6 +366,8 @@ public class ScheduledTriggers implements Closeable { if (stateManager.hasData(statePath)) { stateManager.removeData(statePath, -1); } + } catch (NoSuchElementException e) { + // already removed by someone else } catch (Exception e) { log.warn("Failed to remove state for removed trigger " + statePath, e); } @@ -378,6 +381,8 @@ public class ScheduledTriggers implements Closeable { ops.add(Op.delete(eventsPath, -1)); stateManager.multi(ops); } + } catch (NoSuchElementException e) { + // already removed by someone else } catch (Exception e) { log.warn("Failed to remove events for removed trigger " + eventsPath, e); } diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java index a4dffa38aa3..0c6ffd49913 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/SearchRateTrigger.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import com.google.common.util.concurrent.AtomicDouble; @@ -72,7 +73,7 @@ public class SearchRateTrigger extends TriggerBase { collection = (String)properties.getOrDefault(AutoScalingParams.COLLECTION, Policy.ANY); shard = (String)properties.getOrDefault(AutoScalingParams.SHARD, Policy.ANY); if (collection.equals(Policy.ANY) && !shard.equals(Policy.ANY)) { - throw new IllegalArgumentException("When 'shard' is other than #ANY collection name must be also other than #ANY"); + throw new IllegalArgumentException("When 'shard' is other than #ANY then the collection name must also be other than #ANY"); } node = (String)properties.getOrDefault(AutoScalingParams.NODE, Policy.ANY); handler = (String)properties.getOrDefault(AutoScalingParams.HANDLER, "/select"); @@ -231,7 +232,36 @@ //
generate event - if (processor.process(new SearchRateEvent(getName(), now, hotNodes, hotCollections, hotShards, hotReplicas))) { + // find the earliest time when a condition was exceeded + final AtomicLong eventTime = new AtomicLong(now); + hotCollections.forEach((c, r) -> { + long time = lastCollectionEvent.get(c); + if (eventTime.get() > time) { + eventTime.set(time); + } + }); + hotShards.forEach((c, shards) -> { + shards.forEach((s, r) -> { + long time = lastShardEvent.get(c + "." + s); + if (eventTime.get() > time) { + eventTime.set(time); + } + }); + }); + hotReplicas.forEach(r -> { + long time = lastReplicaEvent.get(r.getCollection() + "." + r.getCore()); + if (eventTime.get() > time) { + eventTime.set(time); + } + }); + hotNodes.forEach((n, r) -> { + long time = lastNodeEvent.get(n); + if (eventTime.get() > time) { + eventTime.set(time); + } + }); + + if (processor.process(new SearchRateEvent(getName(), eventTime.get(), hotNodes, hotCollections, hotShards, hotReplicas))) { // update lastEvent times hotNodes.keySet().forEach(node -> lastNodeEvent.put(node, now)); hotCollections.keySet().forEach(coll -> lastCollectionEvent.put(coll, now)); @@ -244,7 +274,7 @@ private boolean waitForElapsed(String name, long now, Map lastEventMap) { Long lastTime = lastEventMap.computeIfAbsent(name, s -> now); long elapsed = TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS); - log.info("name=" + name + ", lastTime=" + lastTime + ", elapsed=" + elapsed); + log.debug("name=" + name + ", lastTime=" + lastTime + ", elapsed=" + elapsed); if (TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS) < getWaitForSecond()) { return false; } diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index 759e9a9f76f..5ed72d752d1 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -950,6 +950,7 @@ public class CoreContainer { try { solrCores.waitAddPendingCoreOps(cd.getName()); core = createFromDescriptor(cd, true, newCollection); + coresLocator.persist(this, cd); // Write out the current core properties in case anything changed when the core was created } finally { solrCores.removeFromPendingOps(cd.getName()); } @@ -1657,7 +1658,45 @@ public class CoreContainer { public TransientSolrCoreCache getTransientCache() { return solrCores.getTransientCacheHandler(); } - + + + /** + * + * @param cd CoreDescriptor, presumably a deficient one + * @param prop The property that needs to be repaired. + * @return true if we were able to successfully persist the repaired coreDescriptor, false otherwise. + * + * See SOLR-11503. This can be removed when there's no chance we'll need to upgrade a + * Solr installation created with legacyCloud=true from 6.6.1 through 7.1 + */ + public boolean repairCoreProperty(CoreDescriptor cd, String prop) { + // So far, coreNodeName is the only property that we need to repair; this may get more complex as other properties + // are added. + + if (CoreDescriptor.CORE_NODE_NAME.equals(prop) == false) { + throw new SolrException(ErrorCode.SERVER_ERROR, + String.format(Locale.ROOT,"The only supported property for repair is currently [%s]", + CoreDescriptor.CORE_NODE_NAME)); + } + + // Try to read the coreNodeName from the cluster state.
+ + String coreName = cd.getName(); + DocCollection coll = getZkController().getZkStateReader().getClusterState().getCollection(cd.getCollectionName()); + for (Replica rep : coll.getReplicas()) { + if (coreName.equals(rep.getCoreName())) { + log.warn("Core properties file for node {} found with no coreNodeName, attempting to repair with value {}. See SOLR-11503. " + + "This message should only appear if upgrading from collections created with Solr 6.6.1 through 7.1.", + rep.getCoreName(), rep.getName()); + cd.getCloudDescriptor().setCoreNodeName(rep.getName()); + coresLocator.persist(this, cd); + return true; + } + } + log.error("Could not repair coreNodeName in core.properties file for core {}", coreName); + return false; + } + } class CloserThread extends Thread { diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java index 726e5b9799c..d2094d6ac52 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java +++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java @@ -201,7 +201,7 @@ public class BackupManager { for (String file : files) { List children = zkClient.getChildren(zkPath + "/" + file, null, true); if (children.size() == 0) { - log.info("Writing file {}", file); + log.debug("Writing file {}", file); byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true); try (OutputStream os = repository.createOutput(repository.resolve(dir, file))) { os.write(data); diff --git a/solr/core/src/java/org/apache/solr/handler/CdcrReplicator.java b/solr/core/src/java/org/apache/solr/handler/CdcrReplicator.java index a151a43061a..5dca0d846bf 100644 --- a/solr/core/src/java/org/apache/solr/handler/CdcrReplicator.java +++ b/solr/core/src/java/org/apache/solr/handler/CdcrReplicator.java @@ -77,6 +77,10 @@ public class CdcrReplicator implements Runnable { Object o = subReader.next(); if (o == null) break; // we have reached the end of the update logs, we should close the batch + if (isTargetCluster(o)) { + continue; + } + if (isDelete(o)) { /* @@ -140,6 +144,30 @@ state.resetConsecutiveErrors(); } + /** Check whether the update read from the TLog was received from the source cluster + * or via a Solr client. + */ + private boolean isTargetCluster(Object o) { + List entry = (List) o; + int operationAndFlags = (Integer) entry.get(0); + int oper = operationAndFlags & UpdateLog.OPERATION_MASK; + Boolean isTarget = false; + if (oper == UpdateLog.DELETE_BY_QUERY || oper == UpdateLog.DELETE) { + if (entry.size() == 4) { //back-compat - skip for previous versions + isTarget = (Boolean) entry.get(entry.size() - 1); + } + } else if (oper == UpdateLog.UPDATE_INPLACE) { + if (entry.size() == 6) { //back-compat - skip for previous versions + isTarget = (Boolean) entry.get(entry.size() - 2); + } + } else if (oper == UpdateLog.ADD) { + if (entry.size() == 4) { //back-compat - skip for previous versions + isTarget = (Boolean) entry.get(entry.size() - 2); + } + } + return isTarget; + } + private boolean isDelete(Object o) { List entry = (List) o; int operationAndFlags = (Integer) entry.get(0); diff --git a/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java b/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java index 8345b3c52f2..38b118722aa 100644 --- a/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java @@ -39,6 +39,8 @@ import
java.util.concurrent.locks.ReentrantLock; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; +import org.apache.solr.api.Api; +import org.apache.solr.api.ApiBag; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrResponse; @@ -54,7 +56,7 @@ import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; +import org.apache.solr.common.util.CommandOperation; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.StrUtils; @@ -72,21 +74,18 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.SchemaManager; import org.apache.solr.security.AuthorizationContext; import org.apache.solr.security.PermissionNameProvider; -import org.apache.solr.common.util.CommandOperation; import org.apache.solr.util.DefaultSolrThreadFactory; import org.apache.solr.util.RTimer; import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.plugin.SolrCoreAware; -import org.apache.solr.api.Api; -import org.apache.solr.api.ApiBag; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static com.google.common.base.Strings.isNullOrEmpty; import static java.util.Collections.singletonList; -import static org.apache.solr.common.util.Utils.makeMap; import static org.apache.solr.common.params.CoreAdminParams.NAME; import static org.apache.solr.common.util.StrUtils.formatString; +import static org.apache.solr.common.util.Utils.makeMap; import static org.apache.solr.core.ConfigOverlay.NOT_EDITABLE; import static org.apache.solr.core.ConfigOverlay.ZNODEVER; import static org.apache.solr.core.ConfigSetProperties.IMMUTABLE_CONFIGSET_ARG; @@ -879,10 +878,6 @@ public class SolrConfigHandler extends RequestHandlerBase implements SolrCoreAwa return true; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } @Override protected SolrResponse createResponse(SolrClient client) { diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index f6b9686604c..f8f5deac0c9 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -217,7 +217,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("hist", HistogramEvaluator.class) .withFunctionName("length", LengthEvaluator.class) .withFunctionName("movingAvg", MovingAverageEvaluator.class) - .withFunctionName("normalize", NormalizeEvaluator.class) + .withFunctionName("standardize", NormalizeEvaluator.class) .withFunctionName("percentile", PercentileEvaluator.class) .withFunctionName("predict", PredictEvaluator.class) .withFunctionName("rank", RankEvaluator.class) @@ -270,9 +270,18 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("loess", LoessEvaluator.class) .withFunctionName("matrix", MatrixEvaluator.class) .withFunctionName("transpose", TransposeEvaluator.class) - .withFunctionName("unit", UnitEvaluator.class) + .withFunctionName("unitize", UnitEvaluator.class) .withFunctionName("triangularDistribution", TriangularDistributionEvaluator.class) 
.withFunctionName("precision", PrecisionEvaluator.class) + .withFunctionName("minMaxScale", MinMaxScaleEvaluator.class) + .withFunctionName("markovChain", MarkovChainEvaluator.class) + .withFunctionName("grandSum", GrandSumEvaluator.class) + .withFunctionName("scalarAdd", ScalarAddEvaluator.class) + .withFunctionName("scalarSubtract", ScalarSubtractEvaluator.class) + .withFunctionName("scalarMultiply", ScalarMultiplyEvaluator.class) + .withFunctionName("scalarDivide", ScalarDivideEvaluator.class) + .withFunctionName("sumRows", SumRowsEvaluator.class) + .withFunctionName("sumColumns", SumColumnsEvaluator.class) // Boolean Stream Evaluators diff --git a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java index 78c01f8f03b..a8c0f20b7d2 100644 --- a/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java @@ -486,6 +486,11 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro return scorer(weightOrBitSet.set); } } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } } } diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index 6898369b962..b8f595752b6 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -332,6 +332,11 @@ class SpatialDistanceQuery extends ExtendedQueryBase implements PostFilter { return new SpatialScorer(context, this, score()); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { return ((SpatialScorer)scorer(context)).explain(super.explain(context, doc), doc); diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java index c141e2680df..04b26063ced 100644 --- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchema.java @@ -15,6 +15,7 @@ * limitations under the License. 
*/ package org.apache.solr.schema; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -61,7 +62,6 @@ import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.ExecutorUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrConfig; @@ -360,10 +360,6 @@ public final class ManagedIndexSchema extends IndexSchema { return remoteVersion; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } @Override protected SolrResponse createResponse(SolrClient client) { diff --git a/solr/core/src/java/org/apache/solr/search/Filter.java b/solr/core/src/java/org/apache/solr/search/Filter.java index e37d573af4d..555e24e323c 100644 --- a/solr/core/src/java/org/apache/solr/search/Filter.java +++ b/solr/core/src/java/org/apache/solr/search/Filter.java @@ -19,6 +19,7 @@ package org.apache.solr.search; import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreScorer; @@ -135,6 +136,11 @@ public abstract class Filter extends Query { return new ConstantScoreScorer(this, 0f, iterator); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getReaderCacheHelper(); + } + }; } } diff --git a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java index 4656afe0928..2abc930c02a 100644 --- a/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/GraphTermsQParserPlugin.java @@ -330,6 +330,11 @@ public class GraphTermsQParserPlugin extends QParserPlugin { return scorer(weightOrBitSet.set); } } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } @@ -624,6 +629,11 @@ abstract class PointSetQuery extends Query implements DocSetProducer { } return new ConstantScoreScorer(this, score(), readerSetIterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java index 7afdb5ffccc..e60cfb10623 100644 --- a/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java @@ -282,6 +282,11 @@ class JoinQuery extends Query { return new ConstantScoreScorer(this, score(), readerSetIterator); } + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return null; + } + // most of these statistics are only used for the enum method int fromSetSize; // number of docs in the fromSet (that match the from query) diff --git a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java index 15d15f63e51..d8481f7b6cc 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java +++ 
b/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.util.Map; import java.util.Objects; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.ConstantScoreScorer; @@ -104,6 +105,11 @@ public class SolrConstantScoreQuery extends Query implements ExtendedQuery { return new ConstantScoreScorer(this, score(), iterator); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + } @Override diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java index f63a6644205..d56f026f67b 100644 --- a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java +++ b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java @@ -23,6 +23,7 @@ import java.util.Objects; import java.util.Set; import java.util.TreeSet; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; @@ -275,7 +276,12 @@ public class GraphQuery extends Query { // create a scorer on the result set; if results from the right query are empty, use an empty iterator. return new GraphScorer(this, readerSet == null ? DocIdSetIterator.empty() : readerSet.iterator(), 1); } - + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } + @Override public void extractTerms(Set terms) { // No-op for now, not used / supported diff --git a/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java b/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java index e57f768cfdc..1ef75397e29 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java +++ b/solr/core/src/java/org/apache/solr/search/stats/CollectionStats.java @@ -50,27 +50,11 @@ public class CollectionStats { this.sumDocFreq = stats.sumDocFreq(); } - /* - * If any stats being added uses -1 then reset the total stats to -1 - * as that parameter becomes unknowable. - */ public void add(CollectionStats stats) { this.maxDoc += stats.maxDoc; - if (this.docCount < 0 || stats.docCount < 0) { - this.docCount = -1; - } else { - this.docCount += stats.docCount; - } - if (this.sumTotalTermFreq < 0 || stats.sumTotalTermFreq < 0) { - this.sumTotalTermFreq = -1; - } else { - this.sumTotalTermFreq += stats.sumTotalTermFreq; - } - if (this.sumDocFreq < 0 || stats.sumDocFreq < 0) { - this.sumDocFreq = -1; - } else { - this.sumDocFreq += stats.sumDocFreq; - } + this.docCount += stats.docCount; + this.sumTotalTermFreq += stats.sumTotalTermFreq; + this.sumDocFreq += stats.sumDocFreq; } public CollectionStatistics toCollectionStatistics() { diff --git a/solr/core/src/java/org/apache/solr/search/stats/TermStats.java b/solr/core/src/java/org/apache/solr/search/stats/TermStats.java index 62b311e6756..9977b285d56 100644 --- a/solr/core/src/java/org/apache/solr/search/stats/TermStats.java +++ b/solr/core/src/java/org/apache/solr/search/stats/TermStats.java @@ -55,20 +55,9 @@ public class TermStats { this.totalTermFreq = stats.totalTermFreq(); } - /* - * If any of the stats is -1 then reset total stats to -1.
- */ public void add(TermStats stats) { - if (this.docFreq < 0 || stats.docFreq < 0) { - this.docFreq = -1; - } else { - this.docFreq += stats.docFreq; - } - if (this.totalTermFreq < 0 || stats.totalTermFreq < 0) { - this.totalTermFreq = -1; - } else { - this.totalTermFreq += stats.totalTermFreq; - } + this.docFreq += stats.docFreq; + this.totalTermFreq += stats.totalTermFreq; } public TermStatistics toTermStatistics() { diff --git a/solr/core/src/java/org/apache/solr/update/CdcrTransactionLog.java b/solr/core/src/java/org/apache/solr/update/CdcrTransactionLog.java index ce992000c8e..f959e14dc17 100644 --- a/solr/core/src/java/org/apache/solr/update/CdcrTransactionLog.java +++ b/solr/core/src/java/org/apache/solr/update/CdcrTransactionLog.java @@ -24,10 +24,13 @@ import java.nio.channels.Channels; import java.nio.file.Files; import java.util.Collection; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.util.FastOutputStream; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.common.util.ObjectReleaseTracker; +import org.apache.solr.update.processor.CdcrUpdateProcessor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -104,6 +107,147 @@ public class CdcrTransactionLog extends TransactionLog { return 0; } + @Override + public long write(AddUpdateCommand cmd, long prevPointer, int flags) { + assert (-1 <= prevPointer && (cmd.isInPlaceUpdate() || (-1 == prevPointer))); + + LogCodec codec = new LogCodec(resolver); + SolrInputDocument sdoc = cmd.getSolrInputDocument(); + + try { + checkWriteHeader(codec, sdoc); + + // adaptive buffer sizing + int bufSize = lastAddSize; // unsynchronized access of lastAddSize should be fine + bufSize = Math.min(1024*1024, bufSize+(bufSize>>3)+256); + + MemOutputStream out = new MemOutputStream(new byte[bufSize]); + codec.init(out); + if (cmd.isInPlaceUpdate()) { + codec.writeTag(JavaBinCodec.ARR, 6); + codec.writeInt(UpdateLog.UPDATE_INPLACE | flags); // should just take one byte + codec.writeLong(cmd.getVersion()); + codec.writeLong(prevPointer); + codec.writeLong(cmd.prevVersion); + if (cmd.getReq().getParamString().contains(CdcrUpdateProcessor.CDCR_UPDATE)) { + // if the update was received from a cdcr source, add a boolean entry; + // CdcrReplicator.isTargetCluster() checks this flag to decide whether to forward the update + // to its own target cluster + codec.writePrimitive(true); + } else { + codec.writePrimitive(false); + } + codec.writeSolrInputDocument(cmd.getSolrInputDocument()); + + } else { + codec.writeTag(JavaBinCodec.ARR, 4); + codec.writeInt(UpdateLog.ADD | flags); // should just take one byte + codec.writeLong(cmd.getVersion()); + if (cmd.getReq().getParamString().contains(CdcrUpdateProcessor.CDCR_UPDATE)) { + // if the update was received from a cdcr source, add an extra boolean entry; + // CdcrReplicator.isTargetCluster() checks this flag to decide whether to forward the update + // to its own target cluster + codec.writePrimitive(true); + } else { + codec.writePrimitive(false); + } + codec.writeSolrInputDocument(cmd.getSolrInputDocument()); + } + lastAddSize = (int)out.size(); + + synchronized (this) { + long pos = fos.size(); // if we had flushed, this should be equal to channel.position() + assert pos != 0; + + /*** + System.out.println("###writing at " + pos + " fos.size()=" + fos.size() + " raf.length()=" + raf.length()); + if (pos != fos.size()) { + throw new
RuntimeException("ERROR" + "###writing at " + pos + " fos.size()=" + fos.size() + " raf.length()=" + raf.length()); + } + ***/ + + out.writeAll(fos); + endRecord(pos); + // fos.flushBuffer(); // flush later + return pos; + } + + } catch (IOException e) { + // TODO: reset our file pointer back to "pos", the start of this record. + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error logging add", e); + } + } + + @Override + public long writeDelete(DeleteUpdateCommand cmd, int flags) { + LogCodec codec = new LogCodec(resolver); + + try { + checkWriteHeader(codec, null); + + BytesRef br = cmd.getIndexedId(); + + MemOutputStream out = new MemOutputStream(new byte[20 + br.length]); + codec.init(out); + codec.writeTag(JavaBinCodec.ARR, 4); + codec.writeInt(UpdateLog.DELETE | flags); // should just take one byte + codec.writeLong(cmd.getVersion()); + codec.writeByteArray(br.bytes, br.offset, br.length); + if (cmd.getReq().getParamString().contains(CdcrUpdateProcessor.CDCR_UPDATE)) { + // if the update was received from a cdcr source, add an extra boolean entry; + // CdcrReplicator.isTargetCluster() checks this flag to decide whether to forward the update + // to its own target cluster + codec.writePrimitive(true); + } else { + codec.writePrimitive(false); + } + + synchronized (this) { + long pos = fos.size(); // if we had flushed, this should be equal to channel.position() + assert pos != 0; + out.writeAll(fos); + endRecord(pos); + // fos.flushBuffer(); // flush later + return pos; + } + + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + } + + @Override + public long writeDeleteByQuery(DeleteUpdateCommand cmd, int flags) { + LogCodec codec = new LogCodec(resolver); + try { + checkWriteHeader(codec, null); + + MemOutputStream out = new MemOutputStream(new byte[20 + (cmd.query.length())]); + codec.init(out); + codec.writeTag(JavaBinCodec.ARR, 4); + codec.writeInt(UpdateLog.DELETE_BY_QUERY | flags); // should just take one byte + codec.writeLong(cmd.getVersion()); + codec.writeStr(cmd.query); + if (cmd.getReq().getParamString().contains(CdcrUpdateProcessor.CDCR_UPDATE)) { + // if the update was received from a cdcr source, add an extra boolean entry; + // CdcrReplicator.isTargetCluster() checks this flag to decide whether to forward the update + // to its own target cluster + codec.writePrimitive(true); + } else { + codec.writePrimitive(false); + } + synchronized (this) { + long pos = fos.size(); // if we had flushed, this should be equal to channel.position() + out.writeAll(fos); + endRecord(pos); + // fos.flushBuffer(); // flush later + return pos; + } + } catch (IOException e) { + throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); + } + } + @Override public long writeCommit(CommitUpdateCommand cmd, int flags) { LogCodec codec = new LogCodec(resolver); diff --git a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java index 87241617db7..469840ba0e8 100644 --- a/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java +++ b/solr/core/src/java/org/apache/solr/update/DeleteByQueryWrapper.java @@ -83,6 +83,11 @@ final class DeleteByQueryWrapper extends Query { public Scorer scorer(LeafReaderContext context) throws IOException { return inner.scorer(privateContext.getIndexReader().leaves().get(0)); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return
inner.getCacheHelper(context); + } }; } diff --git a/solr/core/src/java/org/apache/solr/update/TransactionLog.java b/solr/core/src/java/org/apache/solr/update/TransactionLog.java index 73328cf8536..7daf44fda08 100644 --- a/solr/core/src/java/org/apache/solr/update/TransactionLog.java +++ b/solr/core/src/java/org/apache/solr/update/TransactionLog.java @@ -326,7 +326,7 @@ public class TransactionLog implements Closeable { numRecords++; } - private void checkWriteHeader(LogCodec codec, SolrInputDocument optional) throws IOException { + protected void checkWriteHeader(LogCodec codec, SolrInputDocument optional) throws IOException { // Unsynchronized access. We can get away with an unsynchronized access here // since we will never get a false non-zero when the position is in fact 0. diff --git a/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/managed-schema b/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/managed-schema new file mode 100644 index 00000000000..2df6c0a4445 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/managed-schema @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/solrconfig.xml new file mode 100644 index 00000000000..da548c4c4ec --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cdcr-cluster1/conf/solrconfig.xml @@ -0,0 +1,80 @@ + + + + + + + + + ${solr.data.dir:} + + + + ${tests.luceneMatchVersion:LATEST} + + + + + + + + + ${cdcr.cluster2.zkHost} + cdcr-cluster1 + cdcr-cluster2 + + + 1 + 1000 + 1000 + + + 1000 + + + + + + ${solr.ulog.dir:} + + + + + + + + _text_ + + + + + + cdcr-processor-chain + + + \ No newline at end of file diff --git a/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/managed-schema b/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/managed-schema new file mode 100644 index 00000000000..2df6c0a4445 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/managed-schema @@ -0,0 +1,29 @@ + + + + + + + + + + + + + id + diff --git a/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/solrconfig.xml b/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/solrconfig.xml new file mode 100644 index 00000000000..8e26d4507c5 --- /dev/null +++ b/solr/core/src/test-files/solr/configsets/cdcr-cluster2/conf/solrconfig.xml @@ -0,0 +1,80 @@ + + + + + + + + + ${solr.data.dir:} + + + + ${tests.luceneMatchVersion:LATEST} + + + + + + + + + ${cdcr.cluster1.zkHost} + cdcr-cluster2 + cdcr-cluster1 + + + 1 + 1000 + 1000 + + + 1000 + + + + + + ${solr.ulog.dir:} + + + + + + + + _text_ + + + + + + cdcr-processor-chain + + + \ No newline at end of file diff --git a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java index f7a49ace265..447b8303967 100644 --- a/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java +++ b/solr/core/src/test/org/apache/solr/analysis/TestReversedWildcardFilterFactory.java @@ -53,6 +53,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { + assumeWorkingMockito(); initCore("solrconfig.xml","schema-reversed.xml"); } diff --git a/solr/core/src/test/org/apache/solr/client/solrj/embedded/TestEmbeddedSolrServerAdminHandler.java 
b/solr/core/src/test/org/apache/solr/client/solrj/embedded/TestEmbeddedSolrServerAdminHandler.java index 84ecedfa7d5..ca3634d51f2 100644 --- a/solr/core/src/test/org/apache/solr/client/solrj/embedded/TestEmbeddedSolrServerAdminHandler.java +++ b/solr/core/src/test/org/apache/solr/client/solrj/embedded/TestEmbeddedSolrServerAdminHandler.java @@ -19,17 +19,14 @@ package org.apache.solr.client.solrj.embedded; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.Collection; import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.NodeConfig; import org.apache.solr.core.SolrResourceLoader; @@ -64,11 +61,6 @@ public class TestEmbeddedSolrServerAdminHandler extends SolrTestCaseJ4 { return new ModifiableSolrParams(); } - @Override - public Collection getContentStreams() throws IOException { - return null; - } - @Override protected QueryResponse createResponse(final SolrClient client) { return new QueryResponse(); diff --git a/solr/core/src/test/org/apache/solr/cloud/AssignTest.java b/solr/core/src/test/org/apache/solr/cloud/AssignTest.java index d0273cbcd84..21c001c4e6e 100644 --- a/solr/core/src/test/org/apache/solr/cloud/AssignTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/AssignTest.java @@ -62,6 +62,8 @@ public class AssignTest extends SolrTestCaseJ4 { @Test public void testAssignNode() throws Exception { + assumeWorkingMockito(); + SolrZkClient zkClient = mock(SolrZkClient.class); Map zkClientData = new HashMap<>(); when(zkClient.setData(anyString(), any(), anyInt(), anyBoolean())).then(invocation -> { diff --git a/solr/core/src/test/org/apache/solr/cloud/LegacyCloudClusterPropTest.java b/solr/core/src/test/org/apache/solr/cloud/LegacyCloudClusterPropTest.java new file mode 100644 index 00000000000..72e8e6d2b56 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/LegacyCloudClusterPropTest.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.cloud; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Properties; + +import org.apache.solr.client.solrj.embedded.JettySolrRunner; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.common.cloud.ClusterProperties; +import org.apache.solr.common.cloud.ClusterStateUtil; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.core.CorePropertiesLocator; +import org.junit.BeforeClass; +import org.junit.Test; + + +public class LegacyCloudClusterPropTest extends SolrCloudTestCase { + + @BeforeClass + public static void setupCluster() throws Exception { + + // currently this test is fine with a single shard and a single replica, and it's simpler. It could easily be + // extended to multiple shards/replicas, but there's no particular need. + configureCluster(1) + .addConfig("conf", configset("cloud-minimal")) + .configure(); + } + + + // Are all these required? + private static String[] requiredProps = { + "numShards", + "collection.configName", + "name", + "replicaType", + "shard", + "collection", + "coreNodeName" + }; + + @Test + public void testCreateCollectionSwitchLegacyCloud() throws Exception { + createAndTest("legacyTrue", true); + createAndTest("legacyFalse", false); + } + + private void createAndTest(final String coll, final boolean legacy) throws Exception { + + // First, just ensure that the core.properties file gets created with coreNodeName and all other mandatory parameters. + final String legacyString = Boolean.toString(legacy); + final String legacyAnti = Boolean.toString(!legacy); + CollectionAdminRequest.setClusterProperty(ZkStateReader.LEGACY_CLOUD, legacyString).process(cluster.getSolrClient()); + ClusterProperties props = new ClusterProperties(zkClient()); + + assertEquals("Value of legacyCloud cluster prop unexpected", legacyString, + props.getClusterProperty(ZkStateReader.LEGACY_CLOUD, legacyAnti)); + + CollectionAdminRequest.createCollection(coll, "conf", 1, 1) + .setMaxShardsPerNode(1) + .process(cluster.getSolrClient()); + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 120000)); + + // Ensure all mandatory properties are there. + checkMandatoryProps(coll); + + checkCollectionActive(coll); + // The fixes for SOLR-11503 ensure that a newly created collection has coreNodeName whether legacyCloud is true or false; + // we still need to test repairing a properties file that does _not_ have coreNodeName set, the second part of + // the fix. + + // Now remove the coreNodeName from each core.properties file and write it back out.
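+ // (coreNodeName is currently the only property that CoreContainer.repairCoreProperty() knows how to restore, + // which is why it is the property removed here.)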
+ removePropertyFromAllReplicas(coll, "coreNodeName"); + + // Now restart Solr; this should repair the removal on core load regardless of the value of legacyCloud + JettySolrRunner jetty = cluster.getJettySolrRunner(0); + jetty.stop(); + jetty.start(); + checkMandatoryProps(coll); + checkCollectionActive(coll); + } + + private void checkCollectionActive(String coll) { + assertTrue(ClusterStateUtil.waitForAllActiveAndLiveReplicas(cluster.getSolrClient().getZkStateReader(), 120000)); + DocCollection docColl = getCollectionState(coll); + for (Replica rep : docColl.getReplicas()) { + if (rep.getState() == Replica.State.ACTIVE) return; + } + fail("Replica was not active for collection " + coll); + } + private void removePropertyFromAllReplicas(String coll, String propDel) throws IOException { + DocCollection docColl = getCollectionState(coll); + + // First remove the property from all core.properties files + for (Replica rep : docColl.getReplicas()) { + final String coreName = rep.getCoreName(); + Properties prop = loadPropFileForReplica(coreName); + prop.remove(propDel); + JettySolrRunner jetty = cluster.getJettySolrRunner(0); + Path expected = Paths.get(jetty.getSolrHome()).toAbsolutePath().resolve(coreName); + Path corePropFile = Paths.get(expected.toString(), CorePropertiesLocator.PROPERTIES_FILENAME); + + try (Writer os = new OutputStreamWriter(Files.newOutputStream(corePropFile), StandardCharsets.UTF_8)) { + prop.store(os, ""); + } + } + + // Now ensure it's really gone + for (Replica rep : docColl.getReplicas()) { + Properties prop = loadPropFileForReplica(rep.getCoreName()); + assertEquals("Property " + propDel + " should have been deleted", + "bogus", prop.getProperty(propDel, "bogus")); + } + } + + private Properties loadPropFileForReplica(String coreName) throws IOException { + JettySolrRunner jetty = cluster.getJettySolrRunner(0); + Path expected = Paths.get(jetty.getSolrHome()).toAbsolutePath().resolve(coreName); + Path corePropFile = Paths.get(expected.toString(), CorePropertiesLocator.PROPERTIES_FILENAME); + Properties props = new Properties(); + try (InputStream fis = Files.newInputStream(corePropFile)) { + props.load(new InputStreamReader(fis, StandardCharsets.UTF_8)); + } + return props; + } + + private void checkMandatoryProps(String coll) throws IOException { + DocCollection docColl = getCollectionState(coll); + for (Replica rep : docColl.getReplicas()) { + Properties prop = loadPropFileForReplica(rep.getCoreName()); + for (String testProp : requiredProps) { + String propVal = prop.getProperty(testProp, "bogus"); + if ("bogus".equals(propVal)) { + fail("Should have found property " + testProp + " in properties file"); + } + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java index 7abd8d3167c..b6e7415c9ed 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java @@ -122,6 +122,8 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 { @BeforeClass public static void setUpOnce() throws Exception { + assumeWorkingMockito(); + workQueueMock = mock(OverseerTaskQueue.class); runningMapMock = mock(DistributedMap.class); completedMapMock = mock(DistributedMap.class); diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java index 527a28b3613..7259d389a2a 100644 --- a/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java @@ -238,6 +238,7 @@ public class OverseerTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { + assumeWorkingMockito(); initCore(); } diff --git a/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java b/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java index 10877d91dd8..fe637d919b7 100644 --- a/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java +++ b/solr/core/src/test/org/apache/solr/cloud/TestCloudSearcherWarming.java @@ -17,10 +17,7 @@ package org.apache.solr.cloud; -import java.io.IOException; import java.lang.invoke.MethodHandles; -import java.util.Collection; -import java.util.Collections; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -32,6 +29,7 @@ import org.apache.solr.client.solrj.SolrResponse; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.RequestWriter; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.SolrResponseBase; import org.apache.solr.common.SolrInputDocument; @@ -40,8 +38,6 @@ import org.apache.solr.common.cloud.CollectionStateWatcher; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.NamedList; import org.apache.solr.core.SolrCore; import org.apache.solr.core.SolrEventListener; @@ -56,6 +52,8 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.common.params.CommonParams.JSON_MIME; + /** * Tests related to SOLR-6086 */ @@ -344,9 +342,14 @@ public class TestCloudSearcherWarming extends SolrCloudTestCase { return null; } - @Override + /* @Override public Collection getContentStreams() throws IOException { return message != null ? Collections.singletonList(new ContentStreamBase.StringStream(message)) : null; + }*/ + + @Override + public RequestWriter.ContentWriter getContentWriter(String expectedType) { + return message == null? 
null: new RequestWriter.StringPayloadContentWriter(message, JSON_MIME); } @Override diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java index ca01220f695..1974182bc88 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/AutoScalingHandlerTest.java @@ -17,10 +17,7 @@ package org.apache.solr.cloud.autoscaling; -import java.io.IOException; import java.lang.invoke.MethodHandles; -import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -34,13 +31,13 @@ import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.RequestWriter; +import org.apache.solr.client.solrj.request.RequestWriter.StringPayloadContentWriter; import org.apache.solr.client.solrj.request.V2Request; import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; -import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Utils; import org.apache.solr.util.TimeOut; @@ -52,6 +49,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH; +import static org.apache.solr.common.params.CommonParams.JSON_MIME; import static org.apache.solr.common.util.Utils.getObjectByPath; /** @@ -828,8 +826,8 @@ public class AutoScalingHandlerTest extends SolrCloudTestCase { } @Override - public Collection getContentStreams() throws IOException { - return message != null ? Collections.singletonList(new ContentStreamBase.StringStream(message)) : null; + public RequestWriter.ContentWriter getContentWriter(String expectedType) { + return message == null ? 
null : new StringPayloadContentWriter(message, JSON_MIME); } @Override diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java index ef49879c06f..e64f588070c 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java @@ -32,8 +32,10 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; +import com.google.common.util.concurrent.AtomicDouble; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig; +import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo; import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventProcessorStage; import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; @@ -45,6 +47,8 @@ import org.apache.solr.cloud.SolrCloudTestCase; import org.apache.solr.common.cloud.LiveNodesListener; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.Utils; import org.apache.solr.util.LogLevel; @@ -84,6 +88,8 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { // use the same time source as triggers use private static final TimeSource timeSource = TimeSource.CURRENT_TIME; + private static final long WAIT_FOR_DELTA_NANOS = TimeUnit.MILLISECONDS.toNanos(5); + @BeforeClass public static void setupCluster() throws Exception { configureCluster(2) @@ -119,6 +125,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { actionInterrupted = new CountDownLatch(1); actionCompleted = new CountDownLatch(1); events.clear(); + listenerEvents.clear(); // clear any persisted auto scaling configuration Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true); log.info(SOLR_AUTOSCALING_CONF_PATH + " reset, new znode version {}", stat.getVersion()); @@ -582,12 +589,15 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { try { if (triggerFired.compareAndSet(false, true)) { events.add(event); - if (TimeUnit.MILLISECONDS.convert(timeSource.getTime() - event.getEventTime(), TimeUnit.NANOSECONDS) <= TimeUnit.MILLISECONDS.convert(waitForSeconds, TimeUnit.SECONDS)) { - fail("NodeAddedListener was fired before the configured waitFor period"); + long currentTimeNanos = timeSource.getTime(); + long eventTimeNanos = event.getEventTime(); + long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS; + if (currentTimeNanos - eventTimeNanos <= waitForNanos) { + fail(event.source + " was fired before the configured waitFor period"); } getTriggerFiredLatch().countDown(); } else { - fail("NodeAddedTrigger was fired more than once!"); + fail(event.source + " was fired more than once!"); } } catch (Throwable t) { log.debug("--throwable", t); @@ -1209,4 +1219,103 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { // must be larger than cooldown period assertTrue("timestamp delta is less than default cooldown period", ev.timestamp - prevTimestamp > 
TimeUnit.MILLISECONDS.toNanos(ScheduledTriggers.DEFAULT_COOLDOWN_PERIOD_MS)); } + + public static class TestSearchRateAction extends TriggerActionBase { + + @Override + public void process(TriggerEvent event, ActionContext context) throws Exception { + try { + events.add(event); + long currentTimeNanos = timeSource.getTime(); + long eventTimeNanos = event.getEventTime(); + long waitForNanos = TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS; + if (currentTimeNanos - eventTimeNanos <= waitForNanos) { + fail(event.source + " was fired before the configured waitFor period"); + } + getTriggerFiredLatch().countDown(); + } catch (Throwable t) { + log.debug("--throwable", t); + throw t; + } + } + } + + @Test + public void testSearchRate() throws Exception { + CloudSolrClient solrClient = cluster.getSolrClient(); + String COLL1 = "collection1"; + CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(COLL1, + "conf", 1, 2); + create.process(solrClient); + String setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'search_rate_trigger'," + + "'event' : 'searchRate'," + + "'waitFor' : '" + waitForSeconds + "s'," + + "'enabled' : true," + + "'rate' : 1.0," + + "'actions' : [" + + "{'name':'test','class':'" + TestSearchRateAction.class.getName() + "'}" + + "]" + + "}}"; + SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand); + NamedList response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + String setListenerCommand1 = "{" + + "'set-listener' : " + + "{" + + "'name' : 'srt'," + + "'trigger' : 'search_rate_trigger'," + + "'stage' : ['FAILED','SUCCEEDED']," + + "'class' : '" + TestTriggerListener.class.getName() + "'" + + "}" + + "}"; + req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + SolrParams query = params(CommonParams.Q, "*:*"); + for (int i = 0; i < 500; i++) { + solrClient.query(COLL1, query); + } + boolean await = triggerFiredLatch.await(20, TimeUnit.SECONDS); + assertTrue("The trigger did not fire at all", await); + // wait for listener to capture the SUCCEEDED stage + Thread.sleep(2000); + assertEquals(listenerEvents.toString(), 1, listenerEvents.get("srt").size()); + TestEvent ev = listenerEvents.get("srt").get(0); + long now = timeSource.getTime(); + // verify waitFor + assertTrue(TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS) - WAIT_FOR_DELTA_NANOS <= now - ev.event.getEventTime()); + Map nodeRates = (Map)ev.event.getProperties().get("node"); + assertNotNull("nodeRates", nodeRates); + assertTrue(nodeRates.toString(), nodeRates.size() > 0); + AtomicDouble totalNodeRate = new AtomicDouble(); + nodeRates.forEach((n, r) -> totalNodeRate.addAndGet(r)); + List replicaRates = (List)ev.event.getProperties().get("replica"); + assertNotNull("replicaRates", replicaRates); + assertTrue(replicaRates.toString(), replicaRates.size() > 0); + AtomicDouble totalReplicaRate = new AtomicDouble(); + replicaRates.forEach(r -> { + assertTrue(r.toString(), r.getVariable("rate") != null); + totalReplicaRate.addAndGet((Double)r.getVariable("rate")); + }); + Map shardRates = (Map)ev.event.getProperties().get("shard"); + assertNotNull("shardRates", shardRates); + assertEquals(shardRates.toString(), 1, shardRates.size()); + shardRates = (Map)shardRates.get(COLL1); + assertNotNull("shardRates", shardRates); +
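// after drilling into COLL1's entry, exactly one shard rate is expected +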
assertEquals(shardRates.toString(), 1, shardRates.size()); + AtomicDouble totalShardRate = new AtomicDouble(); + shardRates.forEach((s, r) -> totalShardRate.addAndGet((Double)r)); + Map collectionRates = (Map)ev.event.getProperties().get("collection"); + assertNotNull("collectionRates", collectionRates); + assertEquals(collectionRates.toString(), 1, collectionRates.size()); + Double collectionRate = collectionRates.get(COLL1); + assertNotNull(collectionRate); + assertTrue(collectionRate > 5.0); + assertEquals(collectionRate, totalNodeRate.get(), 5.0); + assertEquals(collectionRate, totalShardRate.get(), 5.0); + assertEquals(collectionRate, totalReplicaRate.get(), 5.0); + } } diff --git a/solr/core/src/test/org/apache/solr/cloud/BaseCdcrDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/BaseCdcrDistributedZkTest.java similarity index 99% rename from solr/core/src/test/org/apache/solr/cloud/BaseCdcrDistributedZkTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/BaseCdcrDistributedZkTest.java index fc563aabde5..e3a00a894ea 100644 --- a/solr/core/src/test/org/apache/solr/cloud/BaseCdcrDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/BaseCdcrDistributedZkTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.cdcr; import java.io.File; import java.io.IOException; @@ -40,6 +40,10 @@ import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.QueryRequest; import org.apache.solr.client.solrj.response.CollectionAdminResponse; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.AbstractZkTestCase; +import org.apache.solr.cloud.ChaosMonkey; +import org.apache.solr.cloud.OverseerCollectionMessageHandler; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java new file mode 100644 index 00000000000..70cb5eb0217 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBidirectionalTest.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.solr.cloud.cdcr; + +import java.lang.invoke.MethodHandles; +import java.util.concurrent.TimeUnit; + +import com.google.common.collect.ImmutableMap; +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.request.AbstractUpdateRequest; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.UpdateRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.MiniSolrCloudCluster; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.handler.CdcrParams; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CdcrBidirectionalTest extends SolrTestCaseJ4 { + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + @Test + public void testBiDir() throws Exception { + MiniSolrCloudCluster cluster2 = new MiniSolrCloudCluster(1, createTempDir("cdcr-cluster2"), buildJettyConfig("/solr")); + cluster2.waitForAllNodes(30); + MiniSolrCloudCluster cluster1 = new MiniSolrCloudCluster(1, createTempDir("cdcr-cluster1"), buildJettyConfig("/solr")); + cluster1.waitForAllNodes(30); + try { + log.info("cluster2 zkHost = " + cluster2.getZkServer().getZkAddress()); + System.setProperty("cdcr.cluster2.zkHost", cluster2.getZkServer().getZkAddress()); + + log.info("cluster1 zkHost = " + cluster1.getZkServer().getZkAddress()); + System.setProperty("cdcr.cluster1.zkHost", cluster1.getZkServer().getZkAddress()); + + + cluster1.uploadConfigSet(configset("cdcr-cluster1"), "cdcr-cluster1"); + CollectionAdminRequest.createCollection("cdcr-cluster1", "cdcr-cluster1", 2, 1) + .withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory") + .setMaxShardsPerNode(2) + .process(cluster1.getSolrClient()); + CloudSolrClient cluster1SolrClient = cluster1.getSolrClient(); + cluster1SolrClient.setDefaultCollection("cdcr-cluster1"); + + cluster2.uploadConfigSet(configset("cdcr-cluster2"), "cdcr-cluster2"); + CollectionAdminRequest.createCollection("cdcr-cluster2", "cdcr-cluster2", 2, 1) + .withProperty("solr.directoryFactory", "solr.StandardDirectoryFactory") + .setMaxShardsPerNode(2) + .process(cluster2.getSolrClient()); + CloudSolrClient cluster2SolrClient = cluster2.getSolrClient(); + cluster2SolrClient.setDefaultCollection("cdcr-cluster2"); + + UpdateRequest req = null; + + CdcrTestsUtil.cdcrStart(cluster1SolrClient); + Thread.sleep(2000); + + // ADD operation on cluster 1 + int docs = (TEST_NIGHTLY ?
100 : 10); + int numDocs_c1 = 0; + for (int k = 0; k < docs; k++) { + req = new UpdateRequest(); + for (; numDocs_c1 < (k + 1) * 100; numDocs_c1++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "cluster1_" + numDocs_c1); + doc.addField("xyz", numDocs_c1); + req.add(doc); + } + req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); + log.info("Adding 100 docs with commit=true, numDocs=" + numDocs_c1); + req.process(cluster1SolrClient); + } + + QueryResponse response = cluster1SolrClient.query(new SolrQuery("*:*")); + assertEquals("cluster 1 docs mismatch", numDocs_c1, response.getResults().getNumFound()); + + assertEquals("cluster 2 docs mismatch", numDocs_c1, CdcrTestsUtil.waitForClusterToSync(numDocs_c1, cluster2SolrClient)); + + CdcrTestsUtil.cdcrStart(cluster2SolrClient); // FULL BI-DIRECTIONAL CDCR FORWARDING ON + Thread.sleep(2000); + + // ADD operation on cluster 2 + int numDocs_c2 = 0; + for (int k = 0; k < docs; k++) { + req = new UpdateRequest(); + for (; numDocs_c2 < (k + 1) * 100; numDocs_c2++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "cluster2_" + numDocs_c2); + doc.addField("xyz", numDocs_c2); + req.add(doc); + } + req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); + log.info("Adding 100 docs with commit=true, numDocs=" + numDocs_c2); + req.process(cluster2SolrClient); + } + + int numDocs = numDocs_c1 + numDocs_c2; + + response = cluster2SolrClient.query(new SolrQuery("*:*")); + assertEquals("cluster 2 docs mismatch", numDocs, response.getResults().getNumFound()); + + assertEquals("cluster 1 docs mismatch", numDocs, CdcrTestsUtil.waitForClusterToSync(numDocs, cluster1SolrClient)); + + // logging cdcr clusters queue response + response = CdcrTestsUtil.getCdcrQueue(cluster1SolrClient); + log.info("Cdcr cluster1 queue response: " + response.getResponse()); + response = CdcrTestsUtil.getCdcrQueue(cluster2SolrClient); + log.info("Cdcr cluster2 queue response: " + response.getResponse()); + + // let's find and keep the maximum version assigned by cluster1 & cluster2 across all our updates + + long maxVersion_c1 = Math.min((long)CdcrTestsUtil.getFingerPrintMaxVersion(cluster1SolrClient, "shard1", numDocs), + (long)CdcrTestsUtil.getFingerPrintMaxVersion(cluster1SolrClient, "shard2", numDocs)); + long maxVersion_c2 = Math.min((long)CdcrTestsUtil.getFingerPrintMaxVersion(cluster2SolrClient, "shard1", numDocs), + (long)CdcrTestsUtil.getFingerPrintMaxVersion(cluster2SolrClient, "shard2", numDocs)); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString()); + params.set(CommonParams.QT, "/cdcr"); + response = cluster2SolrClient.query(params); + Long checkpoint_2 = (Long) response.getResponse().get(CdcrParams.CHECKPOINT); + assertNotNull(checkpoint_2); + + params = new ModifiableSolrParams(); + params.set(CommonParams.ACTION, CdcrParams.CdcrAction.COLLECTIONCHECKPOINT.toString()); + params.set(CommonParams.QT, "/cdcr"); + response = cluster1SolrClient.query(params); + Long checkpoint_1 = (Long) response.getResponse().get(CdcrParams.CHECKPOINT); + assertNotNull(checkpoint_1); + + log.info("v1: " + maxVersion_c1 + "\t" + "v2: " + maxVersion_c2 + "\t" + + "checkpoint1: " + checkpoint_1 + "\t" + "checkpoint2: " + checkpoint_2); + + assertEquals("COLLECTIONCHECKPOINT from cluster2 should have returned the maximum " + + "version across all updates made to cluster1", maxVersion_c1, checkpoint_2.longValue());
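+ // mirror-image check: cluster1's checkpoint must reflect the maximum version assigned by cluster2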
+ assertEquals("COLLECTIONCHECKPOINT from cluster1 should have returned the maximum " + + "version across all updates made to cluster2", maxVersion_c2, checkpoint_1.longValue()); + assertEquals("max versions of updates in both clusters should be same", maxVersion_c1, maxVersion_c2); + + // DELETE BY QUERY + String deleteByQuery = "id:cluster1_" + String.valueOf(random().nextInt(numDocs_c1)); + response = cluster1SolrClient.query(new SolrQuery(deleteByQuery)); + assertEquals("should match exactly one doc", 1, response.getResults().getNumFound()); + cluster1SolrClient.deleteByQuery(deleteByQuery); + cluster1SolrClient.commit(); + numDocs--; + numDocs_c1--; + + response = cluster1SolrClient.query(new SolrQuery("*:*")); + assertEquals("cluster 1 docs mismatch", numDocs, response.getResults().getNumFound()); + assertEquals("cluster 2 docs mismatch", numDocs, CdcrTestsUtil.waitForClusterToSync(numDocs, cluster2SolrClient)); + + // DELETE BY ID + SolrInputDocument doc; + String delete_id_query = "cluster2_" + random().nextInt(numDocs_c2); + cluster2SolrClient.deleteById(delete_id_query); + cluster2SolrClient.commit(); + numDocs--; + numDocs_c2--; + response = cluster2SolrClient.query(new SolrQuery("*:*")); + assertEquals("cluster 2 docs mismatch", numDocs, response.getResults().getNumFound()); + assertEquals("cluster 1 docs mismatch", numDocs, CdcrTestsUtil.waitForClusterToSync(numDocs, cluster1SolrClient)); + + // ATOMIC UPDATES + req = new UpdateRequest(); + doc = new SolrInputDocument(); + String atomicUpdateId = "cluster2_" + random().nextInt(numDocs_c2); + doc.addField("id", atomicUpdateId); + doc.addField("xyz", ImmutableMap.of("delete", "")); + doc.addField("abc", ImmutableMap.of("set", "ABC")); + req.add(doc); + req.process(cluster2SolrClient); + cluster2SolrClient.commit(); + + String atomicQuery = "id:" + atomicUpdateId; + response = cluster2SolrClient.query(new SolrQuery(atomicQuery)); + assertEquals("cluster 2 wrong doc", "ABC", response.getResults().get(0).get("abc")); + assertEquals("cluster 1 wrong doc", "ABC", getDocFieldValue(cluster1SolrClient, atomicQuery, "ABC")); + + + // logging cdcr clusters queue response + response = CdcrTestsUtil.getCdcrQueue(cluster1SolrClient); + log.info("Cdcr cluster1 queue response at end of testcase: " + response.getResponse()); + response = CdcrTestsUtil.getCdcrQueue(cluster2SolrClient); + log.info("Cdcr cluster2 queue response at end of testcase: " + response.getResponse()); + + CdcrTestsUtil.cdcrStop(cluster1SolrClient); + CdcrTestsUtil.cdcrStop(cluster2SolrClient); + } finally { + if (cluster1 != null) { + cluster1.shutdown(); + } + if (cluster2 != null) { + cluster2.shutdown(); + } + } + } + + private String getDocFieldValue(CloudSolrClient clusterSolrClient, String query, String match) throws Exception { + long start = System.nanoTime(); + QueryResponse response = null; + while (System.nanoTime() - start <= TimeUnit.NANOSECONDS.convert(120, TimeUnit.SECONDS)) { + clusterSolrClient.commit(); + response = clusterSolrClient.query(new SolrQuery(query)); + if (match.equals(response.getResults().get(0).get("abc"))) { + break; + } + Thread.sleep(1000); + } + return response != null ?
(String) response.getResults().get(0).get("abc") : ""; + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java similarity index 77% rename from solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java index 580c047d7f5..2cabb502daf 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrBootstrapTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrBootstrapTest.java @@ -15,12 +15,11 @@ * limitations under the License. */ -package org.apache.solr.cloud; +package org.apache.solr.cloud.cdcr; import java.io.IOException; import java.lang.invoke.MethodHandles; -import java.util.List; -import java.util.concurrent.TimeUnit; +import java.util.LinkedHashMap; import org.apache.lucene.store.FSDirectory; import org.apache.solr.SolrTestCaseJ4; @@ -32,10 +31,11 @@ import org.apache.solr.client.solrj.request.AbstractUpdateRequest; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.AbstractDistribZkTestBase; +import org.apache.solr.cloud.MiniSolrCloudCluster; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.NamedList; import org.apache.solr.handler.CdcrParams; import org.junit.Test; import org.slf4j.Logger; @@ -89,11 +89,9 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { ModifiableSolrParams params = new ModifiableSolrParams(); params.set(CommonParams.QT, "/get"); params.set("getVersions", numDocs); + params.set("fingerprint", true); response = sourceSolrClient.query(params); - List versions = (List) response.getResponse().get("versions"); - for (Long version : versions) { - maxVersion = Math.max(maxVersion, version); - } + maxVersion = (long)(((LinkedHashMap)response.getResponse().get("fingerprint")).get("maxVersionEncountered")); // upload the cdcr-enabled config and restart source cluster source.uploadConfigSet(configset("cdcr-source"), "cdcr-source"); @@ -113,12 +111,12 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { targetSolrClient.setDefaultCollection("cdcr-target"); Thread.sleep(1000); - cdcrStart(targetSolrClient); - cdcrStart(sourceSolrClient); + CdcrTestsUtil.cdcrStart(targetSolrClient); + CdcrTestsUtil.cdcrStart(sourceSolrClient); - response = getCdcrQueue(sourceSolrClient); + response = CdcrTestsUtil.getCdcrQueue(sourceSolrClient); log.info("Cdcr queue response: " + response.getResponse()); - long foundDocs = waitForTargetToSync(numDocs, targetSolrClient); + long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient); assertEquals("Document mismatch on target after sync", numDocs, foundDocs); params = new ModifiableSolrParams(); @@ -189,12 +187,12 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { CloudSolrClient targetSolrClient = target.getSolrClient(); targetSolrClient.setDefaultCollection("cdcr-target"); - cdcrStart(targetSolrClient); - cdcrStart(sourceSolrClient); + CdcrTestsUtil.cdcrStart(targetSolrClient); + CdcrTestsUtil.cdcrStart(sourceSolrClient); - response = getCdcrQueue(sourceSolrClient); + response = CdcrTestsUtil.getCdcrQueue(sourceSolrClient); log.info("Cdcr queue response: " + response.getResponse()); - long foundDocs = 
waitForTargetToSync(numDocs, targetSolrClient); + long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient); assertEquals("Document mismatch on target after sync", numDocs, foundDocs); int total_tlogs_in_index = FSDirectory.open(target.getBaseDir().resolve("node1"). @@ -203,8 +201,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { assertEquals("tlogs count should be ZERO",0, total_tlogs_in_index); - cdcrStop(sourceSolrClient); - cdcrDisableBuffer(sourceSolrClient); + CdcrTestsUtil.cdcrStop(sourceSolrClient); + CdcrTestsUtil.cdcrDisableBuffer(sourceSolrClient); int c = 0; for (int k = 0; k < 10; k++) { @@ -223,10 +221,10 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { response = sourceSolrClient.query(new SolrQuery("*:*")); assertEquals("", numDocs, response.getResults().getNumFound()); - cdcrStart(sourceSolrClient); - cdcrEnableBuffer(sourceSolrClient); + CdcrTestsUtil.cdcrStart(sourceSolrClient); + CdcrTestsUtil.cdcrEnableBuffer(sourceSolrClient); - foundDocs = waitForTargetToSync(numDocs, targetSolrClient); + foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient); assertEquals("Document mismatch on target after sync", numDocs, foundDocs); } finally { @@ -269,8 +267,8 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { targetSolrClient.setDefaultCollection("cdcr-target"); Thread.sleep(1000); - cdcrStart(targetSolrClient); - cdcrStart(sourceSolrClient); + CdcrTestsUtil.cdcrStart(targetSolrClient); + CdcrTestsUtil.cdcrStart(sourceSolrClient); int c = 0; for (int k = 0; k < docs; k++) { UpdateRequest req = new UpdateRequest(); @@ -288,9 +286,9 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { response = sourceSolrClient.query(new SolrQuery("*:*")); assertEquals("", numDocs, response.getResults().getNumFound()); - response = getCdcrQueue(sourceSolrClient); + response = CdcrTestsUtil.getCdcrQueue(sourceSolrClient); log.info("Cdcr queue response: " + response.getResponse()); - long foundDocs = waitForTargetToSync(numDocs, targetSolrClient); + long foundDocs = CdcrTestsUtil.waitForClusterToSync(numDocs, targetSolrClient); assertEquals("Document mismatch on target after sync", numDocs, foundDocs); } finally { @@ -301,56 +299,4 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 { } } - private long waitForTargetToSync(int numDocs, CloudSolrClient targetSolrClient) throws SolrServerException, IOException, InterruptedException { - long start = System.nanoTime(); - QueryResponse response = null; - while (System.nanoTime() - start <= TimeUnit.NANOSECONDS.convert(120, TimeUnit.SECONDS)) { - try { - targetSolrClient.commit(); - response = targetSolrClient.query(new SolrQuery("*:*")); - if (response.getResults().getNumFound() == numDocs) { - break; - } - } catch (Exception e) { - log.warn("Exception trying to commit on target. This is expected and safe to ignore.", e); - } - Thread.sleep(1000); - } - return response != null ? 
response.getResults().getNumFound() : 0; - } - - - private void cdcrStart(CloudSolrClient client) throws SolrServerException, IOException { - QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.START); - assertEquals("started", ((NamedList) response.getResponse().get("status")).get("process")); - } - - private void cdcrStop(CloudSolrClient client) throws SolrServerException, IOException { - QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.STOP); - assertEquals("stopped", ((NamedList) response.getResponse().get("status")).get("process")); - } - - private void cdcrEnableBuffer(CloudSolrClient client) throws IOException, SolrServerException { - QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.ENABLEBUFFER); - assertEquals("enabled", ((NamedList) response.getResponse().get("status")).get("buffer")); - } - - private void cdcrDisableBuffer(CloudSolrClient client) throws IOException, SolrServerException { - QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.DISABLEBUFFER); - assertEquals("disabled", ((NamedList) response.getResponse().get("status")).get("buffer")); - } - - private QueryResponse invokeCdcrAction(CloudSolrClient client, CdcrParams.CdcrAction action) throws IOException, SolrServerException { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(CommonParams.QT, "/cdcr"); - params.set(CommonParams.ACTION, action.toLower()); - return client.query(params); - } - - private QueryResponse getCdcrQueue(CloudSolrClient client) throws SolrServerException, IOException { - ModifiableSolrParams params = new ModifiableSolrParams(); - params.set(CommonParams.QT, "/cdcr"); - params.set(CommonParams.ACTION, CdcrParams.QUEUES); - return client.query(params); - } } diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrReplicationDistributedZkTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationDistributedZkTest.java similarity index 99% rename from solr/core/src/test/org/apache/solr/cloud/CdcrReplicationDistributedZkTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationDistributedZkTest.java index 35592ffb8a7..206a08f807f 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrReplicationDistributedZkTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationDistributedZkTest.java @@ -14,7 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.cdcr; + +import java.io.File; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import org.apache.lucene.util.LuceneTestCase.BadApple; import org.apache.lucene.util.LuceneTestCase.Nightly; @@ -25,12 +31,6 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.File; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - @Nightly @BadApple(bugUrl = "https://issues.apache.org/jira/browse/SOLR-10107") public class CdcrReplicationDistributedZkTest extends BaseCdcrDistributedZkTest { diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrReplicationHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationHandlerTest.java similarity index 99% rename from solr/core/src/test/org/apache/solr/cloud/CdcrReplicationHandlerTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationHandlerTest.java index 1663949973c..65826c4c211 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrReplicationHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrReplicationHandlerTest.java @@ -14,18 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.solr.cloud; - -import org.apache.lucene.util.LuceneTestCase.Nightly; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.CloudSolrClient; -import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.util.DefaultSolrThreadFactory; -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +package org.apache.solr.cloud.cdcr; import java.io.File; import java.io.IOException; @@ -39,6 +28,18 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.util.LuceneTestCase.Nightly; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.cloud.ChaosMonkey; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.util.DefaultSolrThreadFactory; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * This class is testing the cdcr extension to the {@link org.apache.solr.handler.ReplicationHandler} and * {@link org.apache.solr.handler.IndexFetcher}. diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java similarity index 99% rename from solr/core/src/test/org/apache/solr/cloud/CdcrRequestHandlerTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java index c74079cc62a..237cc5805fd 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrRequestHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrRequestHandlerTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.cdcr; import org.apache.lucene.util.LuceneTestCase.Nightly; import org.apache.solr.common.util.NamedList; diff --git a/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java new file mode 100644 index 00000000000..99aa47196e0 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrTestsUtil.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.cdcr; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.LinkedHashMap; +import java.util.concurrent.TimeUnit; + +import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.handler.CdcrParams; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CdcrTestsUtil extends SolrTestCaseJ4 { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + protected static void cdcrStart(CloudSolrClient client) throws SolrServerException, IOException { + QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.START); + assertEquals("started", ((NamedList) response.getResponse().get("status")).get("process")); + } + + protected static void cdcrStop(CloudSolrClient client) throws SolrServerException, IOException { + QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.STOP); + assertEquals("stopped", ((NamedList) response.getResponse().get("status")).get("process")); + } + + protected static void cdcrEnableBuffer(CloudSolrClient client) throws IOException, SolrServerException { + QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.ENABLEBUFFER); + assertEquals("enabled", ((NamedList) response.getResponse().get("status")).get("buffer")); + } + + protected static void cdcrDisableBuffer(CloudSolrClient client) throws IOException, SolrServerException { + QueryResponse response = invokeCdcrAction(client, CdcrParams.CdcrAction.DISABLEBUFFER); + assertEquals("disabled", ((NamedList) response.getResponse().get("status")).get("buffer")); + } + + protected static QueryResponse invokeCdcrAction(CloudSolrClient client, CdcrParams.CdcrAction action) throws IOException, SolrServerException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(CommonParams.QT, "/cdcr"); +
params.set(CommonParams.ACTION, action.toLower()); + return client.query(params); + } + + protected static QueryResponse getCdcrQueue(CloudSolrClient client) throws SolrServerException, IOException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(CommonParams.QT, "/cdcr"); + params.set(CommonParams.ACTION, CdcrParams.QUEUES); + return client.query(params); + } + + protected static Object getFingerPrintMaxVersion(CloudSolrClient client, String shardNames, int numDocs) throws SolrServerException, IOException, InterruptedException { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set(CommonParams.QT, "/get"); + params.set("fingerprint", true); + params.set("shards", shardNames); + params.set("getVersions", numDocs); + + QueryResponse response = null; + long start = System.nanoTime(); + while (System.nanoTime() - start <= TimeUnit.NANOSECONDS.convert(20, TimeUnit.SECONDS)) { + response = client.query(params); + if (response.getResponse() != null && response.getResponse().get("fingerprint") != null) { + return (long)((LinkedHashMap)response.getResponse().get("fingerprint")).get("maxVersionEncountered"); + } + Thread.sleep(200); + } + log.error("maxVersionEncountered not found for client: " + client + " within 20 seconds"); + return null; + } + + protected static long waitForClusterToSync(int numDocs, CloudSolrClient clusterSolrClient) throws Exception { + return waitForClusterToSync(numDocs, clusterSolrClient, "*:*"); + } + + protected static long waitForClusterToSync(int numDocs, CloudSolrClient clusterSolrClient, String query) throws Exception { + long start = System.nanoTime(); + QueryResponse response = null; + while (System.nanoTime() - start <= TimeUnit.NANOSECONDS.convert(120, TimeUnit.SECONDS)) { + clusterSolrClient.commit(); + response = clusterSolrClient.query(new SolrQuery(query)); + if (response.getResults().getNumFound() == numDocs) { + break; + } + Thread.sleep(1000); + } + return response != null ? response.getResults().getNumFound() : 0; + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/CdcrVersionReplicationTest.java b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrVersionReplicationTest.java similarity index 99% rename from solr/core/src/test/org/apache/solr/cloud/CdcrVersionReplicationTest.java rename to solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrVersionReplicationTest.java index 59d3818b3d5..ff9afe2c6c4 100644 --- a/solr/core/src/test/org/apache/solr/cloud/CdcrVersionReplicationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/cdcr/CdcrVersionReplicationTest.java @@ -14,7 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -package org.apache.solr.cloud; +package org.apache.solr.cloud.cdcr; + +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrServerException; @@ -30,11 +35,6 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.lang.invoke.MethodHandles; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - public class CdcrVersionReplicationTest extends BaseCdcrDistributedZkTest { diff --git a/solr/core/src/test/org/apache/solr/cloud/overseer/TestClusterStateMutator.java b/solr/core/src/test/org/apache/solr/cloud/overseer/TestClusterStateMutator.java index 311e14df060..672c78707ab 100644 --- a/solr/core/src/test/org/apache/solr/cloud/overseer/TestClusterStateMutator.java +++ b/solr/core/src/test/org/apache/solr/cloud/overseer/TestClusterStateMutator.java @@ -27,10 +27,17 @@ import org.apache.solr.common.cloud.ImplicitDocRouter; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.util.Utils; +import org.junit.BeforeClass; import static org.mockito.Mockito.*; public class TestClusterStateMutator extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() { + assumeWorkingMockito(); + } + public void testCreateCollection() throws Exception { ClusterState clusterState = new ClusterState(-1, Collections.emptySet(), Collections.emptyMap()); DistribStateManager mockStateManager = mock(DistribStateManager.class); diff --git a/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java b/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java index 709555f330b..67410ac22a3 100644 --- a/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/rule/ImplicitSnitchTest.java @@ -23,6 +23,7 @@ import java.util.Map; import com.google.common.collect.Sets; import org.apache.lucene.util.LuceneTestCase; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.rule.ImplicitSnitch; @@ -134,6 +135,8 @@ public class ImplicitSnitchTest extends LuceneTestCase { @Test public void testGetTags_withAllHostNameRequestedTags_returns_all_Tags() throws Exception { + SolrTestCaseJ4.assumeWorkingMockito(); + String node = "serv01.dc01.london.uk.apache.org:8983_solr"; SnitchContext context = new ServerSnitchContext(null, node, new HashMap<>(),null); @@ -153,6 +156,8 @@ public class ImplicitSnitchTest extends LuceneTestCase { @Test public void testGetTags_withHostNameRequestedTag_ip3_returns_1_tag() throws Exception { + SolrTestCaseJ4.assumeWorkingMockito(); + String node = "serv01.dc01.london.uk.apache.org:8983_solr"; SnitchContext context = new ServerSnitchContext(null, node, new HashMap<>(),null); @@ -168,6 +173,8 @@ public class ImplicitSnitchTest extends LuceneTestCase { @Test public void testGetTags_withHostNameRequestedTag_ip99999_returns_nothing() throws Exception { + SolrTestCaseJ4.assumeWorkingMockito(); + String node = "serv01.dc01.london.uk.apache.org:8983_solr"; SnitchContext context = new ServerSnitchContext(null, node, new HashMap<>(),null); diff --git a/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java b/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java index b6f0d097dfd..7b4dc06b16b 100644 
--- a/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java +++ b/solr/core/src/test/org/apache/solr/core/BlobRepositoryMockingTest.java @@ -25,8 +25,10 @@ import java.nio.charset.Charset; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.io.IOUtils; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import static org.mockito.Mockito.*; @@ -53,6 +55,11 @@ public class BlobRepositoryMockingTest { ByteBuffer blobData = ByteBuffer.wrap(BLOBSTR.getBytes(UTF8)); boolean blobFetched = false; String blobKey = ""; + + @BeforeClass + public static void beforeClass() { + SolrTestCaseJ4.assumeWorkingMockito(); + } @Before public void setUp() throws IllegalAccessException, NoSuchFieldException { diff --git a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java index eedc652dc15..bb8bb678bfd 100644 --- a/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java +++ b/solr/core/src/test/org/apache/solr/core/CoreSorterTest.java @@ -101,6 +101,8 @@ public class CoreSorterTest extends SolrTestCaseJ4 { } private CoreContainer getMockContainer() { + assumeWorkingMockito(); + CoreContainer mockCC = mock(CoreContainer.class); ZkController mockZKC = mock(ZkController.class); ClusterState mockClusterState = mock(ClusterState.class); diff --git a/solr/core/src/test/org/apache/solr/handler/admin/TestCoreAdminApis.java b/solr/core/src/test/org/apache/solr/handler/admin/TestCoreAdminApis.java index 9477563adaf..dcaa64a9f5a 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/TestCoreAdminApis.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/TestCoreAdminApis.java @@ -70,6 +70,8 @@ public class TestCoreAdminApis extends SolrTestCaseJ4 { } public static CoreContainer getCoreContainerMock(final Map in,Map out ) { + assumeWorkingMockito(); + CoreContainer mockCC = mock(CoreContainer.class); when(mockCC.create(any(String.class), any(Path.class) , any(Map.class), anyBoolean())).thenAnswer(invocationOnMock -> { in.put("create", invocationOnMock.getArguments()); diff --git a/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java b/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java index d91ba88b35d..69d135c23cd 100644 --- a/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java +++ b/solr/core/src/test/org/apache/solr/legacy/TestLegacyTerms.java @@ -153,9 +153,9 @@ public class TestLegacyTerms extends LuceneTestCase { private static Terms EMPTY_TERMS = new Terms() { public TermsEnum iterator() { return TermsEnum.EMPTY; } public long size() { return -1; } - public long getSumTotalTermFreq() { return -1; } - public long getSumDocFreq() { return -1; } - public int getDocCount() { return -1; } + public long getSumTotalTermFreq() { return 0; } + public long getSumDocFreq() { return 0; } + public int getDocCount() { return 0; } public boolean hasFreqs() { return false; } public boolean hasOffsets() { return false; } public boolean hasPositions() { return false; } diff --git a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java index eca414cd8ff..23230cd8042 100644 --- a/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java +++ b/solr/core/src/test/org/apache/solr/metrics/reporters/SolrGangliaReporterTest.java @@ -45,6 +45,8 
@@ import static org.mockito.Mockito.*; public class SolrGangliaReporterTest extends SolrTestCaseJ4 { @Test public void testReporter() throws Exception { + assumeWorkingMockito(); + Path home = Paths.get(TEST_HOME()); // define these properties, they are used in solrconfig.xml System.setProperty("solr.test.sys.prop1", "propone"); diff --git a/solr/core/src/test/org/apache/solr/schema/SchemaWatcherTest.java b/solr/core/src/test/org/apache/solr/schema/SchemaWatcherTest.java index 4d46aad2dd8..729ec53c601 100644 --- a/solr/core/src/test/org/apache/solr/schema/SchemaWatcherTest.java +++ b/solr/core/src/test/org/apache/solr/schema/SchemaWatcherTest.java @@ -17,6 +17,7 @@ package org.apache.solr.schema; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.schema.ZkIndexSchemaReader.SchemaWatcher; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher.Event.EventType; @@ -35,6 +36,8 @@ public class SchemaWatcherTest { @Before public void setUp() throws Exception { + SolrTestCaseJ4.assumeWorkingMockito(); + mockSchemaReader = mock(ZkIndexSchemaReader.class); schemaWatcher = new SchemaWatcher(mockSchemaReader); } diff --git a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java index a00129a17c5..46f58a171fe 100644 --- a/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java +++ b/solr/core/src/test/org/apache/solr/schema/TestManagedSchemaThreadSafety.java @@ -135,6 +135,7 @@ public class TestManagedSchemaThreadSafety extends SolrTestCaseJ4 { } private ZkController createZkController(SolrZkClient client) throws KeeperException, InterruptedException { + assumeWorkingMockito(); CoreContainer mockAlwaysUpCoreContainer = mock(CoreContainer.class, Mockito.withSettings().defaultAnswer(Mockito.CALLS_REAL_METHODS)); diff --git a/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java b/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java index 25952775a85..a7dadcd7605 100644 --- a/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java +++ b/solr/core/src/test/org/apache/solr/security/TestPKIAuthenticationPlugin.java @@ -70,6 +70,8 @@ public class TestPKIAuthenticationPlugin extends SolrTestCaseJ4 { } public void test() throws Exception { + assumeWorkingMockito(); + AtomicReference principal = new AtomicReference<>(); String nodeName = "node_x_233"; diff --git a/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java b/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java index c8303036910..917b1b4bc81 100644 --- a/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java +++ b/solr/core/src/test/org/apache/solr/servlet/SolrRequestParserTest.java @@ -62,6 +62,7 @@ public class SolrRequestParserTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { + assumeWorkingMockito(); initCore("solrconfig.xml", "schema.xml"); parser = new SolrRequestParsers( h.getCore().getSolrConfig() ); } diff --git a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java index bc04a6f10db..97d7baa9613 100644 --- a/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java +++ b/solr/core/src/test/org/apache/solr/uninverting/TestFieldCacheSortRandom.java @@ -288,6 +288,11 @@ public class TestFieldCacheSortRandom 
extends LuceneTestCase { return new ConstantScoreScorer(this, score(), new BitSetIterator(bits, bits.approximateCardinality())); } + + @Override + public IndexReader.CacheHelper getCacheHelper(LeafReaderContext context) { + return context.reader().getCoreCacheHelper(); + } }; } diff --git a/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorFactoryTest.java index fe22918ded7..ad39b6ff71b 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorFactoryTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorFactoryTest.java @@ -104,6 +104,8 @@ public class ClassificationUpdateProcessorFactoryTest extends SolrTestCaseJ4 { @Test public void init_unsupportedFilterQuery_shouldThrowExceptionWithDetailedMessage() { + assumeWorkingMockito(); + UpdateRequestProcessor mockProcessor = mock(UpdateRequestProcessor.class); SolrQueryRequest mockRequest = mock(SolrQueryRequest.class); SolrQueryResponse mockResponse = mock(SolrQueryResponse.class); diff --git a/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorTest.java b/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorTest.java index df2e7aa9e14..2a9055a9e20 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/ClassificationUpdateProcessorTest.java @@ -61,6 +61,7 @@ public class ClassificationUpdateProcessorTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { + assumeWorkingMockito(); System.setProperty("enable.update.log", "false"); initCore("solrconfig-classification.xml", "schema-classification.xml"); } diff --git a/solr/core/src/test/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactoryTest.java b/solr/core/src/test/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactoryTest.java index 7e17f715da4..63069df2139 100644 --- a/solr/core/src/test/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactoryTest.java +++ b/solr/core/src/test/org/apache/solr/update/processor/SkipExistingDocumentsProcessorFactoryTest.java @@ -26,6 +26,7 @@ import java.io.IOException; import com.google.common.collect.ImmutableMap; import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.util.NamedList; @@ -34,6 +35,7 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.update.AddUpdateCommand; import org.apache.solr.update.processor.SkipExistingDocumentsProcessorFactory.SkipExistingDocumentsUpdateProcessor; +import org.junit.BeforeClass; import org.junit.Test; import org.mockito.Mockito; @@ -41,6 +43,11 @@ public class SkipExistingDocumentsProcessorFactoryTest { private BytesRef docId = new BytesRef(); private SolrQueryRequest defaultRequest = new LocalSolrQueryRequest(null, new NamedList()); + + @BeforeClass + public static void beforeClass() { + SolrTestCaseJ4.assumeWorkingMockito(); + } // Tests for logic in the factory diff --git a/solr/solr-ref-guide/build.xml b/solr/solr-ref-guide/build.xml index 1ce3778e9f0..6f989b91194 100644 --- a/solr/solr-ref-guide/build.xml +++ b/solr/solr-ref-guide/build.xml @@ 
-113,17 +113,19 @@ @@ -164,58 +166,80 @@ [build.xml hunks garbled in extraction; the XML element tags were lost. Recoverable fragments: includeantruntime="true", includes="**/*.class,**/*.xml", depends="build-init,build-nav-data-files", depends="-build-pdf-and-reduce-pdf,build-site", and the messages "Finished Building ${build.dir}/${pdf-filename}", "Ready to browse site: ${build.dir}/html-site/${main-page}.html", "Running Jekyll...", "Validated Links & Anchors via: ${build.dir}/bare-bones-html/", "PDF: ${build.dir}/${pdf-filename} SITE: ${build.dir}/html-site/${main-page}.html"] diff --git a/solr/solr-ref-guide/src/analytics-expression-sources.adoc b/solr/solr-ref-guide/src/analytics-expression-sources.adoc new file mode 100644 index 00000000000..c56337f59c5 --- /dev/null +++ b/solr/solr-ref-guide/src/analytics-expression-sources.adoc @@ -0,0 +1,91 @@ += Analytics Expression Sources +:page-tocclass: right +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Expression sources are the source of the data being aggregated in <>. + +These sources can be either Solr fields indexed with docValues, or constants. + +== Supported Field Types + +The following <> are supported. +Fields of these types can be either multi-valued or single-valued. + +All fields used in analytics expressions *must* have <> enabled. + + +// Since Trie* fields are deprecated as of 7.0, we should consider removing Trie* fields from this list... + +[horizontal] +String:: + StrField +Boolean:: + BoolField +Integer:: + TrieIntField + + IntPointField +Long:: + TrieLongField + + LongPointField +Float:: + TrieFloatField + + FloatPointField +Double:: + TrieDoubleField + + DoublePointField +Date:: + TrieDateField + + DatePointField + +.Multi-valued Field De-duplication +[WARNING] +==== +All multi-valued field types, except for PointFields, are de-duplicated, meaning duplicate values for the same field are removed during indexing. +In order to save duplicates, you must use PointField types. +==== + +== Constants + +Constants can be included in expressions to use alongside fields and functions. The available constants are shown below. +Constants do not need to be surrounded by any function to define them; they can be used exactly like fields in an expression. + +=== Strings + +There are two possible ways of specifying constant strings, as shown below. + +* Surrounded by double quotes, inside the quotes both `"` and `\` must be escaped with a `\` character. ++ +`"Inside of 'double' \\ \"quotes\""` \=> `Inside of 'double' \ "quotes"` +* Surrounded by single quotes, inside the quotes both `'` and `\` must be escaped with a `\` character.
++ `'Inside of "single" \\ \'quotes\''` \=> `Inside of "single" \ 'quotes'` + +=== Dates + +Dates can be specified in the same way as they are in Solr queries. Just use ISO-8601 format. +For more information, refer to the <> section. + +* `2017-07-17T19:35:08Z` + +=== Numeric + +Any non-decimal number will be read as an integer, or as a long if it is too large for an integer. All decimal numbers will be read as doubles. + +* `-123421`: Integer +* `800000000000`: Long +* `230.34`: Double diff --git a/solr/solr-ref-guide/src/analytics-mapping-functions.adoc b/solr/solr-ref-guide/src/analytics-mapping-functions.adoc new file mode 100644 index 00000000000..f6de0de41cd --- /dev/null +++ b/solr/solr-ref-guide/src/analytics-mapping-functions.adoc @@ -0,0 +1,360 @@ += Analytics Mapping Functions +:page-tocclass: right +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +Mapping functions map values for each Solr Document or Reduction. + +Below is a list of all mapping functions provided by the Analytics Component. +These mappings can be chained together to implement more complex functionality. + +== Numeric Functions + +=== Negation +Negates the result of a numeric expression. + +`neg(< _Numeric_ T >)` \=> `< T >`:: + * `neg(10.53)` \=> `-10.53` + * `neg([1, -4])` \=> `[-1, 4]` + +=== Absolute Value +Returns the absolute value of the numeric expression. + +`abs(< _Numeric_ T >)` \=> `< T >`:: + * `abs(-10.53)` \=> `10.53` + * `abs([1, -4])` \=> `[1, 4]` + +[[analytics-round]] +=== Round +Rounds the numeric expression to the nearest `Integer` or `Long` value. + +`round(< _Float_ >)` \=> `< _Int_ >`:: +`round(< _Double_ >)` \=> `< _Long_ >`:: + * `round(-1.5)` \=> `-1` + * `round([1.75, 100.34])` \=> `[2, 100]` + +=== Ceiling +Rounds the numeric expression to the nearest `Integer` or `Long` value that is greater than or equal to the original value. + +`ceil(< _Float_ >)` \=> `< _Int_ >`:: +`ceil(< _Double_ >)` \=> `< _Long_ >`:: + * `ceil(5.01)` \=> `6` + * `ceil([-4.999, 6.99])` \=> `[-4, 7]` + +[[analytics-floor]] +=== Floor +Rounds the numeric expression to the nearest `Integer` or `Long` value that is less than or equal to the original value. + +`floor(< _Float_ >)` \=> `< _Int_ >`:: +`floor(< _Double_ >)` \=> `< _Long_ >`:: + * `floor(5.75)` \=> `5` + * `floor([-4.001, 6.01])` \=> `[-5, 6]` + +=== Addition +Adds the values of the numeric expressions.
+ +`add(< _Multi Double_ >)` \=> `< _Single Double_ >`:: + * `add([1, -4])` \=> `-3.0` +`add(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `add(3.5, [1, -4])` \=> `[4.5, -0.5]` +`add(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `add([1, -4], 3.5)` \=> `[4.5, -0.5]` +`add(< _Single Double_ >, ...)` \=> `< _Single Double_ >`:: + * `add(3.5, 100, -27.6)` \=> `75.9` + +=== Subtraction +Subtracts the values of the numeric expressions. + +`sub(< _Single Double_ >, < _Single Double_ >)` \=> `< _Single Double_ >`:: + * `sub(3.5, 100)` \=> `-76.5` +`sub(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `sub(3.5, [1, -4])` \=> `[2.5, 7.5]` +`sub(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `sub([1, -4], 3.5)` \=> `[-2.5, -7.5]` + +=== Multiplication +Multiplies the values of the numeric expressions. + +`mult(< _Multi Double_ >)` \=> `< _Single Double_ >`:: + * `mult([1, -4])` \=> `-4.0` +`mult(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `mult(3.5, [1, -4])` \=> `[3.5, -14.0]` +`mult(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `mult([1, -4], 3.5)` \=> `[3.5, -14.0]` +`mult(< _Single Double_ >, ...)` \=> `< _Single Double_ >`:: + * `mult(3.5, 100, -27.6)` \=> `-9660` + +=== Division +Divides the values of the numeric expressions. + +`div(< _Single Double_ >, < _Single Double_ >)` \=> `< _Single Double_ >`:: + * `div(3.5, 100)` \=> `0.035` +`div(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `div(3.5, [1, -4])` \=> `[3.5, -0.875]` +`div(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `div([1, -4], 25)` \=> `[0.04, -0.16]` + +=== Power +Takes one numeric expression to the power of another. + +*NOTE:* The square root function `sqrt(< _Double_ >)` can be used as shorthand for `pow(< _Double_ >, .5)` + +`pow(< _Single Double_ >, < _Single Double_ >)` \=> `< _Single Double_ >`:: + * `pow(2, 4)` \=> `16.0` +`pow(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `pow(16, [-1, 0])` \=> `[0.0625, 1]` +`pow(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `pow([1, 16], .25)` \=> `[1.0, 2.0]` + +=== Logarithm +Takes the logarithm of one numeric expression, with an optional second numeric expression as the base. +If only one expression is given, the natural log is used. + +`log(< _Double_ >)` \=> `< _Double_ >`:: + * `log(5)` \=> `1.6094...` + * `log([1.0, 100.34])` \=> `[0.0, 4.6085...]` +`log(< _Single Double_ >, < _Single Double_ >)` \=> `< _Single Double_ >`:: + * `log(2, 4)` \=> `0.5` +`log(< _Single Double_ >, < _Multi Double_ >)` \=> `< _Multi Double_ >`:: + * `log(16, [2, 4])` \=> `[4, 2]` +`log(< _Multi Double_ >, < _Single Double_ >)` \=> `< _Multi Double_ >`:: + * `log([81, 3], 9)` \=> `[2.0, 0.5]` + +== Logic + +[[analytics-logic-neg]] +=== Negation +Negates the result of a boolean expression. + +`neg(< _Bool_ >)` \=> `< _Bool_ >`:: + * `neg(F)` \=> `T` + * `neg([F, T])` \=> `[T, F]` + +[[analytics-and]] +=== And +ANDs the values of the boolean expressions.
+ +`and(< _Multi Bool_ >)` \=> `< _Single Bool_ >`:: + * `and([T, F, T])` \=> `F` +`and(< _Single Bool_ >, < _Multi Bool_ >)` \=> `< _Multi Bool_ >`:: + * `and(F, [T, T])` \=> `[F, F]` +`and(< _Multi Bool_ >, < _Single Bool_ >)` \=> `< _Multi Bool_ >`:: + * `and([F, T], T)` \=> `[F, T]` +`and(< _Single Bool_ >, ...)` \=> `< _Single Bool_ >`:: + * `and(T, T, T)` \=> `T` + +[[analytics-or]] +=== Or +ORs the values of the boolean expressions. + +`or(< _Multi Bool_ >)` \=> `< _Single Bool_ >`:: + * `or([T, F, T])` \=> `T` +`or(< _Single Bool_ >, < _Multi Bool_ >)` \=> `< _Multi Bool_ >`:: + * `or(F, [F, T])` \=> `[F, T]` +`or(< _Multi Bool_ >, < _Single Bool_ >)` \=> `< _Multi Bool_ >`:: + * `or([F, T], T)` \=> `[T, T]` +`or(< _Single Bool_ >, ...)` \=> `< _Single Bool_ >`:: + * `or(F, F, F)` \=> `F` + +=== Exists +Checks whether any value(s) exist for the expression. + +`exists(< T >)` \=> `< _Single Bool_ >`:: + * `exists([1, 2, 3])` \=> `T` + * `exists([])` \=> `F` + * `exists(_empty_)` \=> `F` + * `exists('abc')` \=> `T` + +== Comparison + +=== Equality +Checks whether two expressions' values are equal. The parameters must be the same type, after implicit casting. + +`equal(< _Single_ T >, < _Single_ T >)` \=> `< _Single Bool_ >`:: + * `equal(F, F)` \=> `T` +`equal(< _Single_ T >, < _Multi_ T >)` \=> `< _Multi Bool_ >`:: + * `equal("a", ["a", "ab"])` \=> `[T, F]` +`equal(< _Multi_ T >, < _Single_ T >)` \=> `< _Multi Bool_ >`:: + * `equal([1.5, -3.0], -3)` \=> `[F, T]` + +=== Greater Than +Checks whether a numeric or `Date` expression's values are greater than another expression's values. +The parameters must be the same type, after implicit casting. + +`gt(< _Single Numeric/Date_ T >, < _Single_ T >)` \=> `< _Single Bool_ >`:: + * `gt(1800-01-02, 1799-12-20)` \=> `T` +`gt(< _Single Numeric/Date_ T >, < _Multi_ T >)` \=> `< _Multi Bool_ >`:: + * `gt(30.756, [30, 100])` \=> `[T, F]` +`gt(< _Multi Numeric/Date_ T >, < _Single_ T >)` \=> `< _Multi Bool_ >`:: + * `gt([30, 75.6], 30)` \=> `[F, T]` + +=== Greater Than or Equals +Checks whether a numeric or `Date` expression's values are greater than or equal to another expression's values. +The parameters must be the same type, after implicit casting. + +`gte(< _Single Numeric/Date_ T >, < _Single_ T >)` \=> `< _Single Bool_ >`:: + * `gte(1800-01-02, 1799-12-20)` \=> `T` +`gte(< _Single Numeric/Date_ T >, < _Multi_ T >)` \=> `< _Multi Bool_ >`:: + * `gte(30.756, [30, 100])` \=> `[T, F]` +`gte(< _Multi Numeric/Date_ T >, < _Single_ T >)` \=> `< _Multi Bool_ >`:: + * `gte([30, 75.6], 30)` \=> `[T, T]` + +=== Less Than +Checks whether a numeric or `Date` expression's values are less than another expression's values. +The parameters must be the same type, after implicit casting. + +`lt(< _Single Numeric/Date_ T >, < _Single_ T >)` \=> `< _Single Bool_ >`:: + * `lt(1800-01-02, 1799-12-20)` \=> `F` +`lt(< _Single Numeric/Date_ T >, < _Multi_ T >)` \=> `< _Multi Bool_ >`:: + * `lt(30.756, [30, 100])` \=> `[F, T]` +`lt(< _Multi Numeric/Date_ T >, < _Single_ T >)` \=> `< _Multi Bool_ >`:: + * `lt([30, 75.6], 30)` \=> `[F, F]` + +=== Less Than or Equals +Checks whether a numeric or `Date` expression's values are less than or equal to another expression's values. +The parameters must be the same type, after implicit casting.
+ +`lte(< _Single Numeric/Date_ T >, < _Single_ T >)` \=> `< _Single Bool_ >`:: + * `lte(1800-01-02, 1799-12-20)` \=> `F` +`lte(< _Single Numeric/Date_ T >, < _Multi_ T >)` \=> `< _Multi Bool_ >`:: + * `lte(30.756, [30, 100])` \=> `[F, T]` +`lte(< _Multi Numeric/Date_ T >, < _Single_ T >)` \=> `< _Multi Bool_ >`:: + * `lte([30, 75.6], 30)` \=> `[T, F]` + +[[analytics-top]] +=== Top +Returns the maximum of the numeric, `Date` or `String` expression(s)' values. +The parameters must be the same type, after implicit casting. +(Currently the only type not compatible is `Boolean`, which will be converted to a `String` implicitly in order to compile the expression) + +`top(< _Multi_ T >)` \=> `< _Single_ T >`:: + * `top([30, 400, -10, 0])` \=> `400` +`top(< _Single_ T >, ...)` \=> `< _Single_ T >`:: + * `top("a", 1, "d")` \=> `"d"` + +=== Bottom +Returns the minimum of the numeric, `Date` or `String` expression(s)' values. +The parameters must be the same type, after implicit casting. +(Currently the only type not compatible is `Boolean`, which will be converted to a `String` implicitly in order to compile the expression) + +`bottom(< _Multi_ T >)` \=> `< _Single_ T >`:: + * `bottom([30, 400, -10, 0])` \=> `-10` +`bottom(< _Single_ T >, ...)` \=> `< _Single_ T >`:: + * `bottom("a", 1, "d")` \=> `"1"` + +== Conditional + +[[analytics-if]] +=== If +Returns the value(s) of the `THEN` or `ELSE` expressions depending on whether the boolean conditional expression's value is `true` or `false`. +The `THEN` and `ELSE` expressions must be of the same type and cardinality after implicit casting is done. + +`if(< _Single Bool_ >, < T >, < T >)` \=> `< T >`:: + * `if(true, "abc", [1,2])` \=> `["abc"]` + * `if(false, "abc", 123)` \=> `"123"` + +=== Replace +Replaces all values from the 1^st^ expression that are equal to the value of the 2^nd^ expression with the value of the 3^rd^ expression. +All parameters must be the same type after implicit casting is done. + +`replace(< T >, < _Single_ T >, < _Single_ T >)` \=> `< T >`:: + * `replace([1,3], 3, "4")` \=> `["1", "4"]` + * `replace("abc", "abc", 18)` \=> `"18"` + * `replace("abc", 1, "def")` \=> `"abc"` + +=== Fill Missing +If the 1^st^ expression does not have values, fill it with the values of the 2^nd^ expression. +Both expressions must be of the same type and cardinality after implicit casting is done. + +`fill_missing(< T >, < T >)` \=> `< T >`:: + * `fill_missing([], 3)` \=> `[3]` + * `fill_missing(_empty_, "abc")` \=> `"abc"` + * `fill_missing("abc", [1])` \=> `["abc"]` + +=== Remove +Removes all occurrences of the 2^nd^ expression's value from the values of the 1^st^ expression. +Both expressions must be of the same type after implicit casting is done. + +`remove(< T >, < _Single_ T >)` \=> `< T >`:: + * `remove([1,2,3,2], 2)` \=> `[1, 3]` + * `remove("1", 1)` \=> `_empty_` + * `remove(1, "abc")` \=> `"1"` + +=== Filter +Returns the values of the 1^st^ expression if the value of the 2^nd^ expression is `true`; otherwise returns no values. + +`filter(< T >, < _Single Boolean_ >)` \=> `< T >`:: + * `filter([1,2,3], true)` \=> `[1,2,3]` + * `filter([1,2,3], false)` \=> `[]` + * `filter("abc", false)` \=> `_empty_` + * `filter("abc", true)` \=> `"abc"` + +== Date + +=== Date Parse +Explicitly converts the values of a `String` or `Long` expression into `Dates`.
+ +`date(< _String_ >)` \=> `< _Date_ >`:: + * `date('1800-01-02')` \=> `1800-01-02T00:00:00Z` + * `date(['1800-01-02', '2016-05-23'])` \=> `[1800-01-02T..., 2016-05-23T...]` +`date(< _Long_ >)` \=> `< _Date_ >`:: + * `date(1232343246648)` \=> `2009-01-19T05:34:06Z` + * `date([1232343246648, 223234324664])` \=> `[2009-01-19T..., 1977-01-27T...]` + +[[analytics-date-math]] +=== Date Math +Computes the given date math strings for the values of a `Date` expression. The date math strings *must* be <>. + +`date_math(< _Date_ >, < _Constant String_ >...)` \=> `< _Date_ >`:: + * `date_math(1800-04-15, '+1DAY', '-1MONTH')` \=> `1800-03-16` + * `date_math([1800-04-15,2016-05-24], '+1DAY', '-1MONTH')` \=> `[1800-03-16, 2016-04-25]` + +== String + +=== Explicit Casting +Explicitly casts the expression to a `String` expression. + +`string(< _String_ >)` \=> `< _String_ >`:: + * `string(1)` \=> `'1'` + * `string([1.5, -2.0])` \=> `['1.5', '-2.0']` + +=== Concatenation +Concatenates the values of the `String` expression(s) together. + +`concat(< _Multi String_ >)` \=> `< _Single String_ >`:: + * `concat(['a','b','c'])` \=> `'abc'` +`concat(< _Single String_ >, < _Multi String_ >)` \=> `< _Multi String_ >`:: + * `concat(1, ['a','b','c'])` \=> `['1a','1b','1c']` +`concat(< _Multi String_ >, < _Single String_ >)` \=> `< _Multi String_ >`:: + * `concat(['a','b','c'], 1)` \=> `['a1','b1','c1']` +`concat(< _Single String_ >...)` \=> `< _Single String_ >`:: + * `concat('a','b','c')` \=> `'abc'` + * `concat('a',_empty_,'c')` \=> `'ac'` + + _Empty values are ignored_ + +=== Separated Concatenation +Concatenates the values of the `String` expression(s) together using the given <> value as a separator. + +`concat_sep(< _Constant String_ >, < _Multi String_ >)` \=> `< _Single String_ >`:: + * `concat_sep('-', ['a','b'])` \=> `'a-b'` +`concat_sep(< _Constant String_ >, < _Single String_ >, < _Multi String_ >)` \=> `< _Multi String_ >`:: + * `concat_sep(2,1,['a','b'])` \=> `['12a','12b']` +`concat_sep(< _Constant String_ >, < _Multi String_ >, < _Single String_ >)` \=> `< _Multi String_ >`:: + * `concat_sep(2,['a','b'],1)` \=> `['a21','b21']` +`concat_sep(< _Constant String_ >, < _Single String_ >...)` \=> `< _Single String_ >`:: + * `concat_sep('-','a',2,3)` \=> `'a-2-3'` + * `concat_sep(';','a',_empty_,'c')` \=> `'a;c'` + +_Empty values are ignored_ diff --git a/solr/solr-ref-guide/src/analytics-reduction-functions.adoc b/solr/solr-ref-guide/src/analytics-reduction-functions.adoc new file mode 100644 index 00000000000..60c65fab3de --- /dev/null +++ b/solr/solr-ref-guide/src/analytics-reduction-functions.adoc @@ -0,0 +1,120 @@ += Analytics Reduction Functions +:page-tocclass: right +:page-toclevels: 2 +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
+ +Reduction functions reduce the values of <> +and/or unreduced <> +for every Solr Document to a single value. + +Below is a list of all reduction functions provided by the Analytics Component. +These can be combined using mapping functions to implement more complex functionality. + +== Counting Reductions + +=== Count +The number of existing values for an expression. For single-valued expressions, this is equivalent to `doc_count`. +If no expression is given, the number of matching documents is returned. + +`count()` \=> `< _Single Long_ >` +`count(< T >)` \=> `< _Single Long_ >` + +=== Doc Count +The number of documents for which an expression has existing values. For single-valued expressions, this is equivalent to `count`. +If no expression is given, the number of matching documents is returned. + +`doc_count()` \=> `< _Single Long_ >` + +`doc_count(< T >)` \=> `< _Single Long_ >` + +=== Missing +The number of documents for which an expression has no existing value. + +`missing(< T >)` \=> `< _Single Long_ >` + +[[analytics-unique]] +=== Unique +The number of unique values for an expression. This function accepts `Numeric`, `Date` and `String` expressions. + +`unique(< T >)` \=> `< _Single Long_ >` + +== Math Reductions + +=== Sum +Returns the sum of all values for the expression. + +`sum(< _Double_ >)` \=> `< _Single Double_ >` + +=== Variance +Returns the variance of all values for the expression. + +`variance(< _Double_ >)` \=> `< _Single Double_ >` + +=== Standard Deviation +Returns the standard deviation of all values for the expression. + +`stddev(< _Double_ >)` \=> `< _Single Double_ >` + +=== Mean +Returns the arithmetic mean of all values for the expression. + +`mean(< _Double_ >)` \=> `< _Single Double_ >` + +=== Weighted Mean +Returns the arithmetic mean of all values for the second expression weighted by the values of the first expression. + +`wmean(< _Double_ >, < _Double_ >)` \=> `< _Single Double_ >` + +NOTE: The expressions must satisfy the rules for `mult` function parameters. + +== Ordering Reductions + +=== Minimum +Returns the minimum value for the expression. This function accepts `Numeric`, `Date` and `String` expressions. + +`min(< T >)` \=> `< _Single_ T >` + +=== Maximum +Returns the maximum value for the expression. This function accepts `Numeric`, `Date` and `String` expressions. + +`max(< T >)` \=> `< _Single_ T >` + +=== Median +Returns the median of all values for the expression. This function accepts `Numeric` and `Date` expressions. + +`median(< T >)` \=> `< _Single_ T >` + +=== Percentile +Calculates the given percentile of all values for the expression. +This function accepts `Numeric`, `Date` and `String` expressions for the 2^nd^ parameter. + +The percentile, given as the 1^st^ parameter, must be a <> between [0, 100). + +`percentile(, < T >)` \=> `< _Single_ T >` + +=== Ordinal +Calculates the given ordinal of all values for the expression. +This function accepts `Numeric`, `Date` and `String` expressions for the 2^nd^ parameter. +The ordinal, given as the 1^st^ parameter, must be a <>. +*0 is not accepted as an ordinal value.* + +If the ordinal is positive, the returned value will be the _n_^th^ smallest value. + +If the ordinal is negative, the returned value will be the _n_^th^ largest value.
+ +`ordinal(, < T >)` \=> `< _Single_ T >` diff --git a/solr/solr-ref-guide/src/analytics.adoc b/solr/solr-ref-guide/src/analytics.adoc new file mode 100644 index 00000000000..fe9b1105ce2 --- /dev/null +++ b/solr/solr-ref-guide/src/analytics.adoc @@ -0,0 +1,819 @@ += Analytics Component +:page-children: analytics-expression-sources, analytics-mapping-functions, analytics-reduction-functions +:page-tocclass: right +:page-toclevels: 2 +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +The Analytics Component allows users to calculate complex statistical aggregations over result sets. + +The component enables interacting with data in a variety of ways, both through a diverse set of analytics functions and through powerful faceting functionality. +The standard facets are supported within the analytics component with additions that leverage its analytical capabilities. + +== Analytics Configuration + +The Analytics component is in a contrib module; therefore, it will need to be enabled in the `solrconfig.xml` for each collection where you would like to use it. + +Since the Analytics framework is a _search component_, it must be declared as such and added to the search handler. + +For distributed analytics requests over cloud collections, the component uses the `AnalyticsHandler` strictly for inter-shard communication. +The Analytics Handler should not be used by users to submit analytics requests. + +To configure Solr to use the Analytics Component, the first step is to add a `lib` directive so Solr loads the Analytics Component classes (for more about the `lib` directive, see <>). In the section of `solrconfig.xml` where the default `lib` directives are, add a line: + +[source,xml] + + +Next you need to enable the request handler and search component. Add the following lines to `solrconfig.xml`, near the definitions for other request handlers: + +[source,xml] +.solrconfig.xml +---- + [searchComponent and requestHandler XML garbled in extraction; only the component name "analytics" survives] +---- + +For these changes to take effect, restart Solr or reload the core or collection. + +== Request Syntax + +An Analytics request is passed to Solr with the parameter `analytics` in a request sent to the +<>. +Since the analytics request is sent inside of a search handler request, it will compute results based on the result set determined by the search handler. + +For example, this curl command encodes and POSTs a simple analytics request to the search handler: + +[source,bash] +---- +curl --data-urlencode 'analytics={ + "expressions" : { + "revenue" : "sum(mult(price,quantity))" + } + }' + http://localhost:8983/solr/sales/select?q=*:*&wt=json&rows=0 +---- + +There are 3 main parts of any analytics request: + +Expressions:: +A list of calculations to perform over the entire result set.

== Request Syntax

An Analytics request is passed to Solr with the parameter `analytics` in a request sent to the
<<requesthandlers-and-searchcomponents-in-solrconfig.adoc#,search handler>>.
Since the analytics request is sent inside of a search handler request, it will compute results based on the result set determined by the search handler.

For example, this curl command encodes and POSTs a simple analytics request to the search handler:

[source,bash]
----
curl --data-urlencode 'analytics={
    "expressions" : {
        "revenue" : "sum(mult(price,quantity))"
    }
    }'
    "http://localhost:8983/solr/sales/select?q=*:*&wt=json&rows=0"
----

There are 3 main parts of any analytics request:

Expressions::
A list of calculations to perform over the entire result set.
Expressions aggregate the search results into a single value to return.
This list is entirely independent of the expressions defined in each of the groupings.
Find out more about them in the <<analytics.adoc#expressions,Expressions>> section.

Functions::
One or more <<analytics.adoc#variable-functions,Variable Functions>> to be used throughout the rest of the request. These are essentially lambda functions and can be combined in a number of ways.
These functions can be used in the expressions defined in `expressions` as well as `groupings`.

Groupings::
The list of <<analytics.adoc#groupings-and-facets,Groupings>> to calculate in addition to the expressions.
Groupings hold a set of facets and a list of expressions to compute over those facets.
The expressions defined in a grouping are only calculated over the facets defined in that grouping.

[NOTE]
.Optional Parameters
Either the `expressions` or the `groupings` parameter must be present in the request, or else there will be no analytics to compute.
The `functions` parameter is always optional.

[source,json]
.Example Analytics Request
----
{
    "functions": {
        "sale()": "mult(price,quantity)"
    },
    "expressions" : {
        "max_sale" : "max(sale())",
        "med_sale" : "median(sale())"
    },
    "groupings" : {
        "sales" : {
            "expressions" : {
                "stddev_sale"  : "stddev(sale())",
                "min_price"    : "min(price)",
                "max_quantity" : "max(quantity)"
            },
            "facets" : {
                "category" : {
                    "type" : "value",
                    "expression" : "fill_missing(category, 'No Category')",
                    "sort" : {
                        "criteria" : [
                            {
                                "type" : "expression",
                                "expression" : "min_price",
                                "direction" : "ascending"
                            },
                            {
                                "type" : "facetvalue",
                                "direction" : "descending"
                            }
                        ],
                        "limit" : 10
                    }
                },
                "temps" : {
                    "type" : "query",
                    "queries" : {
                        "hot" : "temp:[90 TO *]",
                        "cold" : "temp:[* TO 50]"
                    }
                }
            }
        }
    }
}
----

== Expressions

Expressions are the way to request pieces of information from the analytics component. These are the statistical expressions that you want computed and returned in your response.

=== Constructing an Expression

==== Expression Components

An expression is built using fields, constants, mapping functions and reduction functions. The ways that these can be defined are described below.

Sources::
* Constants: The values defined in the expression.
The supported constant types are described in the <<analytics-expression-sources.adoc#,Analytics Expression Sources>> section.

* Fields: Solr fields that are read from the index.
The supported fields are listed in the <<analytics-expression-sources.adoc#,Analytics Expression Sources>> section.

Mapping Functions::
Mapping functions map values for each Solr Document or Reduction.
The provided mapping functions are detailed in the <<analytics-mapping-functions.adoc#,Analytics Mapping Functions>> section.

* Unreduced Mapping: Mapping a Field with another Field or Constant returns a value for every Solr Document.
Unreduced mapping functions can take fields, constants as well as other unreduced mapping functions as input.

* Reduced Mapping: Mapping a Reduction Function with another Reduction Function or Constant returns a single value.

Reduction Functions::
Functions that reduce the values of sources and/or unreduced mapping functions for every Solr Document to a single value.
The provided reduction functions are detailed in the <<analytics-reduction-functions.adoc#,Analytics Reduction Functions>> section.

==== Component Ordering

The expression components must be used in the following order to create valid expressions.

. Reduced Mapping Function
.. Constants
.. Reduction Function
... Sources
... Unreduced Mapping Function
.... Sources
.... Unreduced Mapping Function
.. Reduced Mapping Function
. Reduction Function

This ordering is based on the following rules:

* No reduction function can be an argument of another reduction function.
Since all reduction is done together in one step, one reduction function cannot rely on the result of another.
* No fields can be left unreduced, since the analytics component cannot return a list of values for an expression (one for every document).
Every expression must be reduced to a single value.
* Mapping functions are not necessary when creating an expression; however, as many nested mappings as needed can be used.
* Nested mapping functions must be the same type, so either both must be unreduced or both must be reduced.
A reduced mapping function cannot take an unreduced mapping function as a parameter and vice versa.
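
For example, given two hypothetical fields `a` and `b`, these rules accept or reject the following expressions:

[source,text]
----
sum(add(a,b))        valid:   an unreduced mapping that is then reduced
add(sum(a),sum(b))   valid:   reduction results combined by a reduced mapping
sum(count(a))        invalid: a reduction cannot be an argument of another reduction
add(a,b)             invalid: the fields are never reduced
----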

==== Example Construction

With the above definitions and ordering, an example expression can be broken up into its components:

[source,bash]
div(sum(a,fill_missing(b,0)),add(10.5,count(mult(a,c))))

As a whole, this is a reduced mapping function. The `div` function is a reduced mapping function since it is a <<analytics-mapping-functions.adoc#,mapping function>> and has reduced arguments.

If we break down the expression further:

* `sum(a,fill_missing(b,0))`: Reduction Function
+
`sum` is a <<analytics-reduction-functions.adoc#,reduction function>>.
** `a`: Field
** `fill_missing(b,0)`: Unreduced Mapping Function
+
`fill_missing` is an unreduced mapping function since it is a <<analytics-mapping-functions.adoc#,mapping function>> and has a field argument.
*** `b`: Field
*** `0`: Constant

* `add(10.5,count(mult(a,c)))`: Reduced Mapping Function
+
`add` is a reduced mapping function since it is a <<analytics-mapping-functions.adoc#,mapping function>> and has a reduction function argument.
** `10.5`: Constant
** `count(mult(a,c))`: Reduction Function
+
`count` is a <<analytics-reduction-functions.adoc#,reduction function>>.
*** `mult(a,c)`: Unreduced Mapping Function
+
`mult` is an unreduced mapping function since it is a <<analytics-mapping-functions.adoc#,mapping function>> and has two field arguments.
**** `a`: Field
**** `c`: Field

=== Expression Cardinality (Multi-Valued and Single-Valued)

The root of all multi-valued expressions are multi-valued fields. Single-valued expressions can be started with constants or single-valued fields.
All single-valued expressions can be treated as multi-valued expressions that contain one value.

Single-valued expressions and multi-valued expressions can be used together in many mapping functions, as well as multi-valued expressions being used alone, and many single-valued expressions being used together. For example:

`add(< _Single Double_ >, < _Single Double_ >, ...)`::
Returns a single-valued double expression where the values of each expression are added.

`add(< _Single Double_ >, < _Multi Double_ >)`::
Returns a multi-valued double expression where each value of the second expression is added to the single value of the first expression.

`add(< _Multi Double_ >, < _Single Double_ >)`::
Acts the same as the above function.

`add(< _Multi Double_ >)`::
Returns a single-valued double expression which is the sum of the multiple values of the parameter expression.
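
To illustrate, assume a hypothetical single-valued field `price` and multi-valued field `taxes`:

[source,text]
----
add(price, taxes)        multi-valued: price is added to each value of taxes
sum(add(price, taxes))   single-valued: the multi-valued result is reduced to one sum
----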

=== Types and Implicit Casting

The new analytics component currently supports the types listed in the below table.
These types have one-way implicit casting enabled for the following relationships:

[cols="20s,80",options="header"]
|===
| Type | Implicitly Casts To

| Boolean | String
| Date | Long, String
| Integer | Long, Float, Double, String
| Long | Double, String
| Float | Double, String
| Double | String
| String | _none_
|===

An implicit cast means that if a function requires a certain type of value as a parameter, arguments will be automatically converted to that type if it is possible.

For example, `concat()` only accepts string parameters and since all types can be implicitly cast to strings, any type is accepted as an argument.

This also goes for dynamically typed functions.
`fill_missing()` requires two arguments of the same type. However, two types that implicitly cast to the same type can also be used.

For example, `fill_missing(< _Long_ >, < _Float_ >)` will be cast to `fill_missing(< _Double_ >, < _Double_ >)` since long cannot be cast to float and float cannot be cast to long implicitly.

There is an ordering to implicit casts, where the more specialized type is ordered ahead of the more general type.
Therefore even though both long and float can be implicitly cast to double and string, they will be cast to double.
This is because double is a more specialized type than string, which every type can be cast to.

The ordering is the same as their order in the above table.

Cardinality can also be implicitly cast.
Single-valued expressions can always be implicitly cast to multi-valued expressions, since all single-valued expressions are multi-valued expressions with one value.

Implicit casting will only occur when an expression will not "compile" without it.
If an expression follows all typing rules initially, no implicit casting will occur.
Certain functions such as `string()`, `date()`, `round()`, `floor()`, and `ceil()` act as explicit casts, declaring the type that is desired.
However `round()`, `floor()` and `ceil()` can return either int or long, depending on the argument type.

== Variable Functions

Variable functions are a way to shorten your expressions and make writing analytics queries easier. They are essentially lambda functions defined in a request.

[source,json]
.Example Basic Function
----
{
    "functions" : {
        "sale()" : "mult(price,quantity)"
    },
    "expressions" : {
        "max_sale" : "max(sale())",
        "med_sale" : "median(sale())"
    }
}
----

In the above request, instead of writing `mult(price,quantity)` twice, a function `sale()` was defined to abstract this idea. That function was then used in multiple expressions.

Suppose that we want to look at the sales of specific categories:

[source,json]
----
{
    "functions" : {
        "clothing_sale()" : "filter(mult(price,quantity),equal(category,'Clothing'))",
        "kitchen_sale()"  : "filter(mult(price,quantity),equal(category,'Kitchen'))"
    },
    "expressions" : {
        "max_clothing_sale" : "max(clothing_sale())"
      , "med_clothing_sale" : "median(clothing_sale())"
      , "max_kitchen_sale"  : "max(kitchen_sale())"
      , "med_kitchen_sale"  : "median(kitchen_sale())"
    }
}
----

=== Arguments

Instead of making a function for each category, it would be much easier to use `category` as an input to the `sale()` function.
An example of this functionality is shown below:

[source,json]
.Example Function with Arguments
----
{
    "functions" : {
        "sale(cat)" : "filter(mult(price,quantity),equal(category,cat))"
    },
    "expressions" : {
        "max_clothing_sale" : "max(sale('Clothing'))"
      , "med_clothing_sale" : "median(sale('Clothing'))"
      , "max_kitchen_sale"  : "max(sale('Kitchen'))"
      , "med_kitchen_sale"  : "median(sale('Kitchen'))"
    }
}
----

Variable functions can take any number of arguments and use them in the function expression as if they were a field or constant.
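
Conceptually, the expression parser substitutes each argument into the function body. A sketch of the expansion:

[source,text]
----
max(sale('Clothing'))
    is expanded to
max(filter(mult(price,quantity),equal(category,'Clothing')))
----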

=== Variable Length Arguments

There are analytics functions that take a variable number of parameters.
Therefore there are use cases where variable functions would need to take a variable number of parameters as well.
For example, there may be an undetermined number of components to the price of a product.
Functions can take a variable number of parameters if the last parameter is followed by `..`

[source,json]
.Example Function with a Variable Length Argument
----
{
    "functions" : {
        "sale(cat, costs..)" : "filter(mult(add(costs),quantity),equal(category,cat))"
    },
    "expressions" : {
        "max_clothing_sale" : "max(sale('Clothing', material, tariff, tax))"
      , "med_clothing_sale" : "median(sale('Clothing', material, tariff, tax))"
      , "max_kitchen_sale"  : "max(sale('Kitchen', material, construction))"
      , "med_kitchen_sale"  : "median(sale('Kitchen', material, construction))"
    }
}
----

In the above example a variable length argument is used to encapsulate all of the costs to use for a product.
There is no definite number of arguments requested for the variable length parameter; therefore the clothing expressions can use 3 and the kitchen expressions can use 2.
When the `sale()` function is called, `costs` is expanded to the arguments given.

Therefore in the above request, inside of the `sale` function:

* `add(costs)`

is expanded to both of the following:

* `add(material, tariff, tax)`
* `add(material, construction)`

=== For-Each Functions

[CAUTION]
.Advanced Functionality
====
The following function details are for advanced requests.
====

Although the above functionality allows for an undefined number of arguments to be passed to a function, it does not allow for interacting with those arguments.

Many times we might want to wrap each argument in additional functions.
For example, maybe we want to be able to look at multiple categories at the same time.
So we want to see if `category EQUALS x *OR* category EQUALS y` and so on.

In order to do this we need to use for-each lambda functions, which transform each value of the variable length parameter.
The for-each is started with the `:` character after the variable length parameter.

[source,json]
.Example Function with a For-Each
----
{
    "functions" : {
        "sale(cats..)" : "filter(mult(price,quantity),or(cats:equal(category,_)))"
    },
    "expressions" : {
        "max_sale_1" : "max(sale('Clothing', 'Kitchen'))"
      , "med_sale_1" : "median(sale('Clothing', 'Kitchen'))"
      , "max_sale_2" : "max(sale('Electronics', 'Entertainment', 'Travel'))"
      , "med_sale_2" : "median(sale('Electronics', 'Entertainment', 'Travel'))"
    }
}
----

In this example, `cats:` is the syntax that starts a for-each lambda function over every parameter `cats`, and the `\_` character is used to refer to the value of `cats` in each iteration in the for-each.
When `sale('Clothing', 'Kitchen')` is called, the lambda function `equal(category,_)` is applied to both Clothing and Kitchen inside of the `or()` function.

Using all of these rules, the expression:

[source,text]
`sale("Clothing","Kitchen")`

is expanded to:

[source,text]
`filter(mult(price,quantity),or(equal(category,"Kitchen"),equal(category,"Clothing")))`

by the expression parser.

== Groupings And Facets

Facets, much like in other parts of Solr, allow analytics results to be broken up and grouped by attributes of the data that the expressions are being calculated over.

The currently available facets for use in the analytics component are Value Facets, Pivot Facets, Range Facets and Query Facets.
Each facet is required to have a unique name within the grouping it is defined in, and no facet can be defined outside of a grouping.

Groupings allow users to calculate the same grouping of expressions over a set of facets.
Groupings must have both `expressions` and `facets` given.

[source,json]
.Example Base Facet Request
----
{
    "functions" : {
        "sale()" : "mult(price,quantity)"
    },
    "groupings" : {
        "sales_numbers" : {
            "expressions" : {
                "max_sale" : "max(sale())",
                "med_sale" : "median(sale())"
            },
            "facets" : {
                "<name>" : "< facet request >"
            }
        }
    }
}
----

[source,json]
.Example Base Facet Response
----
{
    "analytics_response" : {
        "groupings" : {
            "sales_numbers" : {
                "<name>" : "< facet response >"
            }
        }
    }
}
----

=== Facet Sorting

Some Analytics facets allow for complex sorting of their results.
The two current sortable facets are <<analytics.adoc#value-facets,Value Facets>> and <<analytics.adoc#analytic-pivot-facets,Pivot Facets>>.

==== Parameters

`criteria`::
The list of criteria to sort the facet by.
+
It takes the following parameters:

`type`::: The type of sort. There are two possible values:
* `expression`: Sort by the value of an expression defined in the same grouping.
* `facetvalue`: Sort by the string-representation of the facet value.

`direction`:::
_(Optional)_ The direction to sort.
* `ascending` _(Default)_
* `descending`

`expression`:::
When `type = expression`, the name of an expression defined in the same grouping.

`limit`::
Limit the number of returned facet values to the top _N_. _(Optional)_

`offset`::
When a limit is set, skip the top _N_ facet values. _(Optional)_

[source,json]
.Example Sort Request
----
{
    "criteria" : [
        {
            "type" : "expression",
            "expression" : "max_sale",
            "direction" : "ascending"
        },
        {
            "type" : "facetvalue",
            "direction" : "descending"
        }
    ],
    "limit" : 10,
    "offset" : 5
}
----

=== Value Facets

Value Facets are used to group documents by the value of a mapping expression applied to each document.
Mapping expressions are expressions that do not include a reduction function.
For more information, refer to the <<analytics-mapping-functions.adoc#,Analytics Mapping Functions>> section. For example:

* `mult(quantity, add(price, tax))`: break up documents by the revenue generated
* `fill_missing(state, "N/A")`: break up documents by state, where N/A is used when the document doesn't contain a state

Value Facets can be sorted.

==== Parameters

`expression`:: The expression to choose a facet bucket for each document.
`sort`:: A <<analytics.adoc#facet-sorting,sort>> for the results of the facet.

[NOTE]
.Optional Parameters
The `sort` parameter is optional.

[source,json]
.Example Value Facet Request
----
{
    "type" : "value",
    "expression" : "fill_missing(category,'No Category')",
    "sort" : {}
}
----

[source,json]
.Example Value Facet Response
----
[
    { "..." : "..." },
    {
        "value" : "Electronics",
        "results" : {
            "max_sale" : 103.75,
            "med_sale" : 15.5
        }
    },
    {
        "value" : "Kitchen",
        "results" : {
            "max_sale" : 88.25,
            "med_sale" : 11.37
        }
    },
    { "..." : "..." }
]
----

[NOTE]
.Field Facets
This is a replacement for Field Facets in the original Analytics Component.
Field Facet functionality is maintained in Value Facets by using the name of a field as the expression.
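
For example, a value facet that reproduces plain field faceting over a hypothetical `category` field is simply:

[source,json]
----
{
    "type" : "value",
    "expression" : "category"
}
----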

=== Analytic Pivot Facets

Pivot Facets are used to group documents by the value of multiple mapping expressions applied to each document.

Pivot Facets work much like layers of <<analytics.adoc#value-facets,Value Facets>>.
A list of pivots is required, and the order of the list directly impacts the results returned.
The first pivot given will be treated like a normal value facet.
The second pivot given will be treated like one value facet for each value of the first pivot.
Each of these second-level value facets will be limited to the documents in their first-level facet bucket.
This continues for however many pivots are provided.

Sorting is enabled on a per-pivot basis. This means that if your top pivot has a sort with `limit:1`, then only that first value of the facet will be drilled down into. Sorting in each pivot is independent of the other pivots.

==== Parameters

`pivots`:: The list of pivots to calculate a drill-down facet for. The list is ordered by top-most to bottom-most level.
`name`::: The name of the pivot.
`expression`::: The expression to choose a facet bucket for each document.
`sort`::: A <<analytics.adoc#facet-sorting,sort>> for the results of the pivot.

[NOTE]
.Optional Parameters
The `sort` parameter within the pivot object is optional, and can be given in any, none or all of the provided pivots.

[source,json]
.Example Pivot Facet Request
----
{
    "type" : "pivot",
    "pivots" : [
        {
            "name" : "country",
            "expression" : "country",
            "sort" : {}
        },
        {
            "name" : "state",
            "expression" : "fill_missing(state, fill_missing(providence, territory))"
        },
        {
            "name" : "city",
            "expression" : "fill_missing(city, 'N/A')",
            "sort" : {}
        }
    ]
}
----

[source,json]
.Example Pivot Facet Response
----
[
    { "..." : "..." },
    {
        "pivot" : "Country",
        "value" : "USA",
        "results" : {
            "max_sale" : 103.75,
            "med_sale" : 15.5
        },
        "children" : [
            { "..." : "..." },
            {
                "pivot" : "State",
                "value" : "Texas",
                "results" : {
                    "max_sale" : 99.2,
                    "med_sale" : 20.35
                },
                "children" : [
                    { "..." : "..." },
                    {
                        "pivot" : "City",
                        "value" : "Austin",
                        "results" : {
                            "max_sale" : 94.34,
                            "med_sale" : 17.60
                        }
                    },
                    { "..." : "..." }
                ]
            },
            { "..." : "..." }
        ]
    },
    { "..." : "..." }
]
----

=== Analytics Range Facets

Range Facets are used to group documents by the value of a field into a given set of ranges.
The inputs for analytics range facets are identical to those used for Solr range facets.
Refer to the <<faceting.adoc#range-faceting,Range Faceting>> documentation for additional questions regarding use.

==== Parameters

`field`:: The field to facet over.
`start`:: The bottom end of the range.
`end`:: The top end of the range.
`gap`:: A list of range gaps to generate facet buckets. If the buckets do not add up to fit the `start` to `end` range,
then the last `gap` value will be repeated as many times as needed to fill any unused range.
For example, with `start=0`, `end=100` and `gap=[5,10,10,25]`, buckets are created for 0-5, 5-15, 15-25, 25-50, 50-75 and 75-100; the final gap of 25 is repeated to cover the rest of the range.
`hardend`:: Whether to cut off the last facet bucket range at the `end` value if it spills over. Defaults to `false`.
`include`:: The boundaries to include in the facet buckets. Defaults to `lower`.
* `lower` - All gap-based ranges include their lower bound.
* `upper` - All gap-based ranges include their upper bound.
* `edge` - The first and last gap ranges include their edge bounds (lower for the first one, upper for the last one) even if the corresponding upper/lower option is not specified.
* `outer` - The `before` and `after` ranges will be inclusive of their bounds, even if the first or last ranges already include those boundaries.
* `all` - Includes all options: `lower`, `upper`, `edge`, and `outer`.
`others`:: Additional ranges to include in the facet. Defaults to `none`.
* `before` - All records with field values lower than the lower bound of the first range.
* `after` - All records with field values greater than the upper bound of the last range.
* `between` - All records with field values between the lower bound of the first range and the upper bound of the last range.
* `none` - Include facet buckets for none of the above.
* `all` - Include facet buckets for `before`, `after` and `between`.

[NOTE]
.Optional Parameters
The `hardend`, `include` and `others` parameters are all optional.

[source,json]
.Example Range Facet Request
----
{
    "type" : "range",
    "field" : "price",
    "start" : "0",
    "end" : "100",
    "gap" : [
        "5",
        "10",
        "10",
        "25"
    ],
    "hardend" : true,
    "include" : [
        "lower",
        "upper"
    ],
    "others" : [
        "after",
        "between"
    ]
}
----

[source,json]
.Example Range Facet Response
----
[
    {
        "value" : "[0 TO 5]",
        "results" : {
            "max_sale" : 4.75,
            "med_sale" : 3.45
        }
    },
    {
        "value" : "[5 TO 15]",
        "results" : {
            "max_sale" : 13.25,
            "med_sale" : 10.20
        }
    },
    {
        "value" : "[15 TO 25]",
        "results" : {
            "max_sale" : 22.75,
            "med_sale" : 18.50
        }
    },
    {
        "value" : "[25 TO 50]",
        "results" : {
            "max_sale" : 47.55,
            "med_sale" : 30.33
        }
    },
    {
        "value" : "[50 TO 75]",
        "results" : {
            "max_sale" : 70.25,
            "med_sale" : 64.54
        }
    },
    { "..." : "..." }
]
----

=== Query Facets

Query Facets are used to group documents by a given set of queries.

==== Parameters

`queries`:: The list of queries to facet by.

[source,json]
.Example Query Facet Request
----
{
    "type" : "query",
    "queries" : {
        "high_quantity" : "quantity:[ 5 TO 14 ] AND price:[ 100 TO * ]",
        "low_quantity"  : "quantity:[ 1 TO 4 ] AND price:[ 100 TO * ]"
    }
}
----

[source,json]
.Example Query Facet Response
----
[
    {
        "value" : "high_quantity",
        "results" : {
            "max_sale" : 4.75,
            "med_sale" : 3.45
        }
    },
    {
        "value" : "low_quantity",
        "results" : {
            "max_sale" : 13.25,
            "med_sale" : 10.20
        }
    }
]
----
diff --git a/solr/solr-ref-guide/src/aws-solrcloud-tutorial.adoc b/solr/solr-ref-guide/src/aws-solrcloud-tutorial.adoc
new file mode 100644
index 00000000000..d1ac4aaa377
--- /dev/null
+++ b/solr/solr-ref-guide/src/aws-solrcloud-tutorial.adoc
@@ -0,0 +1,271 @@
= SolrCloud on AWS EC2
:experimental:
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

This guide is a tutorial on how to set up a multi-node SolrCloud cluster on https://aws.amazon.com/ec2[Amazon Web Services (AWS) EC2] instances for early development and design.

This tutorial is not meant for production systems. For one, it uses Solr's embedded ZooKeeper instance, and for production you should have at least 3 ZooKeeper nodes in an ensemble. There are additional steps you should take for a production installation; refer to <<taking-solr-to-production.adoc#,Taking Solr to Production>> for how to deploy Solr in production.

In this guide we are going to:

. Launch multiple AWS EC2 instances
* Create a new _Security Group_
* Configure instances and launch
. Install, configure and start Solr on the newly launched EC2 instances
* Install system prerequisites: Java 1.8 or later
* Download the latest version of Solr
* Start the Solr nodes in cloud mode
. Create a collection, index documents and query the system
* Create a collection with multiple shards and replicas
* Index documents to the newly created collection
* Verify document presence by querying the collection

== Before You Start

To use this guide, you must have the following:

* An https://aws.amazon.com[AWS] account.
* Familiarity with setting up a single-node SolrCloud on a local machine. Refer to the <<solr-tutorial.adoc#,Solr Tutorial>> if you have never used Solr before.

== Launch EC2 instances

=== Create new Security Group

. Navigate to the https://console.aws.amazon.com/ec2/v2/home[AWS EC2 console] and to the region of your choice.
. Configure an http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-network-security.html[AWS security group] which will limit access to the installation and allow our launched EC2 instances to talk to each other without restrictions.
.. From the EC2 Dashboard, click btn:[Security Groups] from the left-hand menu, under "Network & Security".
.. Click btn:[Create Security Group] under the _Security Groups_ section. Give your security group a descriptive name.
.. You can select one of the existing https://aws.amazon.com/vpc[VPCs] or create a new one.
.. We need two ports open for our cloud here:
... Solr port. In this example we will use Solr's default port 8983.
... ZooKeeper port. We'll use Solr's embedded ZooKeeper, so we'll use the default port 9983 (see the <<appendix,Appendix>> to configure an external ZooKeeper).
.. Click btn:[Inbound] to set inbound network rules, then select btn:[Add Rule]. Select "Custom TCP" as the type. Enter 8983 for the "Port Range" and choose "My IP" for the Source, then enter your public IP. Create a second rule with the same type and source, but enter 9983 for the port.
+
This will limit access to your current machine. If you want wider access to the instance in order to collaborate with others, you can specify that, but make sure you only allow as much access as needed. A Solr instance should not be exposed to general Internet traffic.
.. Add another rule for SSH access. Choose "SSH" as the type, and again "My IP" for the source and again enter your public IP. You need SSH access on all instances to install and configure Solr.
.. Review the details; your group configuration should look like this:
+
image::images/aws-solrcloud-tutorial/aws-security-create.png[image,width=600,height=400]
.. Click btn:[Create] when finished.
.. We need to modify the rules so that instances that are part of the group can talk to all other instances that are part of the same group. We could not do this while creating the group, so we need to edit the group after creating it to add this.
... Select the newly created group in the Security Group overview table. Under the "Inbound" tab, click btn:[Edit].
... Click btn:[Add rule]. Choose `All TCP` from the pulldown list for the type, and enter `0-65535` for the port range. Specify the name of the current Security Group, `solr-sample`, as the source.
.. Review the details; your group configuration should now look like this:
+
image::images/aws-solrcloud-tutorial/aws-security-edit.png[image,width=600,height=400]
.. Click btn:[Save] when finished.
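
The same group can also be created from a terminal with the AWS CLI. A sketch, assuming a default VPC; the group name `solr-sample` matches the steps above, and `203.0.113.5/32` stands in for your public IP:

[source,bash]
----
$ aws ec2 create-security-group --group-name solr-sample --description "SolrCloud tutorial"
# allow Solr, embedded ZooKeeper, and SSH only from your public IP
$ aws ec2 authorize-security-group-ingress --group-name solr-sample --protocol tcp --port 8983 --cidr 203.0.113.5/32
$ aws ec2 authorize-security-group-ingress --group-name solr-sample --protocol tcp --port 9983 --cidr 203.0.113.5/32
$ aws ec2 authorize-security-group-ingress --group-name solr-sample --protocol tcp --port 22 --cidr 203.0.113.5/32
----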

=== Configure Instances and Launch

Once the security group is in place, you can choose btn:[Instances] from the left-hand navigation menu.

Under Instances, click the btn:[Launch Instance] button and follow the wizard steps:

. Choose your Amazon Machine Image (AMI): Choose *Amazon Linux AMI, SSD Volume Type* as the AMI. There are both commercial AMIs and Community-based AMIs available, e.g., Amazon Linux AMI (HVM), SSD Volume Type, but this is a nice AMI to use for our purposes. Click btn:[Select] next to the image you choose.
. The next screen asks you to choose the instance type; *t2.medium* is sufficient. Choose it from the list, then click btn:[Configure Instance Details].
. Configure the instance. Enter *2* in the "Number of instances" field. Make sure the setting for "Auto-assign Public IP" is "Enabled".
. When finished, click btn:[Add Storage]. The default of *8 GB* for size and *General Purpose SSD* for the volume type is sufficient for running this quick start. Optionally select "Delete on termination" if you know you won't need the data stored in Solr indexes after you terminate the instances.
. When finished, click btn:[Add Tags]. You do not have to add any tags for this quick start, but you can add them if you want.
. Click btn:[Configure Security Group]. Choose *Select an existing security group* and select the security group you created earlier: `solr-sample`. You should see the expected inbound rules at the bottom of the page.
. Click btn:[Review].
. If everything looks correct, click btn:[Launch].
. Select an existing "private key file" or create a new one and download it to your local machine so you will be able to log in to the instances via SSH.
+
image::images/aws-solrcloud-tutorial/aws-key.png[image,width=600,height=400]
. On the instances list, you can watch the states change. You cannot use the instances until they become *"running"*.

== Install, Configure and Start

. Locate the Public DNS record for the instance by selecting the instance from the list of instances, and log on to each machine one by one.
+
Using SSH, if your AWS identity key file is `aws-key.pem` and the AMI uses `ec2-user` as the login user, on each AWS instance do the following (substituting each instance's public DNS name):
+
[source,bash]
$ ssh-add aws-key.pem
$ ssh -A ec2-user@<instance-public-dns>

. While logged in to each of the AWS EC2 instances, configure Java 1.8 and download Solr:
+
[source,bash]
# verify default java version packaged with AWS instances is 1.7
$ java -version
$ sudo yum install java-1.8.0
$ sudo /usr/sbin/alternatives --config java
# select jdk-1.8
# verify default java version is now 1.8
$ java -version
+
[source,bash,subs="verbatim,attributes+"]
# download desired version of Solr
$ wget http://archive.apache.org/dist/lucene/solr/{solr-docs-version}.0/solr-{solr-docs-version}.0.tgz
# untar
$ tar -zxvf solr-{solr-docs-version}.0.tgz
# set SOLR_HOME
$ export SOLR_HOME=$PWD/solr-{solr-docs-version}.0
# put the env variable in .bashrc
# vim ~/.bashrc
export SOLR_HOME=/home/ec2-user/solr-{solr-docs-version}.0

. Resolve the Public DNS to simpler hostnames.
+
Let's assume the AWS instances' public DNS names and IPv4 public IPs are as follows:
+
* ec2-54-1-2-3.us-east-2.compute.amazonaws.com: 54.1.2.3
* ec2-54-4-5-6.us-east-2.compute.amazonaws.com: 54.4.5.6
+
Edit `/etc/hosts`, and add entries for the above machines:
+
[source,bash]
$ sudo vim /etc/hosts
54.1.2.3 solr-node-1
54.4.5.6 solr-node-2
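+
As a quick sanity check that the new names resolve (output omitted here):
+
[source,bash]
$ getent hosts solr-node-1 solr-node-2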

. Configure Solr in the running EC2 instances.
+
In this case, one of the machines will host ZooKeeper embedded along with a Solr node, say, `ec2-54-1-2-3.us-east-2.compute.amazonaws.com` (aka, `solr-node-1`).
+
See the <<appendix,Appendix>> for how to configure an external ZooKeeper.
+
Inside `ec2-54-1-2-3.us-east-2.compute.amazonaws.com` (`solr-node-1`):
+
[source,bash]
$ cd $SOLR_HOME
# start Solr node on 8983; ZooKeeper will start on 8983+1000 = 9983
$ bin/solr start -c -p 8983 -h solr-node-1

+
On the other node, `ec2-54-4-5-6.us-east-2.compute.amazonaws.com` (`solr-node-2`):
+
[source,bash]
$ cd $SOLR_HOME
# start Solr node on 8983 and connect to ZooKeeper running on first node
$ bin/solr start -c -p 8983 -h solr-node-2 -z solr-node-1:9983

. Inspect and Verify. Inspect the Solr nodes' state from a browser on your local machine:
+
Go to:
+
[source,bash]
----
http://ec2-54-1-2-3.us-east-2.compute.amazonaws.com:8983/solr (solr-node-1:8983/solr)

http://ec2-54-4-5-6.us-east-2.compute.amazonaws.com:8983/solr (solr-node-2:8983/solr)
----
+
You should be able to see the Solr UI dashboard for both nodes.

== Create Collection, Index and Query

You can refer to the <<solr-tutorial.adoc#,Solr Tutorial>> for an extensive walkthrough on creating collections with multiple shards and replicas, indexing data via different methods, and querying documents accordingly.

[[appendix]]
== Appendix: Deploy with External ZooKeeper

If you want to configure an external ZooKeeper ensemble to avoid using the embedded single-instance ZooKeeper that runs in the same JVM as the Solr node, you need to make a few tweaks to the above-listed steps, as follows.

* When creating the security group, instead of opening port `9983` for ZooKeeper, you'll open `2181` (or whatever port you are using for ZooKeeper; its default is 2181).
* When configuring the number of instances to launch, choose 3 instances instead of 2.
* When modifying the `/etc/hosts` on each machine, add a third line for the 3rd instance and give it a recognizable name:
+
[source,text,subs="verbatim"]
$ sudo vim /etc/hosts
54.1.2.3 solr-node-1
54.4.5.6 solr-node-2
54.7.8.9 zookeeper-node

* You'll need to install ZooKeeper manually, described in the next section.

=== Install ZooKeeper

These steps will help you install and configure a single instance of ZooKeeper on AWS. This is not sufficient for production use, however, where a ZooKeeper ensemble of at least three nodes is recommended. See the section <<setting-up-an-external-zookeeper-ensemble.adoc#,Setting Up an External ZooKeeper Ensemble>> for information about how to change this single instance into an ensemble.

. Download a stable version of ZooKeeper. In this example we're using ZooKeeper v3.4.6. On the node you're using to host ZooKeeper (`zookeeper-node`), download the package and untar it:
+
[source,bash]
----
# download stable version of ZooKeeper, here 3.4.6
$ wget https://archive.apache.org/dist/zookeeper/zookeeper-3.4.6/zookeeper-3.4.6.tar.gz
# untar
$ tar -zxvf zookeeper-3.4.6.tar.gz
----
+
Add an environment variable for ZooKeeper's home directory (`ZOO_HOME`) to the `.bashrc` for the user who will be running the process. The rest of the instructions assume you have set this variable. Adjust the path to the ZooKeeper installation as appropriate if where you put it does not match the below.
+
[source,bash]
----
$ export ZOO_HOME=$PWD/zookeeper-3.4.6
# put the env variable in .bashrc
# vim ~/.bashrc
export ZOO_HOME=/home/ec2-user/zookeeper-3.4.6
----
. Change directories to `ZOO_HOME`, and create the ZooKeeper configuration by using the template provided by ZooKeeper.
+
[source,bash]
----
$ cd $ZOO_HOME
# create ZooKeeper config by using zoo_sample.cfg
$ cp conf/zoo_sample.cfg conf/zoo.cfg
----
. Create the ZooKeeper data directory in the filesystem, and edit the `zoo.cfg` file to uncomment the autopurge parameters and define the location of the data directory.
+
[source,bash]
----
# create data dir for ZooKeeper, edit zoo.cfg, uncomment autopurge parameters
$ mkdir data
$ vim conf/zoo.cfg
# -- uncomment --
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
# -- edit --
dataDir=data
----
. Start ZooKeeper.
+
[source,bash]
----
$ cd $ZOO_HOME
# start ZooKeeper, default port: 2181
$ bin/zkServer.sh start
----

. On the first node being used for Solr (`solr-node-1`), start Solr and tell it where to find ZooKeeper.
+
[source,bash]
----
$ cd $SOLR_HOME
# start Solr node on 8983 and connect to ZooKeeper running on ZooKeeper node
$ bin/solr start -c -p 8983 -h solr-node-1 -z zookeeper-node:2181
----
+
. On the second Solr node (`solr-node-2`), again start Solr and tell it where to find ZooKeeper.
+
[source,bash]
----
$ cd $SOLR_HOME
# start Solr node on 8983 and connect to ZooKeeper running on ZooKeeper node
$ bin/solr start -c -p 8983 -h solr-node-2 -z zookeeper-node:2181
----

[TIP]
====
As noted earlier, a single ZooKeeper node is not sufficient for a production installation. See these additional resources for more information about deploying Solr in production, which can be used once you have the EC2 instances up and running:

* <<taking-solr-to-production.adoc#,Taking Solr to Production>>
* <<setting-up-an-external-zookeeper-ensemble.adoc#,Setting Up an External ZooKeeper Ensemble>>
====
diff --git a/solr/solr-ref-guide/src/deployment-and-operations.adoc b/solr/solr-ref-guide/src/deployment-and-operations.adoc
index 67a405fcbe3..c92352767c8 100644
--- a/solr/solr-ref-guide/src/deployment-and-operations.adoc
+++ b/solr/solr-ref-guide/src/deployment-and-operations.adoc
@@ -1,5 +1,5 @@
 = Deployment and Operations
-:page-children: solr-control-script-reference, solr-configuration-files, taking-solr-to-production, making-and-restoring-backups, running-solr-on-hdfs, upgrading-a-solr-cluster, solr-upgrade-notes
+:page-children: solr-control-script-reference, solr-configuration-files, taking-solr-to-production, making-and-restoring-backups, running-solr-on-hdfs, aws-solrcloud-tutorial, upgrading-a-solr-cluster, solr-upgrade-notes
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
@@ -31,6 +31,8 @@ Common administrative tasks include:

 <<running-solr-on-hdfs.adoc#,Running Solr on HDFS>>: How to use HDFS to store your Solr indexes and transaction logs.

+<<aws-solrcloud-tutorial.adoc#,SolrCloud on AWS EC2>>: A tutorial on deploying Solr in Amazon Web Services (AWS) using EC2 instances.
+
 <<upgrading-a-solr-cluster.adoc#,Upgrading a Solr Cluster>>: Information for upgrading a production SolrCloud cluster.

 <<solr-upgrade-notes.adoc#,Solr Upgrade Notes>>: Information about changes made in Solr releases.
diff --git a/solr/solr-ref-guide/src/how-to-contribute.adoc b/solr/solr-ref-guide/src/how-to-contribute.adoc
index 0116b66d164..7d4485bdaa3 100644
--- a/solr/solr-ref-guide/src/how-to-contribute.adoc
+++ b/solr/solr-ref-guide/src/how-to-contribute.adoc
@@ -1,6 +1,6 @@
 = How to Contribute to Solr Documentation
 :page-tocclass: right
-:page-toclevels: 2
+:page-toclevels: 3
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.
See the NOTICE file // distributed with this work for additional information @@ -46,5 +46,5 @@ include::meta-docs/jekyll.adoc[leveloffset=+2] include::meta-docs/pdf.adoc[leveloffset=+2] == Building & Publishing the Guide -include::meta-docs/publish.adoc[leveloffset=+2] +include::meta-docs/publish.adoc[leveloffset=+1] endif::[] diff --git a/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-key.png b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-key.png new file mode 100644 index 00000000000..e7b3ab2b52d Binary files /dev/null and b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-key.png differ diff --git a/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-create.png b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-create.png new file mode 100644 index 00000000000..263bd736d6f Binary files /dev/null and b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-create.png differ diff --git a/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-edit.png b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-edit.png new file mode 100644 index 00000000000..2cd3237226c Binary files /dev/null and b/solr/solr-ref-guide/src/images/aws-solrcloud-tutorial/aws-security-edit.png differ diff --git a/solr/solr-ref-guide/src/meta-docs/publish.adoc b/solr/solr-ref-guide/src/meta-docs/publish.adoc index 5476fc30742..d97754e1484 100644 --- a/solr/solr-ref-guide/src/meta-docs/publish.adoc +++ b/solr/solr-ref-guide/src/meta-docs/publish.adoc @@ -1,4 +1,4 @@ -= Publication Process += Publication Steps for Each Process // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information @@ -92,7 +92,7 @@ Votes must be sent to the lucene-dev mailing list (`dev@lucene.apache.org`). . You can add your own +1 to the vote announcement email. . If there are issues that need to be resolved, you can start the process over, using RC1, RC2, etc., as needed. -Ideally, the HTML version will also be available for voters to evaluate, see the section <> below for details of how to do that. +NOTE: Ideally, the HTML version will also be available for voters to evaluate. See the section <> below for details of how to do that. === Publish to Production & Archive Old Versions @@ -144,7 +144,24 @@ WARNING: You must send the announcement email from your @apache.org email addres Always use the link to the download redirector for the announcement, as it will automatically direct users to the closest mirror for download: `https://www.apache.org/dyn/closer.cgi/lucene/solr/ref-guide/apache-solr-ref-guide-X.Y.pdf`. -You could also include a link to the HTML version in your announcement, if the publication process for that has finished. +You should include a link to the HTML version in your announcement. There are additional steps to modify the website for the HTML version, so see <> below for details. + +.Sample announcement +[source,text] +---- +The Lucene PMC is pleased to announce that the Solr Reference Guide +for 7.0 is now available. + +This 1,035-page PDF is the definitive guide to using Apache Solr, the +search server built on Lucene. + +The PDF Guide can be downloaded from: +https://www.apache.org/dyn/closer.cgi/lucene/solr/ref-guide/apache-solr-ref-guide-7.0.pdf. + +It is also available online at https://lucene.apache.org/solr/guide/7_0. 
+---- + +If the Guide is being published more than a day or two after the application itself, you should update the Solr website news page with the announcement (https://lucene.apache.org/solr/news.html). == Publishing HTML Version The steps to publish the Guide differ depending on if it is the first time the Guide has been published or if it is an update to an already published Guide. @@ -192,13 +209,21 @@ The `extpaths.txt` works by listing paths that should be ignored when the CMS sy Production URL: https://lucene.apache.org/extpaths.txt -==== Update Ref Guide Landing Page +=== Release Steps for HTML Version -Update the landing page at https://lucene.apache.org/solr/guide to link to the newest version. +When the PDF is announced as available, the HTML version should already be available on the Solr website. There are a few steps to take to make the new HTML version the default. -You can use the CMS system for this since it is a small change, or you can edit the file locally and commit it to the staging repo. +TIP: You can use the CMS system for these changes, or you can edit the file locally and commit it to the staging repo. -=== Update a Published Guide +. Update the landing page at https://lucene.apache.org/solr/guide (the file is at `content/solr/guide/index.mdtext` in SVN) to link to the newest version. +. Update the Guide redirect rule that looks like the below in `content/.htaccess` so URLs without a version in the path are redirected to the latest available version. ++ +[source,text] +RedirectMatch temp /solr/guide/(?!index.html)([a-z].*) /solr/guide/7_0/$1 ++ +In the above example, you would change the `7_0` part of the path to the right version (`7_1`, etc.). + +=== Updating Files in an Already-Published Guide If you need to re-publish an existing online copy of the Guide, you will need to checkout the directory in production website repository and overwrite the existing files: diff --git a/solr/solr-ref-guide/src/searching.adoc b/solr/solr-ref-guide/src/searching.adoc index 6b9c49c6165..724f379202c 100644 --- a/solr/solr-ref-guide/src/searching.adoc +++ b/solr/solr-ref-guide/src/searching.adoc @@ -1,5 +1,5 @@ = Searching -:page-children: overview-of-searching-in-solr, velocity-search-ui, relevance, query-syntax-and-parsing, json-request-api, faceting, highlighting, spell-checking, query-re-ranking, transforming-result-documents, suggester, morelikethis, pagination-of-results, collapse-and-expand-results, result-grouping, result-clustering, spatial-search, the-terms-component, the-term-vector-component, the-stats-component, the-query-elevation-component, response-writers, near-real-time-searching, realtime-get, exporting-result-sets, streaming-expressions, parallel-sql-interface +:page-children: overview-of-searching-in-solr, velocity-search-ui, relevance, query-syntax-and-parsing, json-request-api, faceting, highlighting, spell-checking, query-re-ranking, transforming-result-documents, suggester, morelikethis, pagination-of-results, collapse-and-expand-results, result-grouping, result-clustering, spatial-search, the-terms-component, the-term-vector-component, the-stats-component, the-query-elevation-component, response-writers, near-real-time-searching, realtime-get, exporting-result-sets, streaming-expressions, parallel-sql-interface, analytics // Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. 
See the NOTICE file
 // distributed with this work for additional information
@@ -55,3 +55,4 @@ This section describes how Solr works with search requests. It covers the following topics:
 * <<exporting-result-sets.adoc#,Exporting Result Sets>>: Functionality to export large result sets out of Solr.
 * <<streaming-expressions.adoc#,Streaming Expressions>>: A stream processing language for Solr, with a suite of functions to perform many types of queries and parallel execution tasks.
 * <<parallel-sql-interface.adoc#,Parallel SQL Interface>>: An interface for sending SQL statements to Solr, and using advanced parallel query processing and relational algebra for complex data analysis.
+* <<analytics.adoc#,Analytics Component>>: A framework to compute complex analytics over a result set.
diff --git a/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java b/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java
index c5dcac28f07..0dc88d70f17 100644
--- a/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java
+++ b/solr/solr-ref-guide/tools/CheckLinksAndAnchors.java
@@ -45,18 +45,41 @@ import org.jsoup.select.Elements;
 import org.jsoup.select.NodeVisitor;

 /**
- * Check various things regarding links in the generated HTML site.
+ * Check various things regarding anchors & links in the generated HTML site.
 *

 * <p>
 * Asciidoctor doesn't do a good job of rectifying situations where multiple documents are included in one
 * massive (PDF) document may have identical anchors (either explicitly defined, or implicitly defined because of
 * section headings). Asciidoctor also doesn't support linking directly to another (included) document by name,
- * unless there is an explicit '#fragement' used inthe link.
+ * unless there is an explicit '#fragment' used in the link.
 * </p>
 * <p>
 * This tool parses the generated HTML site, looking for these situations in order to fail the build -- since the
- * equivilent PDF will be broken. It also does sme general check of the relative URLs to ensure the destination
+ * equivalent PDF will be broken. It also does some general checks of the relative URLs to ensure the destination
 * files/anchors actaully exist.
 * </p>
+ * <p>
+ * This tool supports 2 modes, depending on whether you want to run it against the HTML generated by Jekyll, or
+ * the "bare bones" HTML generated directly by asciidoctor...
+ * </p>
+ * <ul>
+ *  <li>Jekyll Mode:
+ *   <ul>
+ *    <li><code>CheckLinksAndAnchors html-dir-name/ [false]</code></li>
+ *    <li>Requires all html pages have a "main-content" div; ignores all links &amp; anchors that
+ *        are not descendants of this div (to exclude redundant template-based header, footer, &amp; sidebar links)</li>
+ *    <li>Expects that the &lt;body/&gt; tag will have an id matching the page shortname.</li>
+ *   </ul>
+ *  </li>
+ *  <li>Bare Bones Mode:
+ *   <ul>
+ *    <li><code>CheckLinksAndAnchors html-dir-name/ true</code></li>
+ *    <li>Checks all links &amp; anchors in the page.</li>
+ *    <li>"Fakes" the existence of a &lt;body id="..."&gt; tag containing the page shortname.</li>
+ *   </ul>
+ *  </li>
+ * </ul>
    + * * * TODO: build a list of all known external links so that some other tool could (optionally) ping them all for 200 status? * @@ -74,11 +97,12 @@ public class CheckLinksAndAnchors { public static void main(String[] args) throws Exception { int problems = 0; - if (args.length != 1) { - System.err.println("usage: CheckLinksAndAnchors "); + if (args.length < 1 || 2 < args.length ) { + System.err.println("usage: CheckLinksAndAnchors []"); System.exit(-1); } final File htmlDir = new File(args[0]); + final boolean bareBones = (2 == args.length) ? Boolean.parseBoolean(args[1]) : false; final File[] pages = htmlDir.listFiles(new HtmlFileFilter()); if (0 == pages.length) { @@ -89,6 +113,9 @@ public class CheckLinksAndAnchors { final Map> idsToFiles = new HashMap<>(); final Map> filesToRelativeLinks = new HashMap<>(); final Set idsInMultiFiles = new HashSet<>(0); + + int totalLinks = 0; + int totalRelativeLinks = 0; for (File file : pages) { //System.out.println("input File URI: " + file.toURI().toString()); @@ -99,25 +126,47 @@ public class CheckLinksAndAnchors { final String fileContents = readFile(file.getPath()); final Document doc = Jsoup.parse(fileContents); - // we only care about class='main-content' -- we don't want to worry + + // For Jekyll, we only care about class='main-content' -- we don't want to worry // about ids/links duplicated in the header/footer of every page, - final Element mainContent = doc.select(".main-content").first(); + final String mainContentSelector = bareBones ? "body" : ".main-content"; + final Element mainContent = doc.select(mainContentSelector).first(); if (mainContent == null) { - throw new RuntimeException(file.getName() + " has no main-content div"); + throw new RuntimeException(file.getName() + " has no main content: " + mainContentSelector); } // Add all of the IDs in (the main-content of) this doc to idsToFiles (and idsInMultiFiles if needed) final Elements nodesWithIds = mainContent.select("[id]"); - // NOTE: add to the nodesWithIds so we check the main section anchor as well - nodesWithIds.addAll(doc.select("body[id]")); + + if (bareBones) { + // It's a pain in the ass to customize the HTML output structure asciidoctor's bare-bones html5 backend + // so instead we "fake" that the body tag contains the attribute we use in jekyll + // (and what gets added explicitly to each top level section in the PDF) + nodesWithIds.add(new Element(Tag.valueOf("body"), "").attr("id", file.getName().replaceAll("\\.html$",""))); + } else { + // We have to add Jekyll's to the nodesWithIds so we check the main section anchor as well + // since we've already + nodesWithIds.addAll(doc.select("body[id]")); + } + + boolean foundPreamble = false; for (Element node : nodesWithIds) { final String id = node.id(); assert null != id; assert 0 != id.length(); - // special case ids that we ignore + // special case id: we ignore the first 'preamble' because + // it's part of the core markup that asciidoctor always uses + // if we find it a second time in a single page, fail with a special error... 
if (id.equals("preamble")) { - continue; + if (foundPreamble) { + problems++; + System.err.println(file.toURI().toString() + + " contains 'preamble' anchor, this is special in jekyll and must not be used in content."); + } else { + foundPreamble = true; + continue; + } } if (idsToFiles.containsKey(id)) { @@ -131,6 +180,7 @@ public class CheckLinksAndAnchors { // check for (relative) links that don't include a fragment final Elements links = mainContent.select("a[href]"); for (Element link : links) { + totalLinks++; final String href = link.attr("href"); if (0 == href.length()) { problems++; @@ -139,6 +189,7 @@ public class CheckLinksAndAnchors { try { final URI uri = new URI(href); if (! uri.isAbsolute()) { + totalRelativeLinks++; final String frag = uri.getFragment(); if (null == frag || "".equals(frag)) { // we must have a fragment for intra-page links to work correctly @@ -200,7 +251,8 @@ public class CheckLinksAndAnchors { } } - + System.err.println("Processed " + totalLinks + " links (" + totalRelativeLinks + " relative) to " + + idsToFiles.size() + " anchors in " + pages.length + " files"); if (0 < problems) { System.err.println("Total of " + problems + " problems found"); System.exit(-1); diff --git a/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java b/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java new file mode 100644 index 00000000000..5c1d700676d --- /dev/null +++ b/solr/solr-ref-guide/tools/CustomizedAsciidoctorAntTask.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +import org.asciidoctor.ant.AsciidoctorAntTask; + +/** + * Customized version of the default AsciidoctorAntTask + * To deal with the fact that we want sourceDocumentName="" treated the same as unspecified (ie: null) + * in order to be able to wrap in a macro with defaults + */ +public class CustomizedAsciidoctorAntTask extends AsciidoctorAntTask { + @SuppressWarnings("UnusedDeclaration") + public void setSourceDocumentName(String sourceDocumentName) { + if ("".equals(sourceDocumentName)) { + sourceDocumentName = null; + } + super.setSourceDocumentName(sourceDocumentName); + } +} + + diff --git a/solr/solr-ref-guide/tools/asciidoctor-antlib.xml b/solr/solr-ref-guide/tools/asciidoctor-antlib.xml new file mode 100644 index 00000000000..d67e3e15e33 --- /dev/null +++ b/solr/solr-ref-guide/tools/asciidoctor-antlib.xml @@ -0,0 +1,22 @@ + + + + + diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrRequest.java index bccf2edbfb2..37ce16631f7 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrRequest.java @@ -154,8 +154,19 @@ public abstract class SolrRequest implements Serializabl public abstract SolrParams getParams(); - public abstract Collection getContentStreams() throws IOException; + /** + * @deprecated Please use {@link SolrRequest#getContentWriter(String)} instead. + */ + @Deprecated + public Collection getContentStreams() throws IOException { + return null; + } + /** + * If a request object wants to do a push write, implement this method. + * + * @param expectedType This is the type that the RequestWriter would like to get. But, it is OK to send any format + */ public RequestWriter.ContentWriter getContentWriter(String expectedType) { return null; } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AutoScalingConfig.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AutoScalingConfig.java index 5312c291eb4..f8ab422725d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AutoScalingConfig.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/AutoScalingConfig.java @@ -135,6 +135,11 @@ public class AutoScalingConfig implements MapWriter { if (!afterActions.equals(that.afterActions)) return false; return properties.equals(that.properties); } + + @Override + public String toString() { + return Utils.toJSONString(this); + } } /** @@ -232,6 +237,11 @@ public class AutoScalingConfig implements MapWriter { ew.put(entry.getKey(), entry.getValue()); } } + + @Override + public String toString() { + return Utils.toJSONString(this); + } } /** @@ -278,6 +288,11 @@ public class AutoScalingConfig implements MapWriter { return properties.equals(that.properties); } + + @Override + public String toString() { + return Utils.toJSONString(this); + } } /** diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java index 398571da204..542c876f0c9 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/BinaryRequestWriter.java @@ -27,6 +27,8 @@ import org.apache.solr.client.solrj.request.RequestWriter; import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.util.ContentStream; +import static 
org.apache.solr.common.params.CommonParams.JAVABIN_MIME; + /** * A RequestWriter which writes requests in the javabin format * @@ -49,11 +51,11 @@ public class BinaryRequestWriter extends RequestWriter { @Override public String getContentType() { - return "application/javabin"; + return JAVABIN_MIME; } }; } else { - return req.getContentWriter("application/javabin"); + return req.getContentWriter(JAVABIN_MIME); } } @@ -71,7 +73,7 @@ public class BinaryRequestWriter extends RequestWriter { @Override public String getUpdateContentType() { - return "application/javabin"; + return JAVABIN_MIME; } @Override diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/GrandSumEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/GrandSumEvaluator.java new file mode 100644 index 00000000000..754ca0be78e --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/GrandSumEvaluator.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.Locale; + +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class GrandSumEvaluator extends RecursiveObjectEvaluator implements OneValueWorker { + protected static final long serialVersionUID = 1L; + + public GrandSumEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ + super(expression, factory); + + if(1 != containedEvaluators.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting exactly 1 value but found %d",expression,containedEvaluators.size())); + } + } + + @Override + public Object doWork(Object value) throws IOException{ + if(null == value){ + return null; + } else if (value instanceof Matrix) { + Matrix matrix = (Matrix) value; + double[][] data = matrix.getData(); + double grandSum = 0; + for(double[] row : data) { + for(double d : row) { + grandSum += d; + } + } + return grandSum; + } else { + throw new IOException("Grand sum function only operates on a matrix"); + } + } +} diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MarkovChainEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MarkovChainEvaluator.java new file mode 100644 index 00000000000..714bdde2311 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/MarkovChainEvaluator.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; + +import java.util.Locale; +import java.util.Random; + +import org.apache.commons.math3.distribution.EnumeratedIntegerDistribution; +import org.apache.commons.math3.util.MathArrays; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class MarkovChainEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker { + protected static final long serialVersionUID = 1L; + + public MarkovChainEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ + super(expression, factory); + + if(2 < containedEvaluators.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting no more then two parameters but found %d",expression,containedEvaluators.size())); + } + } + + @Override + public Object doWork(Object... values) throws IOException{ + + int state = -1; + + if(values.length == 2) { + state = ((Number)values[1]).intValue(); + } + + if(values[0] instanceof Matrix) { + Matrix matrix = (Matrix) values[0]; + return new MarkovChain(matrix, state); + } else { + throw new IOException("matrix parameter expected for markovChain function"); + } + } + + public static class MarkovChain { + + private int state; + private EnumeratedIntegerDistribution[] distributions; + + public MarkovChain(Matrix matrix, int state) throws IOException { + double[][] data = matrix.getData(); + + if(data.length != data[0].length) { + throw new IOException("markovChain must be initialized with a square matrix."); + } + + this.distributions = new EnumeratedIntegerDistribution[data.length]; + + if(state > -1) { + this.state = state; + } else { + this.state = new Random().nextInt(data.length); + } + + for(int i=0; i vec = (List)values[0]; + double[] data = new double[vec.size()]; + + for(int i=0; i scaled = new ArrayList(data.length); + for(double d : data) { + scaled.add(d); + } + + return scaled; + } else { + throw new IOException(); + } + } + + private double[] scale(double[] values, double min, double max) { + + double localMin = Double.MAX_VALUE; + double localMax = Double.MIN_VALUE; + for (double d : values) { + if (d > localMax) { + localMax = d; + } + + if (d < localMin) { + localMin = d; + } + } + + //First scale between 0 and 1 + + double[] scaled = new double[values.length]; + + for (int i = 0; i < scaled.length; i++) { + double x = values[i]; + double s = (x - localMin) / (localMax - localMin); + scaled[i] = s; + } + + if (min != 0 || max != 1) { + //Next scale between specific min/max + double scale = max - min; + + for (int i = 0; i < scaled.length; i++) { + double d = scaled[i]; + scaled[i] = (scale * d) + min; + } + } + + return scaled; + } +} diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java 
b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java index afe4c5dab67..65629cdde90 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeEvaluator.java @@ -27,7 +27,7 @@ import org.apache.commons.math3.stat.StatUtils; import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; -public class NormalizeEvaluator extends RecursiveNumericEvaluator implements OneValueWorker { +public class NormalizeEvaluator extends RecursiveObjectEvaluator implements OneValueWorker { protected static final long serialVersionUID = 1L; public NormalizeEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ @@ -45,8 +45,16 @@ public class NormalizeEvaluator extends RecursiveNumericEvaluator implements One } else if(value instanceof List){ return Arrays.stream(StatUtils.normalize(((List)value).stream().mapToDouble(innerValue -> ((Number)innerValue).doubleValue()).toArray())).mapToObj(Double::new).collect(Collectors.toList()); - } - else{ + } else if (value instanceof Matrix) { + Matrix matrix = (Matrix) value; + double[][] data = matrix.getData(); + double[][] standardized = new double[data.length][]; + for(int i=0; i item).collect(Collectors.toList()); + } else { + return markovChain.sample(); + } + } else if (first instanceof RealDistribution) { RealDistribution realDistribution = (RealDistribution) first; if(second != null) { return Arrays.stream(realDistribution.sample(((Number) second).intValue())).mapToObj(item -> item).collect(Collectors.toList()); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScalarAddEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScalarAddEvaluator.java new file mode 100644 index 00000000000..3ae3436fccd --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ScalarAddEvaluator.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; + +public class ScalarAddEvaluator extends RecursiveObjectEvaluator implements TwoValueWorker { + protected static final long serialVersionUID = 1L; + + public ScalarAddEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ + super(expression, factory); + + if(2 != containedEvaluators.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expects exactly 2 values but found %d",expression,containedEvaluators.size())); + } + } + + @Override + public Object doWork(Object value1, Object value2) throws IOException{ + + double d = ((Number)value1).doubleValue(); + if(value2 instanceof List){ + List nums = (List)value2; + List out = new ArrayList(); + for(Number num : nums) { + out.add(operate(num.doubleValue(), d)); + } + + return out; + + } else if (value2 instanceof Matrix) { + Matrix matrix = (Matrix) value2; + double[][] data = matrix.getData(); + double[][] newData = new double[data.length][]; + for(int i=0; i sums = new ArrayList(data.length); + + for(int i=0; i sums = new ArrayList(data.length); + + for(int i=0; i return params; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } - protected void addProperties(ModifiableSolrParams params, Properties props) { for (String propertyName : props.stringPropertyNames()) { params.set(PROPERTY_PREFIX + propertyName, props.getProperty(propertyName)); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/ConfigSetAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/ConfigSetAdminRequest.java index 2e34f9c24c7..69168777154 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/ConfigSetAdminRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/ConfigSetAdminRequest.java @@ -16,12 +16,9 @@ */ package org.apache.solr.client.solrj.request; -import java.io.IOException; -import java.util.Collection; import java.util.Map; import java.util.Properties; - import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.ConfigSetAdminResponse; @@ -29,7 +26,7 @@ import org.apache.solr.common.params.ConfigSetParams; import org.apache.solr.common.params.ConfigSetParams.ConfigSetAction; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; + import static org.apache.solr.common.params.CommonParams.NAME; /** @@ -68,10 +65,6 @@ public abstract class ConfigSetAdminRequest return params; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } @Override protected abstract R createResponse(SolrClient client); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java index 3967d4170ef..692b54d3e69 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CoreAdminRequest.java @@ -18,7 +18,6 @@ package org.apache.solr.client.solrj.request; import java.io.IOException; import 
java.util.Arrays; -import java.util.Collection; import java.util.List; import org.apache.solr.client.solrj.SolrClient; @@ -32,7 +31,6 @@ import org.apache.solr.common.params.CoreAdminParams; import org.apache.solr.common.params.CoreAdminParams.CoreAdminAction; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; /** * This class is experimental and subject to change. @@ -567,10 +565,6 @@ public class CoreAdminRequest extends SolrRequest { // //--------------------------------------------------------------------------------------- - @Override - public Collection getContentStreams() throws IOException { - return null; - } @Override protected CoreAdminResponse createResponse(SolrClient client) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DelegationTokenRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DelegationTokenRequest.java index 6d2f0cb8c2f..697a6ada342 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DelegationTokenRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DelegationTokenRequest.java @@ -17,20 +17,16 @@ package org.apache.solr.client.solrj.request; -import java.io.IOException; import java.util.Arrays; -import java.util.Collection; import java.util.Set; import java.util.TreeSet; -import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.impl.NoOpResponseParser; import org.apache.solr.client.solrj.response.DelegationTokenResponse; - import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; /** * Class for making Solr delegation token requests. 
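Taken together, the SolrRequest hunks above replace the pull-style getContentStreams() contract (now deprecated, defaulting to null) with a push-style ContentWriter. A minimal sketch of a body-carrying request under the new API; the class name JsonPayloadRequest and its JSON payload are illustrative assumptions, not part of this patch:

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.response.SimpleSolrResponse;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;

// Hypothetical request that pushes a pre-built JSON body to the server.
public class JsonPayloadRequest extends SolrRequest<SimpleSolrResponse> {
  private final String json;

  public JsonPayloadRequest(String path, String json) {
    super(METHOD.POST, path);
    this.json = json;
  }

  @Override
  public SolrParams getParams() {
    return null; // body-only request, no query parameters
  }

  @Override
  public RequestWriter.ContentWriter getContentWriter(String expectedType) {
    return new RequestWriter.ContentWriter() {
      @Override
      public void write(OutputStream os) throws IOException {
        // Push write: serialize straight into the connection's OutputStream.
        os.write(json.getBytes(StandardCharsets.UTF_8));
      }

      @Override
      public String getContentType() {
        return CommonParams.JSON_MIME;
      }
    };
  }

  @Override
  protected SimpleSolrResponse createResponse(SolrClient client) {
    return new SimpleSolrResponse();
  }
}

Note there is no getContentStreams() override at all: the deprecated default in SolrRequest returns null, and the transport layer is expected to prefer the ContentWriter when one is supplied.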
@@ -53,14 +49,6 @@ public abstract class DelegationTokenRequest
 
   protected abstract Q getThis();
 
-  /**
-   * {@inheritDoc}
-   */
-  @Override
-  public Collection<ContentStream> getContentStreams() throws IOException {
-    return null;
-  }
-
   @Override
   protected abstract R createResponse(SolrClient client);
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DirectXmlRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DirectXmlRequest.java
index 766dfeb157a..ef5e9544d81 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DirectXmlRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DirectXmlRequest.java
@@ -18,12 +18,10 @@ package org.apache.solr.client.solrj.request;
 
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
+import org.apache.solr.client.solrj.request.RequestWriter.StringPayloadContentWriter;
 import org.apache.solr.client.solrj.response.UpdateResponse;
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.ContentStream;
-
-import java.util.Collection;
 
 /**
  * Send arbitrary XML to a request handler
@@ -35,16 +33,15 @@ public class DirectXmlRequest extends SolrRequest<UpdateResponse> implements IsUpdateRequest
   final String xml;
   private SolrParams params;
-
-  public DirectXmlRequest( String path, String body )
-  {
+
+  public DirectXmlRequest(String path, String body) {
     super( METHOD.POST, path );
     xml = body;
   }
 
   @Override
-  public Collection<ContentStream> getContentStreams() {
-    return ClientUtils.toContentStreams( xml, ClientUtils.TEXT_XML );
+  public RequestWriter.ContentWriter getContentWriter(String expectedType) {
+    return new StringPayloadContentWriter(xml, ClientUtils.TEXT_XML);
   }
 
   @Override
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
index 6de2dc33bf2..82ef54dda7b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/DocumentAnalysisRequest.java
@@ -16,6 +16,14 @@
  */
 package org.apache.solr.client.solrj.request;
 
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.response.DocumentAnalysisResponse;
@@ -23,13 +31,8 @@ import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.AnalysisParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.ContentStream;
 
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
+import static java.nio.charset.StandardCharsets.UTF_8;
 
 /**
  * A request for the org.apache.solr.handler.DocumentAnalysisRequestHandler.
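The DirectXmlRequest hunk above is the simplest migration: the whole body is already a String, so it just wraps it in the new StringPayloadContentWriter (defined further down in RequestWriter). A short usage sketch; the URL, core name, and XML are illustrative:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.DirectXmlRequest;
import org.apache.solr.client.solrj.response.UpdateResponse;

SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build();
String xml = "<add><doc><field name=\"id\">1</field></doc></add>";
DirectXmlRequest req = new DirectXmlRequest("/update", xml);
// The body now reaches the server through StringPayloadContentWriter as
// UTF-8 text/xml rather than through a ContentStream collection.
UpdateResponse rsp = req.process(client);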
@@ -59,12 +62,26 @@ public class DocumentAnalysisRequest extends SolrRequest getContentStreams() throws IOException { - return ClientUtils.toContentStreams(getXML(), ClientUtils.TEXT_XML); + public RequestWriter.ContentWriter getContentWriter(String expectedType) { + + return new RequestWriter.ContentWriter() { + @Override + public void write(OutputStream os) throws IOException { + OutputStreamWriter outputStreamWriter = new OutputStreamWriter(os, UTF_8); + try { + getXML(outputStreamWriter); + } finally { + outputStreamWriter.flush(); + } + } + + @Override + public String getContentType() { + return ClientUtils.TEXT_XML; + } + }; + } @Override @@ -94,8 +111,8 @@ public class DocumentAnalysisRequest extends SolrRequest"); for (SolrInputDocument document : documents) { ClientUtils.writeXML(document, writer); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java index 8d5e7385ffe..9c4993e119a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/FieldAnalysisRequest.java @@ -16,18 +16,15 @@ */ package org.apache.solr.client.solrj.request; +import java.util.LinkedList; +import java.util.List; + import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.FieldAnalysisResponse; import org.apache.solr.common.params.AnalysisParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; - -import java.io.IOException; -import java.util.Collection; -import java.util.LinkedList; -import java.util.List; /** * A request for the org.apache.solr.handler.FieldAnalysisRequestHandler. 
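DocumentAnalysisRequest goes a step beyond a String payload: getXML() now takes a Writer, so the hunk above streams the document XML straight into the HTTP OutputStream instead of assembling it in memory first (the old code went through a StringWriter). The shape of that pattern as a standalone sketch; writeBody is a stand-in for any getXML(Writer)-style serializer and is an assumption, not patch API:

// Assumes the java.io imports, StandardCharsets, and ClientUtils used elsewhere
// in this patch, plus a hypothetical writeBody(Writer) serializer.
RequestWriter.ContentWriter contentWriter = new RequestWriter.ContentWriter() {
  @Override
  public void write(OutputStream os) throws IOException {
    // Wrap the raw stream once; all XML is appended through this Writer.
    OutputStreamWriter w = new OutputStreamWriter(os, StandardCharsets.UTF_8);
    try {
      writeBody(w); // e.g. getXML(Writer) above
    } finally {
      w.flush();
    }
  }

  @Override
  public String getContentType() {
    return ClientUtils.TEXT_XML;
  }
};

The finally block flushes but deliberately does not close the wrapper, presumably because closing it would also close the underlying connection stream, which belongs to the HTTP layer.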
@@ -59,13 +56,6 @@ public class FieldAnalysisRequest extends SolrRequest { super(METHOD.GET, uri); } - /** - * {@inheritDoc} - */ - @Override - public Collection getContentStreams() throws IOException { - return null; - } @Override protected FieldAnalysisResponse createResponse(SolrClient client) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/GenericSolrRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/GenericSolrRequest.java index 558d7d41fdf..531e55d547d 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/GenericSolrRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/GenericSolrRequest.java @@ -16,7 +16,6 @@ */ package org.apache.solr.client.solrj.request; -import java.io.IOException; import java.util.Collection; import org.apache.solr.client.solrj.SolrClient; @@ -45,11 +44,6 @@ public class GenericSolrRequest extends SolrRequest { return params; } - @Override - public Collection getContentStreams() throws IOException { - return contentStreams; - } - @Override protected SimpleSolrResponse createResponse(SolrClient client) { return response; diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/HealthCheckRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/HealthCheckRequest.java index 7073167bd54..50d4d8cbae3 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/HealthCheckRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/HealthCheckRequest.java @@ -17,15 +17,11 @@ package org.apache.solr.client.solrj.request; -import java.io.IOException; -import java.util.Collection; - import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.HealthCheckResponse; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; import static org.apache.solr.common.params.CommonParams.HEALTH_CHECK_HANDLER_PATH; @@ -44,11 +40,6 @@ public class HealthCheckRequest extends SolrRequest { return null; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } - @Override protected HealthCheckResponse createResponse(SolrClient client) { // TODO: Accept requests w/ CloudSolrClient while ensuring that the request doesn't get routed to diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java index 1ccb18aba33..fe14bce5106 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java @@ -16,17 +16,15 @@ */ package org.apache.solr.client.solrj.request; +import java.util.ArrayList; +import java.util.List; + import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; /** * @@ -90,10 +88,6 @@ public class LukeRequest extends SolrRequest { //--------------------------------------------------------------------------------- 
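The deletions running through GenericSolrRequest, HealthCheckRequest, and LukeRequest here (and QueryRequest and SolrPing below) are mostly boilerplate removal: those overrides only returned null, and the deprecated default in SolrRequest now does that for them (GenericSolrRequest's stream-returning override goes away as part of the same migration). A minimal sketch of a body-less request after this cleanup; the class name is hypothetical and "/admin/ping" is used as an illustrative path:

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.response.SolrPingResponse;
import org.apache.solr.common.params.SolrParams;

// No getContentStreams() and no getContentWriter(): both inherited
// defaults return null, which is correct for a request without a body.
public class PingLikeRequest extends SolrRequest<SolrPingResponse> {
  public PingLikeRequest() {
    super(METHOD.GET, "/admin/ping");
  }

  @Override
  public SolrParams getParams() {
    return null;
  }

  @Override
  protected SolrPingResponse createResponse(SolrClient client) {
    return new SolrPingResponse();
  }
}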
//--------------------------------------------------------------------------------- - @Override - public Collection getContentStreams() { - return null; - } @Override protected LukeResponse createResponse(SolrClient client) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/QueryRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/QueryRequest.java index 5b2fe959cc3..1c2fda4f594 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/QueryRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/QueryRequest.java @@ -21,9 +21,6 @@ import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; - -import java.util.Collection; /** * @@ -68,11 +65,6 @@ public class QueryRequest extends SolrRequest { //--------------------------------------------------------------------------------- //--------------------------------------------------------------------------------- - - @Override - public Collection getContentStreams() { - return null; - } @Override protected QueryResponse createResponse(SolrClient client) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java index 5521e29acb3..5d941e06fe7 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/RequestWriter.java @@ -102,7 +102,27 @@ public class RequestWriter { public String getUpdateContentType() { return ClientUtils.TEXT_XML; + } + public static class StringPayloadContentWriter implements ContentWriter { + public final String payload; + public final String type; + + public StringPayloadContentWriter(String payload, String type) { + this.payload = payload; + this.type = type; + } + + @Override + public void write(OutputStream os) throws IOException { + if (payload == null) return; + os.write(payload.getBytes(StandardCharsets.UTF_8)); + } + + @Override + public String getContentType() { + return type; + } } protected boolean isNull(List l) { diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/SolrPing.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/SolrPing.java index 13d9e4caded..43801c61d43 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/SolrPing.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/SolrPing.java @@ -21,9 +21,6 @@ import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.SolrPingResponse; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.ModifiableSolrParams; -import org.apache.solr.common.util.ContentStream; - -import java.util.Collection; /** * Verify that there is a working Solr core at the URL of a {@link org.apache.solr.client.solrj.SolrClient}. 
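StringPayloadContentWriter, added to RequestWriter above, is the reusable piece that DirectXmlRequest (earlier) and V2Request (next) lean on: any request whose body already exists as a String gets UTF-8 encoding and a caller-chosen content type for free. A behavior sketch; the JSON literal is illustrative:

// Assumes the RequestWriter and CommonParams imports used elsewhere in this
// patch, plus java.io.ByteArrayOutputStream.
RequestWriter.ContentWriter w = new RequestWriter.StringPayloadContentWriter(
    "{\"set-property\":{\"name\":\"value\"}}", CommonParams.JSON_MIME);

ByteArrayOutputStream os = new ByteArrayOutputStream();
w.write(os);                      // the UTF-8 bytes of the payload; a null payload writes nothing
String type = w.getContentType(); // "application/json"

In the V2Request hunk that follows, a String payload is routed through exactly this class with JSON_MIME, while structured payloads keep the anonymous writer that marshals either JavaBin or JSON.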
@@ -48,11 +45,6 @@ public class SolrPing extends SolrRequest { params = new ModifiableSolrParams(); } - @Override - public Collection getContentStreams() { - return null; - } - @Override protected SolrPingResponse createResponse(SolrClient client) { return new SolrPingResponse(); diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java index 8985c9c554a..b65ce92239a 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/V2Request.java @@ -19,8 +19,6 @@ package org.apache.solr.client.solrj.request; import java.io.IOException; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.util.Collection; import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -30,10 +28,12 @@ import org.apache.solr.client.solrj.SolrRequest; import org.apache.solr.client.solrj.response.V2Response; import org.apache.solr.common.MapWriter; import org.apache.solr.common.params.SolrParams; -import org.apache.solr.common.util.ContentStream; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.common.util.Utils; +import static org.apache.solr.common.params.CommonParams.JAVABIN_MIME; +import static org.apache.solr.common.params.CommonParams.JSON_MIME; + public class V2Request extends SolrRequest implements MapWriter { //only for debugging purposes public static final ThreadLocal v2Calls = new ThreadLocal<>(); @@ -64,34 +64,28 @@ public class V2Request extends SolrRequest implements MapWriter { public RequestWriter.ContentWriter getContentWriter(String s) { if (v2Calls.get() != null) v2Calls.get().incrementAndGet(); if (payload == null) return null; + if (payload instanceof String) { + return new RequestWriter.StringPayloadContentWriter((String) payload, JSON_MIME); + + } return new RequestWriter.ContentWriter() { @Override public void write(OutputStream os) throws IOException { - if (payload instanceof String) { - os.write(((String) payload).getBytes(StandardCharsets.UTF_8)); + if (useBinary) { + new JavaBinCodec().marshal(payload, os); } else { - if (useBinary) { - new JavaBinCodec().marshal(payload, os); - } else { - byte[] b = Utils.toJSON(payload); - os.write(b); - } + byte[] b = Utils.toJSON(payload); + os.write(b); } } @Override public String getContentType() { - if (payload instanceof String) return "application/json"; - return useBinary ? "application/javabin" : "application/json"; + return useBinary ? 
JAVABIN_MIME : JSON_MIME; } }; } - @Override - public Collection getContentStreams() throws IOException { - return null; - } - public boolean isPerCollectionRequest() { return isPerCollectionRequest; } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java index 505a8a60cc1..1645ff29ddd 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java @@ -284,5 +284,8 @@ public interface CommonParams { String VERSION_FIELD="_version_"; String ID = "id"; + String JSON_MIME = "application/json"; + + String JAVABIN_MIME = "application/javabin"; } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java index cfcfcd6227e..e666e495552 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientCacheTest.java @@ -36,11 +36,18 @@ import org.apache.solr.client.solrj.request.UpdateRequest; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.util.NamedList; +import org.junit.BeforeClass; + import static org.mockito.Mockito.*; import static java.nio.charset.StandardCharsets.UTF_8; public class CloudSolrClientCacheTest extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() { + assumeWorkingMockito(); + } public void testCaching() throws Exception { String collName = "gettingstarted"; diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index 5fe860893ed..e52e7812496 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -6066,6 +6066,118 @@ public class StreamExpressionTest extends SolrCloudTestCase { } + @Test + public void testMatrixMath() throws Exception { + String cexpr = "let(echo=true, a=matrix(array(1.5, 2.5, 3.5), array(4.5,5.5,6.5)), " + + "b=grandSum(a), " + + "c=sumRows(a), " + + "d=sumColumns(a), " + + "e=scalarAdd(1, a)," + + "f=scalarSubtract(1, a)," + + "g=scalarMultiply(1.5, a)," + + "h=scalarDivide(1.5, a)," + + "i=scalarAdd(1.5, array(1.5, 2.5, 3.5))," + + "j=scalarSubtract(1.5, array(1.5, 2.5, 3.5))," + + "k=scalarMultiply(1.5, array(1.5, 2.5, 3.5))," + + "l=scalarDivide(1.5, array(1.5, 2.5, 3.5)))"; + + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + + double grandSum = tuples.get(0).getDouble("b"); + assertEquals(grandSum, 24, 0.0); + + List sumRows = (List)tuples.get(0).get("c"); + assertEquals(sumRows.size(), 2); + assertEquals(sumRows.get(0).doubleValue(), 7.5, 0.0); + assertEquals(sumRows.get(1).doubleValue(), 16.5, 0.0); + + List sumCols = (List)tuples.get(0).get("d"); + assertEquals(sumCols.size(), 3); + 
assertEquals(sumCols.get(0).doubleValue(), 6.0, 0.0); + assertEquals(sumCols.get(1).doubleValue(), 8.0, 0.0); + assertEquals(sumCols.get(2).doubleValue(), 10, 0.0); + + List> scalarAdd = (List>)tuples.get(0).get("e"); + List row1 = scalarAdd.get(0); + assertEquals(row1.size(), 3); + assertEquals(row1.get(0).doubleValue(), 2.5, 0.0); + assertEquals(row1.get(1).doubleValue(), 3.5, 0.0); + assertEquals(row1.get(2).doubleValue(), 4.5, 0.0); + + List row2 = scalarAdd.get(1); + assertEquals(row2.get(0).doubleValue(), 5.5, 0.0); + assertEquals(row2.get(1).doubleValue(), 6.5, 0.0); + assertEquals(row2.get(2).doubleValue(), 7.5, 0.0); + + List> scalarSubtract = (List>)tuples.get(0).get("f"); + row1 = scalarSubtract.get(0); + assertEquals(row1.size(), 3); + assertEquals(row1.get(0).doubleValue(), 0.5, 0.0); + assertEquals(row1.get(1).doubleValue(), 1.5, 0.0); + assertEquals(row1.get(2).doubleValue(), 2.5, 0.0); + + row2 = scalarSubtract.get(1); + assertEquals(row2.get(0).doubleValue(), 3.5, 0.0); + assertEquals(row2.get(1).doubleValue(), 4.5, 0.0); + assertEquals(row2.get(2).doubleValue(), 5.5, 0.0); + + List> scalarMultiply = (List>)tuples.get(0).get("g"); + row1 = scalarMultiply.get(0); + assertEquals(row1.size(), 3); + assertEquals(row1.get(0).doubleValue(), 2.25, 0.0); + assertEquals(row1.get(1).doubleValue(), 3.75, 0.0); + assertEquals(row1.get(2).doubleValue(), 5.25, 0.0); + + row2 = scalarMultiply.get(1); + assertEquals(row2.get(0).doubleValue(), 6.75, 0.0); + assertEquals(row2.get(1).doubleValue(), 8.25, 0.0); + assertEquals(row2.get(2).doubleValue(), 9.75, 0.0); + + List> scalarDivide = (List>)tuples.get(0).get("h"); + row1 = scalarDivide.get(0); + assertEquals(row1.size(), 3); + assertEquals(row1.get(0).doubleValue(), 1.0, 0.0); + assertEquals(row1.get(1).doubleValue(), 1.66666666666667, 0.001); + assertEquals(row1.get(2).doubleValue(), 2.33333333333333, 0.001); + + row2 = scalarDivide.get(1); + assertEquals(row2.get(0).doubleValue(), 3, 0.0); + assertEquals(row2.get(1).doubleValue(), 3.66666666666667, 0.001); + assertEquals(row2.get(2).doubleValue(), 4.33333333333333, 0.001); + + List rowA = (List)tuples.get(0).get("i"); + assertEquals(rowA.size(), 3); + assertEquals(rowA.get(0).doubleValue(), 3.0, 0.0); + assertEquals(rowA.get(1).doubleValue(), 4.0, 0.0); + assertEquals(rowA.get(2).doubleValue(), 5.0, 0.0); + + rowA = (List)tuples.get(0).get("j"); + assertEquals(rowA.size(), 3); + assertEquals(rowA.get(0).doubleValue(), 0, 0.0); + assertEquals(rowA.get(1).doubleValue(), 1.0, 0.0); + assertEquals(rowA.get(2).doubleValue(), 2.0, 0.0); + + rowA = (List)tuples.get(0).get("k"); + assertEquals(rowA.size(), 3); + assertEquals(rowA.get(0).doubleValue(), 2.25, 0.0); + assertEquals(rowA.get(1).doubleValue(), 3.75, 0.0); + assertEquals(rowA.get(2).doubleValue(), 5.25, 0.0); + + rowA = (List)tuples.get(0).get("l"); + assertEquals(rowA.size(), 3); + assertEquals(rowA.get(0).doubleValue(), 1.0, 0.0); + assertEquals(rowA.get(1).doubleValue(), 1.66666666666667, 0.001); + assertEquals(rowA.get(2).doubleValue(), 2.33333333333333, 0.001); + } + @Test public void testTranspose() throws Exception { String cexpr = "let(a=matrix(array(1,2,3), array(4,5,6)), b=transpose(a))"; @@ -6097,8 +6209,8 @@ public class StreamExpressionTest extends SolrCloudTestCase { } @Test - public void testUnit() throws Exception { - String cexpr = "let(echo=true, a=unit(matrix(array(1,2,3), array(4,5,6))), b=unit(array(4,5,6)))"; + public void testUnitize() throws Exception { + String cexpr = "let(echo=true, 
a=unitize(matrix(array(1,2,3), array(4,5,6))), b=unitize(array(4,5,6)))"; ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); paramsLoc.set("expr", cexpr); paramsLoc.set("qt", "/stream"); @@ -6129,6 +6241,66 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(array2.get(2).doubleValue(), 0.6837634587578276, 0.0); } + @Test + public void testStandardize() throws Exception { + String cexpr = "let(echo=true, a=standardize(matrix(array(1,2,3), array(4,5,6))), b=standardize(array(4,5,6)))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List> out = (List>)tuples.get(0).get("a"); + assertEquals(out.size(), 2); + List array1 = out.get(0); + assertEquals(array1.size(), 3); + assertEquals(array1.get(0).doubleValue(), -1, 0.0); + assertEquals(array1.get(1).doubleValue(), 0, 0.0); + assertEquals(array1.get(2).doubleValue(), 1, 0.0); + + List array2 = out.get(1); + assertEquals(array2.size(), 3); + assertEquals(array2.get(0).doubleValue(), -1, 0.0); + assertEquals(array2.get(1).doubleValue(), 0, 0.0); + assertEquals(array2.get(2).doubleValue(), 1, 0.0); + + List array3 = (List)tuples.get(0).get("b"); + assertEquals(array3.size(), 3); + assertEquals(array2.get(0).doubleValue(), -1, 0.0); + assertEquals(array2.get(1).doubleValue(), 0, 0.0); + assertEquals(array2.get(2).doubleValue(), 1, 0.0); + } + + @Test + public void testMarkovChain() throws Exception { + String cexpr = "let(state0=array(.5,.5),\n" + + " state1=array(.5,.5),\n" + + " states=matrix(state0, state1),\n" + + " m=markovChain(states, 0),\n" + + " s=sample(m, 50000),\n" + + " f=freqTable(s))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List> out = (List>)tuples.get(0).get("f"); + assertEquals(out.size(), 2); + Map bin0 = out.get(0); + double state0Pct = bin0.get("pct").doubleValue(); + assertEquals(state0Pct, .5, .015); + Map bin1 = out.get(1); + double state1Pct = bin1.get("pct").doubleValue(); + assertEquals(state1Pct, .5, .015); + } + @Test @@ -7112,6 +7284,68 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertEquals(num, 1.4445, 0.0); } + @Test + public void testMinMaxScale() throws Exception { + String cexpr = "let(echo=true, a=minMaxScale(matrix(array(1,2,3,4,5), array(10,20,30,40,50))), " + + "b=minMaxScale(matrix(array(1,2,3,4,5), array(10,20,30,40,50)), 0, 100)," + + "c=minMaxScale(array(1,2,3,4,5))," + + "d=minMaxScale(array(1,2,3,4,5), 0, 100))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + 
solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + + List> matrix = (List>)tuples.get(0).get("a"); + List row1 = matrix.get(0); + assertEquals(row1.get(0).doubleValue(), 0,0); + assertEquals(row1.get(1).doubleValue(), .25,0); + assertEquals(row1.get(2).doubleValue(), .5,0); + assertEquals(row1.get(3).doubleValue(), .75, 0); + assertEquals(row1.get(4).doubleValue(), 1, 0); + + List row2 = matrix.get(1); + assertEquals(row2.get(0).doubleValue(), 0,0); + assertEquals(row2.get(1).doubleValue(), .25,0); + assertEquals(row2.get(2).doubleValue(), .5,0); + assertEquals(row2.get(3).doubleValue(), .75,0); + assertEquals(row2.get(4).doubleValue(), 1,0); + + matrix = (List>)tuples.get(0).get("b"); + row1 = matrix.get(0); + assertEquals(row1.get(0).doubleValue(), 0,0); + assertEquals(row1.get(1).doubleValue(), 25,0); + assertEquals(row1.get(2).doubleValue(), 50,0); + assertEquals(row1.get(3).doubleValue(), 75,0); + assertEquals(row1.get(4).doubleValue(), 100,0); + + row2 = matrix.get(1); + assertEquals(row2.get(0).doubleValue(), 0,0); + assertEquals(row2.get(1).doubleValue(), 25,0); + assertEquals(row2.get(2).doubleValue(), 50,0); + assertEquals(row2.get(3).doubleValue(), 75,0); + assertEquals(row2.get(4).doubleValue(), 100,0); + + List row3= (List)tuples.get(0).get("c"); + assertEquals(row3.get(0).doubleValue(), 0,0); + assertEquals(row3.get(1).doubleValue(), .25,0); + assertEquals(row3.get(2).doubleValue(), .5,0); + assertEquals(row3.get(3).doubleValue(), .75,0); + assertEquals(row3.get(4).doubleValue(), 1,0); + + List row4= (List)tuples.get(0).get("d"); + assertEquals(row4.get(0).doubleValue(), 0,0); + assertEquals(row4.get(1).doubleValue(), 25,0); + assertEquals(row4.get(2).doubleValue(), 50,0); + assertEquals(row4.get(3).doubleValue(), 75,0); + assertEquals(row4.get(4).doubleValue(), 100,0); + } + + @Test public void testMean() throws Exception { String cexpr = "mean(array(1,2,3,4,5))"; @@ -7494,59 +7728,6 @@ public class StreamExpressionTest extends SolrCloudTestCase { assertTrue(length == 7); } - @Test - public void testNormalize() throws Exception { - UpdateRequest updateRequest = new UpdateRequest(); - - updateRequest.add(id, "1", "price_f", "100.0", "col_s", "a", "order_i", "1"); - updateRequest.add(id, "2", "price_f", "200.0", "col_s", "a", "order_i", "2"); - updateRequest.add(id, "3", "price_f", "300.0", "col_s", "a", "order_i", "3"); - updateRequest.add(id, "4", "price_f", "100.0", "col_s", "a", "order_i", "4"); - updateRequest.add(id, "5", "price_f", "200.0", "col_s", "a", "order_i", "5"); - updateRequest.add(id, "6", "price_f", "400.0", "col_s", "a", "order_i", "6"); - updateRequest.add(id, "7", "price_f", "600.0", "col_s", "a", "order_i", "7"); - - updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS); - - String expr1 = "search("+COLLECTIONORALIAS+", q=\"col_s:a\", fl=\"price_f, order_i\", sort=\"order_i asc\")"; - String cexpr = "let(a="+expr1+", c=col(a, price_f), tuple(n=normalize(c), c=c))"; - - ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); - paramsLoc.set("expr", cexpr); - paramsLoc.set("qt", "/stream"); - - String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; - TupleStream solrStream = new SolrStream(url, paramsLoc); - - StreamContext context = new StreamContext(); - solrStream.setStreamContext(context); - List tuples = getTuples(solrStream); - assertTrue(tuples.size() == 1); - Tuple tuple = tuples.get(0); - List col = (List)tuple.get("c"); - List 
normalized = (List<Double>)tuple.get("n");
-
-    assertTrue(col.size() == normalized.size());
-
-    double total = 0.0D;
-
-    for(double d : normalized) {
-      total += d;
-    }
-
-    double mean = total/normalized.size();
-    assert(Math.round(mean) == 0);
-
-    double sd = 0;
-    for (int i = 0; i < normalized.size(); i++)
-    {
-      sd += Math.pow(normalized.get(i) - mean, 2) / normalized.size();
-    }
-    double standardDeviation = Math.sqrt(sd);
-
-    assertTrue(Math.round(standardDeviation) == 1);
-  }
-
   @Test
   public void testListStream() throws Exception {
     UpdateRequest updateRequest = new UpdateRequest();
diff --git a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
index 9d9a9dc55bf..3b1c4692cfa 100644
--- a/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
+++ b/solr/test-framework/src/java/org/apache/solr/SolrTestCaseJ4.java
@@ -31,6 +31,7 @@ import java.lang.annotation.Retention;
 import java.lang.annotation.RetentionPolicy;
 import java.lang.annotation.Target;
 import java.lang.invoke.MethodHandles;
+import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.net.MalformedURLException;
 import java.net.ServerSocket;
@@ -60,9 +61,11 @@ import java.util.Map.Entry;
 import java.util.concurrent.TimeUnit;
 
 import com.carrotsearch.randomizedtesting.RandomizedContext;
+import com.carrotsearch.randomizedtesting.RandomizedTest;
 import com.carrotsearch.randomizedtesting.TraceFormatting;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
 import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
+
 import org.apache.commons.io.FileUtils;
 import org.apache.http.client.HttpClient;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -328,6 +331,19 @@ public abstract class SolrTestCaseJ4 extends LuceneTestCase {
     StartupLoggingUtils.changeLogLevel(initialRootLogLevel);
   }
 
+  /** Assumes that Mockito/ByteBuddy is available and can be used to mock classes (the assumption fails if the Java version is too new). */
+  public static void assumeWorkingMockito() {
+    // We use reflection here because ByteBuddy/Mockito is not available in all modules or in the test framework itself.
+    try {
+      Class.forName("net.bytebuddy.ClassFileVersion").getMethod("ofThisVm").invoke(null);
+    } catch (InvocationTargetException e) {
+      RandomizedTest.assumeNoException("SOLR-11606: ByteBuddy used by Mockito is not working with this JVM version.",
+          e.getTargetException());
+    } catch (ReflectiveOperationException e) {
+      fail("ByteBuddy and Mockito are not available on classpath: " + e.toString());
+    }
+  }
+
   /**
    * @return null if ok else error message
    */
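For reference, this is the usage pattern the new guard enables, mirroring the CloudSolrClientCacheTest change earlier in this patch; the test class and the mocked type are illustrative only:

import java.util.List;

import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;

import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MyMockingTest extends SolrTestCaseJ4 {

  @BeforeClass
  public static void ensureMockitoWorks() {
    // Skips (rather than fails) the whole class when ByteBuddy cannot
    // instrument classes on the current JVM, e.g. a Java version newer
    // than the bundled Mockito supports.
    assumeWorkingMockito();
  }

  @Test
  public void testWithMock() {
    List<?> mocked = mock(List.class);
    when(mocked.size()).thenReturn(42);
    assertEquals(42, mocked.size());
  }
}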