From 7acccd51576117e18d131d18cbe6373ea091b1b1 Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Sun, 26 Nov 2017 21:32:05 -0500 Subject: [PATCH 1/5] SOLR-11674: Support ranges in the probability Stream Evaluator --- .../solrj/io/eval/ProbabilityEvaluator.java | 68 ++++++++++++++----- .../solrj/io/stream/StreamExpressionTest.java | 17 +++++ 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ProbabilityEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ProbabilityEvaluator.java index f0c25cb5846..092760c152c 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ProbabilityEvaluator.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/ProbabilityEvaluator.java @@ -20,10 +20,11 @@ import java.io.IOException; import java.util.Locale; import org.apache.commons.math3.distribution.IntegerDistribution; +import org.apache.commons.math3.distribution.AbstractRealDistribution; import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; -public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements TwoValueWorker { +public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker { protected static final long serialVersionUID = 1L; public ProbabilityEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ @@ -31,22 +32,55 @@ public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements Tw } @Override - public Object doWork(Object first, Object second) throws IOException{ - if(null == first){ - throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory))); - } - if(null == second){ - throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory))); - } - if(!(first instanceof IntegerDistribution)){ - throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a IntegerDistribution",toExpression(constructingFactory), first.getClass().getSimpleName())); - } - if(!(second instanceof Number)){ - throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a Number",toExpression(constructingFactory), first.getClass().getSimpleName())); - } + public Object doWork(Object... values) throws IOException{ - IntegerDistribution d = (IntegerDistribution) first; - Number predictOver = (Number) second; - return d.probability(predictOver.intValue()); + Object first = null; + Object second = null; + Object third = null; + + if(values.length == 2) { + first = values[0]; + second = values[1]; + + if (null == first) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory))); + } + if (null == second) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory))); + } + if (!(first instanceof IntegerDistribution)) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a IntegerDistributionm for probability at a specific value.", toExpression(constructingFactory), first.getClass().getSimpleName())); + } + if (!(second instanceof Number)) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName())); + } + + IntegerDistribution d = (IntegerDistribution) first; + Number predictOver = (Number) second; + return d.probability(predictOver.intValue()); + + } else if(values.length == 3) { + first = values[0]; + second = values[1]; + third = values[2]; + + if (!(first instanceof AbstractRealDistribution)) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a RealDistribution for probability ranges", toExpression(constructingFactory), first.getClass().getSimpleName())); + } + if (!(second instanceof Number)) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName())); + } + + if (!(third instanceof Number)) { + throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName())); + } + + AbstractRealDistribution realDistribution = (AbstractRealDistribution)first; + Number start = (Number) second; + Number end = (Number) third; + return realDistribution.probability(start.doubleValue(), end.doubleValue()); + } else { + throw new IOException("The probability function expects 2 or 3 parameters"); + } } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index a2b6b586a05..bfee198fa98 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -6431,6 +6431,23 @@ public class StreamExpressionTest extends SolrCloudTestCase { } @Test + public void testProbabilityRange() throws Exception { + String cexpr = "let(a=normalDistribution(500, 20), " + + "b=probability(a, 520, 530))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + Number prob = (Number)tuples.get(0).get("b"); + assertEquals(prob.doubleValue(), 0.09184805266259899, 0.0); + } + + @Test public void testDistributions() throws Exception { String cexpr = "let(a=normalDistribution(10, 2), " + "b=sample(a, 250), " + From 64d95e6a6dfe8ce1e1ccdf5fbb48b99b55905f26 Mon Sep 17 00:00:00 2001 From: David Smiley Date: Sun, 26 Nov 2017 23:25:06 -0500 Subject: [PATCH 2/5] LUCENE-8049: IndexWriter.getMergingSegments() signature changed to return Set instead of Collection --- lucene/CHANGES.txt | 3 +++ .../src/java/org/apache/lucene/index/IndexWriter.java | 6 +++--- .../src/java/org/apache/lucene/index/LogMergePolicy.java | 4 ++-- .../java/org/apache/lucene/index/TieredMergePolicy.java | 9 +++++---- .../org/apache/lucene/index/MockRandomMergePolicy.java | 4 ++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3840454685e..7f731ccb130 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,6 +23,9 @@ API Changes * LUCENE-8014: Similarity.computeSlopFactor() and Similarity.computePayloadFactor() have been removed (Alan Woodward) +* LUCENE-8049: IndexWriter.getMergingSegments()'s return type was changed from + Collection to Set to more accurately reflect it's nature. (David Smiley) + Changes in Runtime Behavior * LUCENE-7837: Indices that were created before the previous major version diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 44fb36ec7ad..7f47e42d45d 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2285,9 +2285,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { * (which you do when IndexWriter invokes the * MergePolicy). * - *

Do not alter the returned collection! */ - public synchronized Collection getMergingSegments() { - return mergingSegments; + *

The Set is unmodifiable. */ + public synchronized Set getMergingSegments() { + return Collections.unmodifiableSet(mergingSegments); } /** diff --git a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java index 4d0ce51d261..78025634b4a 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java @@ -19,10 +19,10 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; /** *

This class implements a {@link MergePolicy} that tries @@ -462,7 +462,7 @@ public abstract class LogMergePolicy extends MergePolicy { final List levels = new ArrayList<>(numSegments); final float norm = (float) Math.log(mergeFactor); - final Collection mergingSegments = writer.getMergingSegments(); + final Set mergingSegments = writer.getMergingSegments(); for(int i=0;i merging = writer.getMergingSegments(); - final Collection toBeMerged = new HashSet<>(); + final Set merging = writer.getMergingSegments(); + final Set toBeMerged = new HashSet<>(); final List infosSorted = new ArrayList<>(infos.asList()); @@ -519,7 +520,7 @@ public class TieredMergePolicy extends MergePolicy { List eligible = new ArrayList<>(); boolean forceMergeRunning = false; - final Collection merging = writer.getMergingSegments(); + final Set merging = writer.getMergingSegments(); boolean segmentIsOriginal = false; for(SegmentCommitInfo info : infos) { final Boolean isOriginal = segmentsToMerge.get(info); @@ -594,7 +595,7 @@ public class TieredMergePolicy extends MergePolicy { message("findForcedDeletesMerges infos=" + writer.segString(infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed, writer); } final List eligible = new ArrayList<>(); - final Collection merging = writer.getMergingSegments(); + final Set merging = writer.getMergingSegments(); for(SegmentCommitInfo info : infos) { double pctDeletes = 100.*((double) writer.numDeletedDocs(info))/info.info.maxDoc(); if (pctDeletes > forceMergeDeletesPctAllowed && !merging.contains(info)) { diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java index f9fa601cc32..61efaa2adb4 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/MockRandomMergePolicy.java @@ -18,11 +18,11 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -56,7 +56,7 @@ public class MockRandomMergePolicy extends MergePolicy { int numSegments = segmentInfos.size(); List segments = new ArrayList<>(); - final Collection merging = writer.getMergingSegments(); + final Set merging = writer.getMergingSegments(); for(SegmentCommitInfo sipc : segmentInfos) { if (!merging.contains(sipc)) { From 3b0686a4d38df2a72f9e6cb339e7cd8a3e0d526a Mon Sep 17 00:00:00 2001 From: David Smiley Date: Sun, 26 Nov 2017 23:28:37 -0500 Subject: [PATCH 3/5] LUCENE-8049: CHANGES.txt: moved to 7.2.0 from 8.0.0 --- lucene/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7f731ccb130..7df88fc3705 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -23,9 +23,6 @@ API Changes * LUCENE-8014: Similarity.computeSlopFactor() and Similarity.computePayloadFactor() have been removed (Alan Woodward) -* LUCENE-8049: IndexWriter.getMergingSegments()'s return type was changed from - Collection to Set to more accurately reflect it's nature. (David Smiley) - Changes in Runtime Behavior * LUCENE-7837: Indices that were created before the previous major version @@ -73,6 +70,9 @@ API Changes * LUCENE-7998: DoubleValuesSource.fromQuery() allows you to use the scores from a Query as a DoubleValuesSource. (Alan Woodward) +* LUCENE-8049: IndexWriter.getMergingSegments()'s return type was changed from + Collection to Set to more accurately reflect it's nature. (David Smiley) + New Features * LUCENE-8061: Add convenience factory methods to create BBoxes and XYZSolids From 7a12de47ecc344d7361db69ffedcea28f092c05e Mon Sep 17 00:00:00 2001 From: Noble Paul Date: Mon, 27 Nov 2017 17:06:27 +1030 Subject: [PATCH 4/5] SOLR-9743: removed unused logging --- solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java b/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java index dae208be2d3..54044ebd608 100644 --- a/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/UtilizeNodeCmd.java @@ -72,11 +72,6 @@ public class UtilizeNodeCmd implements OverseerCollectionMessageHandler.Cmd { continue; } log.info("coll: " + coll); - state.getCollection(coll).forEachReplica((s, r) -> { - if (Objects.equals(r.getName(), r.getName())) { - log.info("replica to be moved " + r); - } - }); if (suggestionInfo.getOperation() instanceof V2Request) { String targetNode = (String) Utils.getObjectByPath(suggestionInfo.getOperation(), true, "command/move-replica/targetNode"); if (Objects.equals(targetNode, nodeName)) { From 8072b46b63c70f2bbcd22ae053038e25750c67a0 Mon Sep 17 00:00:00 2001 From: Joel Bernstein Date: Mon, 27 Nov 2017 13:27:59 -0500 Subject: [PATCH 5/5] SOLR-11680: Add normalizeSum Stream Evaluator --- .../apache/solr/handler/StreamHandler.java | 1 + .../solrj/io/eval/NormalizeSumEvaluator.java | 84 +++++++++++++++++++ .../solrj/io/stream/StreamExpressionTest.java | 50 ++++++++++- 3 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeSumEvaluator.java diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java index 75f2de15e37..a0a83c1e6d3 100644 --- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java @@ -278,6 +278,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware, .withFunctionName("sumColumns", SumColumnsEvaluator.class) .withFunctionName("diff", TimeDifferencingEvaluator.class) .withFunctionName("corrPValues", CorrelationSignificanceEvaluator.class) + .withFunctionName("normalizeSum", NormalizeSumEvaluator.class) // Boolean Stream Evaluators diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeSumEvaluator.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeSumEvaluator.java new file mode 100644 index 00000000000..d300f598cc2 --- /dev/null +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/eval/NormalizeSumEvaluator.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.client.solrj.io.eval; + +import java.io.IOException; + +import java.util.Locale; + +import org.apache.commons.math3.util.MathArrays; +import org.apache.solr.client.solrj.io.stream.expr.StreamExpression; +import org.apache.solr.client.solrj.io.stream.expr.StreamFactory; +import java.util.List; +import java.util.ArrayList; + +public class NormalizeSumEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker { + protected static final long serialVersionUID = 1L; + + public NormalizeSumEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{ + super(expression, factory); + + if(2 < containedEvaluators.size()){ + throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at most two parameters but found %d",expression,containedEvaluators.size())); + } + } + + @Override + public Object doWork(Object... values) throws IOException{ + + Object value = values[0]; + + double sumTo = 1.0; + + if(values.length == 2) { + Number n = (Number)values[1]; + sumTo = n.doubleValue(); + } + + if(null == value){ + return null; + } else if(value instanceof Matrix) { + Matrix matrix = (Matrix) value; + + double[][] data = matrix.getData(); + double[][] unitData = new double[data.length][]; + for(int i=0; i vals = (List)value; + double[] doubles = new double[vals.size()]; + for(int i=0; i unitList = new ArrayList(doubles.length); + double[] unitArray = MathArrays.normalizeArray(doubles, sumTo); + for(double d : unitArray) { + unitList.add(d); + } + + return unitList; + } else { + throw new IOException("The unit function expects either a numeric array or matrix as a parameter"); + } + } +} diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java index bfee198fa98..8f3c3f5fb32 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java @@ -6326,11 +6326,55 @@ public class StreamExpressionTest extends SolrCloudTestCase { List array3 = (List)tuples.get(0).get("b"); assertEquals(array3.size(), 3); - assertEquals(array2.get(0).doubleValue(), 0.4558423058385518, 0.0); - assertEquals(array2.get(1).doubleValue(), 0.5698028822981898, 0.0); - assertEquals(array2.get(2).doubleValue(), 0.6837634587578276, 0.0); + assertEquals(array3.get(0).doubleValue(), 0.4558423058385518, 0.0); + assertEquals(array3.get(1).doubleValue(), 0.5698028822981898, 0.0); + assertEquals(array3.get(2).doubleValue(), 0.6837634587578276, 0.0); } + + @Test + public void testNormalizeSum() throws Exception { + String cexpr = "let(echo=true, " + + "a=normalizeSum(matrix(array(1,2,3), array(4,5,6))), " + + "b=normalizeSum(array(1,2,3))," + + "c=normalizeSum(array(1,2,3), 100))"; + ModifiableSolrParams paramsLoc = new ModifiableSolrParams(); + paramsLoc.set("expr", cexpr); + paramsLoc.set("qt", "/stream"); + String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS; + TupleStream solrStream = new SolrStream(url, paramsLoc); + StreamContext context = new StreamContext(); + solrStream.setStreamContext(context); + List tuples = getTuples(solrStream); + assertTrue(tuples.size() == 1); + List> out = (List>)tuples.get(0).get("a"); + assertEquals(out.size(), 2); + List array1 = out.get(0); + assertEquals(array1.size(), 3); + assertEquals(array1.get(0).doubleValue(), 0.16666666666666666, 0.0001); + assertEquals(array1.get(1).doubleValue(), 0.3333333333333333, 0.00001); + assertEquals(array1.get(2).doubleValue(), 0.5, 0.0001); + + List array2 = out.get(1); + assertEquals(array2.size(), 3); + assertEquals(array2.get(0).doubleValue(), 0.26666666666666666, 0.0001); + assertEquals(array2.get(1).doubleValue(), 0.3333333333333333, 0.0001); + assertEquals(array2.get(2).doubleValue(), 0.4, 0.0001); + + List array3 = (List)tuples.get(0).get("b"); + assertEquals(array3.size(), 3); + assertEquals(array3.get(0).doubleValue(), 0.16666666666666666, 0.0001); + assertEquals(array3.get(1).doubleValue(), 0.3333333333333333, 0.0001); + assertEquals(array3.get(2).doubleValue(), 0.5, 0.0001); + + List array4 = (List)tuples.get(0).get("c"); + assertEquals(array4.size(), 3); + assertEquals(array4.get(0).doubleValue(), 16.666666666666666, 0.0001); + assertEquals(array4.get(1).doubleValue(), 33.33333333333333, 0.00001); + assertEquals(array4.get(2).doubleValue(), 50, 0.0001); + } + + @Test public void testStandardize() throws Exception { String cexpr = "let(echo=true, a=standardize(matrix(array(1,2,3), array(4,5,6))), b=standardize(array(4,5,6)))";