This commit is contained in:
Karl Wright 2017-11-28 03:01:18 -05:00
commit dae529de58
10 changed files with 215 additions and 36 deletions

View File

@ -70,6 +70,9 @@ API Changes
* LUCENE-7998: DoubleValuesSource.fromQuery() allows you to use the scores
from a Query as a DoubleValuesSource. (Alan Woodward)
* LUCENE-8049: IndexWriter.getMergingSegments()'s return type was changed from
Collection to Set to more accurately reflect it's nature. (David Smiley)
New Features
* LUCENE-8061: Add convenience factory methods to create BBoxes and XYZSolids

View File

@ -2285,9 +2285,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
* (which you do when IndexWriter invokes the
* MergePolicy).
*
* <p>Do not alter the returned collection! */
public synchronized Collection<SegmentCommitInfo> getMergingSegments() {
return mergingSegments;
* <p>The Set is unmodifiable. */
public synchronized Set<SegmentCommitInfo> getMergingSegments() {
return Collections.unmodifiableSet(mergingSegments);
}
/**

View File

@ -19,10 +19,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
* <p>This class implements a {@link MergePolicy} that tries
@ -462,7 +462,7 @@ public abstract class LogMergePolicy extends MergePolicy {
final List<SegmentInfoAndLevel> levels = new ArrayList<>(numSegments);
final float norm = (float) Math.log(mergeFactor);
final Collection<SegmentCommitInfo> mergingSegments = writer.getMergingSegments();
final Set<SegmentCommitInfo> mergingSegments = writer.getMergingSegments();
for(int i=0;i<numSegments;i++) {
final SegmentCommitInfo info = infos.info(i);

View File

@ -26,6 +26,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
* Merges segments of approximately equal size, subject to
@ -287,8 +288,8 @@ public class TieredMergePolicy extends MergePolicy {
if (infos.size() == 0) {
return null;
}
final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
final Collection<SegmentCommitInfo> toBeMerged = new HashSet<>();
final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
final Set<SegmentCommitInfo> toBeMerged = new HashSet<>();
final List<SegmentCommitInfo> infosSorted = new ArrayList<>(infos.asList());
@ -519,7 +520,7 @@ public class TieredMergePolicy extends MergePolicy {
List<SegmentCommitInfo> eligible = new ArrayList<>();
boolean forceMergeRunning = false;
final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
boolean segmentIsOriginal = false;
for(SegmentCommitInfo info : infos) {
final Boolean isOriginal = segmentsToMerge.get(info);
@ -594,7 +595,7 @@ public class TieredMergePolicy extends MergePolicy {
message("findForcedDeletesMerges infos=" + writer.segString(infos) + " forceMergeDeletesPctAllowed=" + forceMergeDeletesPctAllowed, writer);
}
final List<SegmentCommitInfo> eligible = new ArrayList<>();
final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
for(SegmentCommitInfo info : infos) {
double pctDeletes = 100.*((double) writer.numDeletedDocs(info))/info.info.maxDoc();
if (pctDeletes > forceMergeDeletesPctAllowed && !merging.contains(info)) {

View File

@ -18,11 +18,11 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
@ -56,7 +56,7 @@ public class MockRandomMergePolicy extends MergePolicy {
int numSegments = segmentInfos.size();
List<SegmentCommitInfo> segments = new ArrayList<>();
final Collection<SegmentCommitInfo> merging = writer.getMergingSegments();
final Set<SegmentCommitInfo> merging = writer.getMergingSegments();
for(SegmentCommitInfo sipc : segmentInfos) {
if (!merging.contains(sipc)) {

View File

@ -72,11 +72,6 @@ public class UtilizeNodeCmd implements OverseerCollectionMessageHandler.Cmd {
continue;
}
log.info("coll: " + coll);
state.getCollection(coll).forEachReplica((s, r) -> {
if (Objects.equals(r.getName(), r.getName())) {
log.info("replica to be moved " + r);
}
});
if (suggestionInfo.getOperation() instanceof V2Request) {
String targetNode = (String) Utils.getObjectByPath(suggestionInfo.getOperation(), true, "command/move-replica/targetNode");
if (Objects.equals(targetNode, nodeName)) {

View File

@ -278,6 +278,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
.withFunctionName("sumColumns", SumColumnsEvaluator.class)
.withFunctionName("diff", TimeDifferencingEvaluator.class)
.withFunctionName("corrPValues", CorrelationSignificanceEvaluator.class)
.withFunctionName("normalizeSum", NormalizeSumEvaluator.class)
// Boolean Stream Evaluators

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.client.solrj.io.eval;
import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.util.MathArrays;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
import java.util.List;
import java.util.ArrayList;
public class NormalizeSumEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
protected static final long serialVersionUID = 1L;
public NormalizeSumEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
super(expression, factory);
if(2 < containedEvaluators.size()){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at most two parameters but found %d",expression,containedEvaluators.size()));
}
}
@Override
public Object doWork(Object... values) throws IOException{
Object value = values[0];
double sumTo = 1.0;
if(values.length == 2) {
Number n = (Number)values[1];
sumTo = n.doubleValue();
}
if(null == value){
return null;
} else if(value instanceof Matrix) {
Matrix matrix = (Matrix) value;
double[][] data = matrix.getData();
double[][] unitData = new double[data.length][];
for(int i=0; i<data.length; i++) {
double[] row = data[i];
double[] unitRow = MathArrays.normalizeArray(row, sumTo);
unitData[i] = unitRow;
}
return new Matrix(unitData);
} else if(value instanceof List) {
List<Number> vals = (List<Number>)value;
double[] doubles = new double[vals.size()];
for(int i=0; i<doubles.length; i++) {
doubles[i] = vals.get(i).doubleValue();
}
List<Number> unitList = new ArrayList(doubles.length);
double[] unitArray = MathArrays.normalizeArray(doubles, sumTo);
for(double d : unitArray) {
unitList.add(d);
}
return unitList;
} else {
throw new IOException("The unit function expects either a numeric array or matrix as a parameter");
}
}
}

View File

@ -20,10 +20,11 @@ import java.io.IOException;
import java.util.Locale;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.AbstractRealDistribution;
import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements TwoValueWorker {
public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements ManyValueWorker {
protected static final long serialVersionUID = 1L;
public ProbabilityEvaluator(StreamExpression expression, StreamFactory factory) throws IOException{
@ -31,22 +32,55 @@ public class ProbabilityEvaluator extends RecursiveObjectEvaluator implements Tw
}
@Override
public Object doWork(Object first, Object second) throws IOException{
if(null == first){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the first value",toExpression(constructingFactory)));
}
if(null == second){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - null found for the second value",toExpression(constructingFactory)));
}
if(!(first instanceof IntegerDistribution)){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the first value, expecting a IntegerDistribution",toExpression(constructingFactory), first.getClass().getSimpleName()));
}
if(!(second instanceof Number)){
throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - found type %s for the second value, expecting a Number",toExpression(constructingFactory), first.getClass().getSimpleName()));
}
public Object doWork(Object... values) throws IOException{
IntegerDistribution d = (IntegerDistribution) first;
Number predictOver = (Number) second;
return d.probability(predictOver.intValue());
Object first = null;
Object second = null;
Object third = null;
if(values.length == 2) {
first = values[0];
second = values[1];
if (null == first) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the first value", toExpression(constructingFactory)));
}
if (null == second) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - null found for the second value", toExpression(constructingFactory)));
}
if (!(first instanceof IntegerDistribution)) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a IntegerDistributionm for probability at a specific value.", toExpression(constructingFactory), first.getClass().getSimpleName()));
}
if (!(second instanceof Number)) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName()));
}
IntegerDistribution d = (IntegerDistribution) first;
Number predictOver = (Number) second;
return d.probability(predictOver.intValue());
} else if(values.length == 3) {
first = values[0];
second = values[1];
third = values[2];
if (!(first instanceof AbstractRealDistribution)) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the first value, expecting a RealDistribution for probability ranges", toExpression(constructingFactory), first.getClass().getSimpleName()));
}
if (!(second instanceof Number)) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName()));
}
if (!(third instanceof Number)) {
throw new IOException(String.format(Locale.ROOT, "Invalid expression %s - found type %s for the second value, expecting a Number", toExpression(constructingFactory), first.getClass().getSimpleName()));
}
AbstractRealDistribution realDistribution = (AbstractRealDistribution)first;
Number start = (Number) second;
Number end = (Number) third;
return realDistribution.probability(start.doubleValue(), end.doubleValue());
} else {
throw new IOException("The probability function expects 2 or 3 parameters");
}
}
}

View File

@ -6326,11 +6326,55 @@ public class StreamExpressionTest extends SolrCloudTestCase {
List<Number> array3 = (List<Number>)tuples.get(0).get("b");
assertEquals(array3.size(), 3);
assertEquals(array2.get(0).doubleValue(), 0.4558423058385518, 0.0);
assertEquals(array2.get(1).doubleValue(), 0.5698028822981898, 0.0);
assertEquals(array2.get(2).doubleValue(), 0.6837634587578276, 0.0);
assertEquals(array3.get(0).doubleValue(), 0.4558423058385518, 0.0);
assertEquals(array3.get(1).doubleValue(), 0.5698028822981898, 0.0);
assertEquals(array3.get(2).doubleValue(), 0.6837634587578276, 0.0);
}
@Test
public void testNormalizeSum() throws Exception {
String cexpr = "let(echo=true, " +
"a=normalizeSum(matrix(array(1,2,3), array(4,5,6))), " +
"b=normalizeSum(array(1,2,3))," +
"c=normalizeSum(array(1,2,3), 100))";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", cexpr);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
TupleStream solrStream = new SolrStream(url, paramsLoc);
StreamContext context = new StreamContext();
solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
List<List<Number>> out = (List<List<Number>>)tuples.get(0).get("a");
assertEquals(out.size(), 2);
List<Number> array1 = out.get(0);
assertEquals(array1.size(), 3);
assertEquals(array1.get(0).doubleValue(), 0.16666666666666666, 0.0001);
assertEquals(array1.get(1).doubleValue(), 0.3333333333333333, 0.00001);
assertEquals(array1.get(2).doubleValue(), 0.5, 0.0001);
List<Number> array2 = out.get(1);
assertEquals(array2.size(), 3);
assertEquals(array2.get(0).doubleValue(), 0.26666666666666666, 0.0001);
assertEquals(array2.get(1).doubleValue(), 0.3333333333333333, 0.0001);
assertEquals(array2.get(2).doubleValue(), 0.4, 0.0001);
List<Number> array3 = (List<Number>)tuples.get(0).get("b");
assertEquals(array3.size(), 3);
assertEquals(array3.get(0).doubleValue(), 0.16666666666666666, 0.0001);
assertEquals(array3.get(1).doubleValue(), 0.3333333333333333, 0.0001);
assertEquals(array3.get(2).doubleValue(), 0.5, 0.0001);
List<Number> array4 = (List<Number>)tuples.get(0).get("c");
assertEquals(array4.size(), 3);
assertEquals(array4.get(0).doubleValue(), 16.666666666666666, 0.0001);
assertEquals(array4.get(1).doubleValue(), 33.33333333333333, 0.00001);
assertEquals(array4.get(2).doubleValue(), 50, 0.0001);
}
@Test
public void testStandardize() throws Exception {
String cexpr = "let(echo=true, a=standardize(matrix(array(1,2,3), array(4,5,6))), b=standardize(array(4,5,6)))";
@ -6431,6 +6475,23 @@ public class StreamExpressionTest extends SolrCloudTestCase {
}
@Test
public void testProbabilityRange() throws Exception {
String cexpr = "let(a=normalDistribution(500, 20), " +
"b=probability(a, 520, 530))";
ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", cexpr);
paramsLoc.set("qt", "/stream");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString()+"/"+COLLECTIONORALIAS;
TupleStream solrStream = new SolrStream(url, paramsLoc);
StreamContext context = new StreamContext();
solrStream.setStreamContext(context);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 1);
Number prob = (Number)tuples.get(0).get("b");
assertEquals(prob.doubleValue(), 0.09184805266259899, 0.0);
}
@Test
public void testDistributions() throws Exception {
String cexpr = "let(a=normalDistribution(10, 2), " +
"b=sample(a, 250), " +