SOLR-1302: Why should numbers get to have all the fun? Add String distance functions, too.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@883421 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2009-11-23 17:18:16 +00:00
parent 6fcb72a3ad
commit 548f973766
4 changed files with 134 additions and 1 deletions

View File

@ -34,7 +34,7 @@ Detailed Change List
New Features
----------------------
* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan, Euclidean and String (using the StringDistance methods in the Lucene Spellchecker).
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
* SOLR-1553: New dismax parser implementation (accessible as "edismax")

View File

@ -20,6 +20,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.spell.JaroWinklerDistance;
import org.apache.lucene.search.spell.LevensteinDistance;
import org.apache.lucene.search.spell.NGramDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.DateField;
@ -34,6 +38,7 @@ import org.apache.solr.search.function.distance.SquaredEuclideanFunction;
import org.apache.solr.search.function.distance.VectorDistanceFunction;
import org.apache.solr.search.function.distance.GeohashHaversineFunction;
import org.apache.solr.search.function.distance.GeohashFunction;
import org.apache.solr.search.function.distance.StringDistanceFunction;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import java.io.IOException;
@ -255,6 +260,30 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
return new GeohashFunction(lat, lon);
}
});
// strdist(str1, str2, measure[, ngramSize])
// Registers the "strdist" function: a string-distance score computed between two
// value sources. The third argument selects the measure (matched case-insensitively):
//   "jw"    -> JaroWinklerDistance
//   "edit"  -> LevensteinDistance
//   "ngram" -> NGramDistance, with an optional n-gram size argument (defaults to 2)
// Any other value is treated as a class name, instantiated through the core's
// resource loader and cast to StringDistance.
addParser("strdist", new ValueSourceParser() {
  /**
   * Parses the arguments of a strdist(...) call.
   *
   * @param fp the function parser positioned at the first argument
   * @return a StringDistanceFunction over the two parsed sources and the chosen measure
   * @throws ParseException if an argument cannot be parsed or the measure is missing
   */
  public ValueSource parse(FunctionQParser fp) throws ParseException {
    ValueSource str1 = fp.parseValueSource();
    ValueSource str2 = fp.parseValueSource();
    String distClass = fp.parseArg();
    // Fail with a parse error rather than a NullPointerException when the measure
    // argument is absent.
    // NOTE(review): assumes parseArg() yields null when no argument is left -- confirm
    // against FunctionQParser.
    if (distClass == null) {
      throw new ParseException(
          "strdist requires a distance measure: jw, edit, ngram or a StringDistance class name");
    }
    final StringDistance dist;
    if (distClass.equalsIgnoreCase("jw")) {
      dist = new JaroWinklerDistance();
    } else if (distClass.equalsIgnoreCase("edit")) {
      dist = new LevensteinDistance();
    } else if (distClass.equalsIgnoreCase("ngram")) {
      // The n-gram size is optional; fall back to bigrams when it is not supplied.
      int ngram = 2;
      if (fp.hasMoreArguments()) {
        ngram = fp.parseInt();
      }
      dist = new NGramDistance(ngram);
    } else {
      // Not a built-in alias: treat the argument as a class name to load as a plugin.
      dist = (StringDistance) fp.req.getCore().getResourceLoader().newInstance(distClass);
    }
    return new StringDistanceFunction(str1, str2, dist);
  }
});
addParser(new DoubleParser("rad") {
public double func(int doc, DocValues vals) {

View File

@ -0,0 +1,96 @@
package org.apache.solr.search.function.distance;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.solr.search.function.DocValues;
import org.apache.solr.search.function.ValueSource;
import java.io.IOException;
import java.util.Map;
/**
 * A {@link ValueSource} whose per-document value is produced by handing the string
 * values of two other value sources to a {@link StringDistance} measure.
 **/
public class StringDistanceFunction extends ValueSource {
  protected ValueSource str1;
  protected ValueSource str2;
  protected StringDistance dist;

  /**
   * @param str1 source supplying the first string for each document
   * @param str2 source supplying the second string for each document
   * @param measure the {@link StringDistance} implementation applied to the two strings
   */
  public StringDistanceFunction(ValueSource str1, ValueSource str2, StringDistance measure) {
    this.str1 = str1;
    this.str2 = str2;
    this.dist = measure;
  }

  /**
   * Resolves both underlying sources against the reader and returns values that
   * compute the distance lazily, one document at a time.
   */
  @Override
  public DocValues getValues(Map context, IndexReader reader) throws IOException {
    final DocValues firstVals = str1.getValues(context, reader);
    final DocValues secondVals = str2.getValues(context, reader);

    return new DocValues() {
      // The float value is the primary one; every other numeric accessor derives from it.
      public float floatVal(int doc) {
        String first = firstVals.strVal(doc);
        String second = secondVals.strVal(doc);
        return (float) dist.getDistance(first, second);
      }

      public int intVal(int doc) {
        return (int) doubleVal(doc);
      }

      public long longVal(int doc) {
        return (long) doubleVal(doc);
      }

      public double doubleVal(int doc) {
        return floatVal(doc);
      }

      @Override
      public String toString(int doc) {
        return "strdist" + '(' + firstVals.toString(doc) + ',' + secondVals.toString(doc)
            + ", dist=" + dist.getClass().getName() + ')';
      }
    };
  }

  /** Describes this function as {@code strdist(<str1>,<str2>, dist=<measure class name>)}. */
  public String description() {
    return "strdist" + '(' + str1 + ',' + str2 + ", dist=" + dist.getClass().getName() + ')';
  }

  /** Two functions are equal when their measure and both sources are equal. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (!(o instanceof StringDistanceFunction)) {
      return false;
    }
    StringDistanceFunction other = (StringDistanceFunction) o;
    return dist.equals(other.dist) && str1.equals(other.str1) && str2.equals(other.str2);
  }

  /** Combines the hash codes of both sources and the measure. */
  @Override
  public int hashCode() {
    return 31 * (31 * str1.hashCode() + str2.hashCode()) + dist.hashCode();
  }
}

View File

@ -347,6 +347,14 @@ public class TestFunctionQuery extends AbstractSolrTestCase {
assertQ(req("fl", "*,score", "q", "{!func}deg(y_td)", "fq", "id:3"), "//float[@name='score']='45.0'");
}
/**
 * Indexes one document whose x_s field is "foil" and checks the score that
 * strdist yields against the literal 'foit' for the edit, jw and ngram measures.
 */
public void testStrDistance() throws Exception {
  assertU(adoc("id", "1", "x_s", "foil"));
  assertU(commit());

  // Each row pairs a function query with the XPath its score must satisfy.
  final String[][] expectations = {
    {"{!func}strdist(x_s, 'foit', edit)", "//float[@name='score']='0.75'"},
    {"{!func}strdist(x_s, 'foit', jw)", "//float[@name='score']='0.8833333'"},
    {"{!func}strdist(x_s, 'foit', ngram, 2)", "//float[@name='score']='0.875'"},
  };
  for (String[] expectation : expectations) {
    String query = expectation[0];
    String xpath = expectation[1];
    assertQ(req("fl", "*,score", "q", query, "fq", "id:1"), xpath);
  }
}
public void dofunc(String func, double val) throws Exception {
// String sval = Double.toString(val);
String sval = Float.toString((float)val);