mirror of https://github.com/apache/lucene.git
SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf, norm, maxdoc, numdocs
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@954640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9427cad502
commit
3915a393f3
|
@ -177,6 +177,11 @@ New Features
|
|||
Explanation objects in it's responses instead of
|
||||
Explanation.toString (hossman)
|
||||
|
||||
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
|
||||
norm, maxdoc, numdocs. (yonik)
|
||||
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -54,8 +54,6 @@ import org.apache.solr.schema.TextField;
|
|||
* </p>
|
||||
*
|
||||
* @see QueryParsing#parseFunction
|
||||
* @see ConstantScoreRangeQuery
|
||||
* @see ConstantScorePrefixQuery
|
||||
*/
|
||||
public class SolrQueryParser extends QueryParser {
|
||||
protected final IndexSchema schema;
|
||||
|
|
|
@ -17,22 +17,24 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.spell.JaroWinklerDistance;
|
||||
import org.apache.lucene.search.spell.LevensteinDistance;
|
||||
import org.apache.lucene.search.spell.NGramDistance;
|
||||
import org.apache.lucene.search.spell.StringDistance;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.DateField;
|
||||
import org.apache.solr.schema.LegacyDateField;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
import org.apache.solr.schema.*;
|
||||
import org.apache.solr.search.function.*;
|
||||
|
||||
import org.apache.solr.search.function.distance.*;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -451,6 +453,81 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
|||
return new DoubleConstValueSource(Math.E);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
addParser("docfreq", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
TInfo tinfo = parseTerm(fp);
|
||||
return new DocFreqValueSource(tinfo.field, tinfo.val, tinfo.indexedField, tinfo.indexedBytes);
|
||||
}
|
||||
});
|
||||
|
||||
addParser("idf", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
TInfo tinfo = parseTerm(fp);
|
||||
return new IDFValueSource(tinfo.field, tinfo.val, tinfo.indexedField, tinfo.indexedBytes);
|
||||
}
|
||||
});
|
||||
|
||||
addParser("termfreq", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
TInfo tinfo = parseTerm(fp);
|
||||
return new TermFreqValueSource(tinfo.field, tinfo.val, tinfo.indexedField, tinfo.indexedBytes);
|
||||
}
|
||||
});
|
||||
|
||||
addParser("tf", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
TInfo tinfo = parseTerm(fp);
|
||||
return new TFValueSource(tinfo.field, tinfo.val, tinfo.indexedField, tinfo.indexedBytes);
|
||||
}
|
||||
});
|
||||
|
||||
addParser("norm", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
String field = fp.parseArg();
|
||||
return new NormValueSource(field);
|
||||
}
|
||||
});
|
||||
|
||||
addParser("maxdoc", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
return new MaxDocValueSource();
|
||||
}
|
||||
});
|
||||
|
||||
addParser("numdocs", new ValueSourceParser() {
|
||||
public ValueSource parse(FunctionQParser fp) throws ParseException {
|
||||
return new NumDocsValueSource();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private static TInfo parseTerm(FunctionQParser fp) throws ParseException {
|
||||
TInfo tinfo = new TInfo();
|
||||
|
||||
tinfo.indexedField = tinfo.field = fp.parseArg();
|
||||
tinfo.val = fp.parseArg();
|
||||
tinfo.indexedBytes = new BytesRef();
|
||||
|
||||
FieldType ft = fp.getReq().getSchema().getFieldTypeNoEx(tinfo.field);
|
||||
if (ft == null) ft = new StrField();
|
||||
|
||||
if (ft instanceof TextField) {
|
||||
// need to do analyisis on the term
|
||||
String indexedVal = tinfo.val;
|
||||
Query q = ft.getFieldQuery(fp, fp.getReq().getSchema().getFieldOrNull(tinfo.field), tinfo.val);
|
||||
if (q instanceof TermQuery) {
|
||||
Term term = ((TermQuery)q).getTerm();
|
||||
tinfo.indexedField = term.field();
|
||||
indexedVal = term.text();
|
||||
}
|
||||
UnicodeUtil.UTF16toUTF8(indexedVal, 0, indexedVal.length(), tinfo.indexedBytes);
|
||||
} else {
|
||||
ft.readableToIndexed(tinfo.val, tinfo.indexedBytes);
|
||||
}
|
||||
|
||||
return tinfo;
|
||||
}
|
||||
|
||||
private static void splitSources(int dim, List<ValueSource> sources, List<ValueSource> dest1, List<ValueSource> dest2) {
|
||||
|
@ -502,6 +579,14 @@ public abstract class ValueSourceParser implements NamedListInitializedPlugin {
|
|||
MultiValueSource mv1;
|
||||
MultiValueSource mv2;
|
||||
}
|
||||
|
||||
private static class TInfo {
|
||||
String field;
|
||||
String val;
|
||||
String indexedField;
|
||||
BytesRef indexedBytes;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,248 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
class ConstIntDocValues extends DocValues {
|
||||
final int ival;
|
||||
final float fval;
|
||||
final double dval;
|
||||
final long lval;
|
||||
final String sval;
|
||||
final ValueSource parent;
|
||||
|
||||
ConstIntDocValues(int val, ValueSource parent) {
|
||||
ival = val;
|
||||
fval = val;
|
||||
dval = val;
|
||||
lval = val;
|
||||
sval = Integer.toString(val);
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
public float floatVal(int doc) {
|
||||
return fval;
|
||||
}
|
||||
public int intVal(int doc) {
|
||||
return ival;
|
||||
}
|
||||
public long longVal(int doc) {
|
||||
return lval;
|
||||
}
|
||||
public double doubleVal(int doc) {
|
||||
return dval;
|
||||
}
|
||||
public String strVal(int doc) {
|
||||
return sval;
|
||||
}
|
||||
public String toString(int doc) {
|
||||
return parent.description() + '=' + sval;
|
||||
}
|
||||
}
|
||||
|
||||
class ConstDoubleDocValues extends DocValues {
|
||||
final int ival;
|
||||
final float fval;
|
||||
final double dval;
|
||||
final long lval;
|
||||
final String sval;
|
||||
final ValueSource parent;
|
||||
|
||||
ConstDoubleDocValues(double val, ValueSource parent) {
|
||||
ival = (int)val;
|
||||
fval = (float)val;
|
||||
dval = val;
|
||||
lval = (long)val;
|
||||
sval = Double.toString(val);
|
||||
this.parent = parent;
|
||||
}
|
||||
|
||||
public float floatVal(int doc) {
|
||||
return fval;
|
||||
}
|
||||
public int intVal(int doc) {
|
||||
return ival;
|
||||
}
|
||||
public long longVal(int doc) {
|
||||
return lval;
|
||||
}
|
||||
public double doubleVal(int doc) {
|
||||
return dval;
|
||||
}
|
||||
public String strVal(int doc) {
|
||||
return sval;
|
||||
}
|
||||
public String toString(int doc) {
|
||||
return parent.description() + '=' + sval;
|
||||
}
|
||||
}
|
||||
|
||||
abstract class FloatDocValues extends DocValues {
|
||||
protected final ValueSource vs;
|
||||
|
||||
public FloatDocValues(ValueSource vs) {
|
||||
this.vs = vs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte byteVal(int doc) {
|
||||
return (byte)floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public short shortVal(int doc) {
|
||||
return (short)floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract float floatVal(int doc);
|
||||
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
return (int)floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longVal(int doc) {
|
||||
return (long)floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double doubleVal(int doc) {
|
||||
return (double)floatVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String strVal(int doc) {
|
||||
return Float.toString(floatVal(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return vs.description() + '=' + strVal(doc);
|
||||
}
|
||||
}
|
||||
|
||||
abstract class IntDocValues extends DocValues {
|
||||
protected final ValueSource vs;
|
||||
|
||||
public IntDocValues(ValueSource vs) {
|
||||
this.vs = vs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte byteVal(int doc) {
|
||||
return (byte)intVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public short shortVal(int doc) {
|
||||
return (short)intVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return (float)intVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract int intVal(int doc);
|
||||
|
||||
@Override
|
||||
public long longVal(int doc) {
|
||||
return (long)intVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double doubleVal(int doc) {
|
||||
return (double)intVal(doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String strVal(int doc) {
|
||||
return Integer.toString(intVal(doc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return vs.description() + '=' + strVal(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* <code>DocFreqValueSource</code> returns the number of documents containing the term.
|
||||
* @internal
|
||||
*/
|
||||
public class DocFreqValueSource extends ValueSource {
|
||||
protected String field;
|
||||
protected String indexedField;
|
||||
protected String val;
|
||||
protected BytesRef indexedBytes;
|
||||
|
||||
public DocFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
this.field = field;
|
||||
this.val = val;
|
||||
this.indexedField = indexedField;
|
||||
this.indexedBytes = indexedBytes;
|
||||
}
|
||||
|
||||
public String name() {
|
||||
return "docfreq";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + '(' + field + ',' + val + ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
Searcher searcher = (Searcher)context.get("searcher");
|
||||
// todo: we need docFreq that takes a BytesRef
|
||||
String strVal = ByteUtils.UTF8toUTF16(indexedBytes);
|
||||
int docfreq = searcher.docFreq(new Term(indexedField, strVal));
|
||||
return new ConstIntDocValues(docfreq, this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createWeight(Map context, Searcher searcher) throws IOException {
|
||||
context.put("searcher",searcher);
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return getClass().hashCode() + indexedField.hashCode()*29 + indexedBytes.hashCode();
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if (this.getClass() != o.getClass()) return false;
|
||||
DocFreqValueSource other = (DocFreqValueSource)o;
|
||||
return this.indexedField.equals(other.indexedField) && this.indexedBytes.equals(other.indexedBytes);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.search.SolrIndexReader;
|
||||
import org.apache.solr.util.ByteUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
/** @internal */
|
||||
public class IDFValueSource extends DocFreqValueSource {
|
||||
public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "idf";
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
Searcher searcher = (Searcher)context.get("searcher");
|
||||
Similarity sim = searcher.getSimilarity();
|
||||
// todo: we need docFreq that takes a BytesRef
|
||||
String strVal = ByteUtils.UTF8toUTF16(indexedBytes);
|
||||
int docfreq = searcher.docFreq(new Term(indexedField, strVal));
|
||||
float idf = sim.idf(docfreq, searcher.maxDoc());
|
||||
return new ConstDoubleDocValues(idf, this);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class MaxDocValueSource extends ValueSource {
|
||||
public String name() {
|
||||
return "maxdoc";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + "()";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createWeight(Map context, Searcher searcher) throws IOException {
|
||||
context.put("searcher",searcher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
Searcher searcher = (Searcher)context.get("searcher");
|
||||
return new ConstIntDocValues(searcher.maxDoc(), this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return this.getClass() == o.getClass();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.getClass().hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.solr.search.SolrIndexReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class NormValueSource extends ValueSource {
|
||||
protected String field;
|
||||
public NormValueSource(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
public String name() {
|
||||
return "norm";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + '(' + field + ')';
|
||||
}
|
||||
|
||||
@Override
|
||||
public void createWeight(Map context, Searcher searcher) throws IOException {
|
||||
context.put("searcher",searcher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
Searcher searcher = (Searcher)context.get("searcher");
|
||||
final Similarity similarity = searcher.getSimilarity();
|
||||
final byte[] norms = reader.norms(field);
|
||||
if (norms == null) {
|
||||
return new ConstDoubleDocValues(0.0, this);
|
||||
}
|
||||
|
||||
return new FloatDocValues(this) {
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return similarity.decodeNormValue(norms[doc]);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this.getClass() != o.getClass()) return false;
|
||||
return this.field.equals(((NormValueSource)o).field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.getClass().hashCode() + field.hashCode();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.solr.search.SolrIndexReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class NumDocsValueSource extends ValueSource {
|
||||
public String name() {
|
||||
return "numdocs";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return name() + "()";
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
// Searcher has no numdocs so we must use the reader instead
|
||||
SolrIndexReader topReader = (SolrIndexReader)reader;
|
||||
while (topReader.getParent() != null) topReader = topReader.getParent();
|
||||
return new ConstIntDocValues(topReader.numDocs(), this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return this.getClass() == o.getClass();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.getClass().hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,93 @@
|
|||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class TFValueSource extends TermFreqValueSource {
|
||||
public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "tf";
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
// use MultiFields, just in case someone did a top() function
|
||||
Fields fields = MultiFields.getFields(reader);
|
||||
final Terms terms = fields.terms(field);
|
||||
final Similarity similarity = ((Searcher)context.get("searcher")).getSimilarity();
|
||||
|
||||
return new FloatDocValues(this) {
|
||||
DocsEnum docs ;
|
||||
int atDoc;
|
||||
int lastDocRequested = -1;
|
||||
|
||||
{ reset(); }
|
||||
|
||||
public void reset() throws IOException {
|
||||
// no one should call us for deleted docs?
|
||||
docs = terms.docs(null, indexedBytes, null);
|
||||
if (docs == null) {
|
||||
docs = new DocsEnum() {
|
||||
@Override
|
||||
public int freq() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
};
|
||||
}
|
||||
atDoc = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
try {
|
||||
if (doc < lastDocRequested) {
|
||||
// out-of-order access.... reset
|
||||
reset();
|
||||
}
|
||||
lastDocRequested = doc;
|
||||
|
||||
if (atDoc < doc) {
|
||||
atDoc = docs.advance(doc);
|
||||
}
|
||||
|
||||
if (atDoc > doc) {
|
||||
// term doesn't match this document... either because we hit the
|
||||
// end, or because the next doc is after this doc.
|
||||
return similarity.tf(0);
|
||||
}
|
||||
|
||||
// a match!
|
||||
return similarity.tf(docs.freq());
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "caught exception in function "+description()+" : doc="+doc, e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class TermFreqValueSource extends DocFreqValueSource {
|
||||
public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
|
||||
super(field, val, indexedField, indexedBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "termfreq";
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(Map context, IndexReader reader) throws IOException {
|
||||
// use MultiFields, just in case someone did a top() function
|
||||
Fields fields = MultiFields.getFields(reader);
|
||||
final Terms terms = fields.terms(field);
|
||||
|
||||
return new IntDocValues(this) {
|
||||
DocsEnum docs ;
|
||||
int atDoc;
|
||||
int lastDocRequested = -1;
|
||||
|
||||
{ reset(); }
|
||||
|
||||
public void reset() throws IOException {
|
||||
// no one should call us for deleted docs?
|
||||
docs = terms.docs(null, indexedBytes, null);
|
||||
if (docs == null) {
|
||||
docs = new DocsEnum() {
|
||||
@Override
|
||||
public int freq() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
};
|
||||
}
|
||||
atDoc = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
try {
|
||||
if (doc < lastDocRequested) {
|
||||
// out-of-order access.... reset
|
||||
reset();
|
||||
}
|
||||
lastDocRequested = doc;
|
||||
|
||||
if (atDoc < doc) {
|
||||
atDoc = docs.advance(doc);
|
||||
}
|
||||
|
||||
if (atDoc > doc) {
|
||||
// term doesn't match this document... either because we hit the
|
||||
// end, or because the next doc is after this doc.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// a match!
|
||||
return docs.freq();
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "caught exception in function "+description()+" : doc="+doc, e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -17,7 +17,9 @@
|
|||
|
||||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.search.DefaultSimilarity;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -274,15 +276,30 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
clearIndex();
|
||||
|
||||
assertU(adoc("id","1", "a_tdt","2009-08-31T12:10:10.123Z", "b_tdt","2009-08-31T12:10:10.124Z"));
|
||||
assertU(adoc("id","2"));
|
||||
assertU(adoc("id","2", "a_t","how now brown cow"));
|
||||
assertU(commit()); // create more than one segment
|
||||
assertU(adoc("id","3"));
|
||||
assertU(adoc("id","3", "a_t","brown cow"));
|
||||
assertU(adoc("id","4"));
|
||||
assertU(commit()); // create more than one segment
|
||||
assertU(adoc("id","5"));
|
||||
assertU(adoc("id","6"));
|
||||
assertU(adoc("id","6", "a_t","cow cow cow cow cow"));
|
||||
assertU(commit());
|
||||
|
||||
// test relevancy functions
|
||||
assertQ(req("fl","*,score","q", "{!func}numdocs()", "fq","id:6"), "//float[@name='score']='6.0'");
|
||||
assertQ(req("fl","*,score","q", "{!func}maxdoc()", "fq","id:6"), "//float[@name='score']='6.0'");
|
||||
assertQ(req("fl","*,score","q", "{!func}docfreq(a_t,cow)", "fq","id:6"), "//float[@name='score']='3.0'");
|
||||
assertQ(req("fl","*,score","q", "{!func}docfreq('a_t','cow')", "fq","id:6"), "//float[@name='score']='3.0'");
|
||||
assertQ(req("fl","*,score","q", "{!func}docfreq($field,$value)", "fq","id:6", "field","a_t", "value","cow"), "//float[@name='score']='3.0'");
|
||||
assertQ(req("fl","*,score","q", "{!func}termfreq(a_t,cow)", "fq","id:6"), "//float[@name='score']='5.0'");
|
||||
Similarity similarity = new DefaultSimilarity();
|
||||
assertQ(req("fl","*,score","q", "{!func}idf(a_t,cow)", "fq","id:6"),
|
||||
"//float[@name='score']='" + similarity.idf(3,6) + "'");
|
||||
assertQ(req("fl","*,score","q", "{!func}tf(a_t,cow)", "fq","id:6"),
|
||||
"//float[@name='score']='" + similarity.tf(5) + "'");
|
||||
assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"),
|
||||
"//float[@name='score']='" + similarity.lengthNorm("a_t",4) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
|
||||
|
||||
// test that ord and rord are working on a global index basis, not just
|
||||
// at the segment level (since Lucene 2.9 has switched to per-segment searching)
|
||||
assertQ(req("fl","*,score","q", "{!func}ord(id)", "fq","id:6"), "//float[@name='score']='6.0'");
|
||||
|
|
Loading…
Reference in New Issue