mirror of https://github.com/apache/lucene.git
LUCENE-3267: Consolidated Lucene core's function impls into new module. Removed remainder
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1141747 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eefdc42de9
commit
08b8b9b789
|
@ -364,3 +364,21 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
|
|||
for applications that rely on Lucene's internal document ID
|
||||
assignment. If so, you should instead use LogByteSize/DocMergePolicy
|
||||
during indexing.
|
||||
|
||||
* LUCENE-2883: Lucene's o.a.l.search.function ValueSource-based functionality was consolidated
|
||||
into module/queries along with Solr's similar functionality. The following classes were moved:
|
||||
- o.a.l.search.function.CustomScoreQuery -> o.a.l.queries.CustomScoreQuery
|
||||
- o.a.l.search.function.CustomScoreProvider -> o.a.l.queries.CustomScoreProvider
|
||||
- o.a.l.search.function.NumericIndexDocValueSource -> o.a.l.queries.function.valuesource.NumericIndexDocValueSource
|
||||
The following lists the replacement classes for those removed:
|
||||
- o.a.l.search.function.ByteFieldSource -> o.a.l.queries.function.valuesource.ByteFieldSource
|
||||
- o.a.l.search.function.DocValues -> o.a.l.queries.function.DocValues
|
||||
- o.a.l.search.function.FieldCacheSource -> o.a.l.queries.function.valuesource.FieldCacheSource
|
||||
- o.a.l.search.function.FieldScoreQuery -> o.a.l.queries.function.FunctionQuery
|
||||
- o.a.l.search.function.FloatFieldSource -> o.a.l.queries.function.valuesource.FloatFieldSource
|
||||
- o.a.l.search.function.IntFieldSource -> o.a.l.queries.function.valuesource.IntFieldSource
|
||||
- o.a.l.search.function.OrdFieldSource -> o.a.l.queries.function.valuesource.OrdFieldSource
|
||||
- o.a.l.search.function.ReverseOrdFieldSource -> o.a.l.queries.function.valuesource.ReverseOrdFieldSource
|
||||
- o.a.l.search.function.ShortFieldSource -> o.a.l.queries.function.valuesource.ShortFieldSource
|
||||
- o.a.l.search.function.ValueSource -> o.a.l.queries.function.ValueSource
|
||||
- o.a.l.search.function.ValueSourceQuery -> o.a.l.queries.function.FunctionQuery
|
||||
|
|
|
@ -35,8 +35,6 @@ import org.apache.lucene.search.Sort;
|
|||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.function.FieldScoreQuery;
|
||||
import org.apache.lucene.search.function.FieldScoreQuery.Type;
|
||||
import org.apache.lucene.spatial.DistanceUtils;
|
||||
import org.apache.lucene.spatial.geohash.GeoHashUtils;
|
||||
import org.apache.lucene.spatial.geometry.DistanceUnits;
|
||||
|
|
|
@ -1,115 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains single byte field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getBytes()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
public class ByteFieldSource extends FieldCacheSource {
|
||||
private FieldCache.ByteParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached byte field source with default string-to-byte parser.
|
||||
*/
|
||||
public ByteFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached byte field source with a specific string-to-byte parser.
|
||||
*/
|
||||
public ByteFieldSource(String field, FieldCache.ByteParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "byte(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final byte[] arr = cache.getBytes(reader, field, parser);
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
@Override
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != ByteFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
ByteFieldSource other = (ByteFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
@Override
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Byte.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,187 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
||||
/**
|
||||
* Expert: represents field values as different types.
|
||||
* Normally created via a
|
||||
* {@link org.apache.lucene.search.function.ValueSource ValueSource}
|
||||
* for a particular field and reader.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
*
|
||||
*/
|
||||
public abstract class DocValues {
|
||||
/*
|
||||
* DocValues is distinct from ValueSource because
|
||||
* there needs to be an object created at query evaluation time that
|
||||
* is not referenced by the query itself because:
|
||||
* - Query objects should be MT safe
|
||||
* - For caching, Query objects are often used as keys... you don't
|
||||
* want the Query carrying around big objects
|
||||
*/
|
||||
|
||||
/**
|
||||
* Return doc value as a float.
|
||||
* <P>Mandatory: every DocValues implementation must implement at least this method.
|
||||
* @param doc document whose float value is requested.
|
||||
*/
|
||||
public abstract float floatVal(int doc);
|
||||
|
||||
/**
|
||||
* Return doc value as an int.
|
||||
* <P>Optional: DocValues implementations can (but don't have to) override this method.
|
||||
* @param doc document whose int value is requested.
|
||||
*/
|
||||
public int intVal(int doc) {
|
||||
return (int) floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a long.
|
||||
* <P>Optional: DocValues implementations can (but don't have to) override this method.
|
||||
* @param doc document whose long value is requested.
|
||||
*/
|
||||
public long longVal(int doc) {
|
||||
return (long) floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a double.
|
||||
* <P>Optional: DocValues implementations can (but don't have to) override this method.
|
||||
* @param doc document whose double value is requested.
|
||||
*/
|
||||
public double doubleVal(int doc) {
|
||||
return floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a string.
|
||||
* <P>Optional: DocValues implementations can (but don't have to) override this method.
|
||||
* @param doc document whose string value is requested.
|
||||
*/
|
||||
public String strVal(int doc) {
|
||||
return Float.toString(floatVal(doc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string representation of a doc value, as required for Explanations.
|
||||
*/
|
||||
public abstract String toString(int doc);
|
||||
|
||||
/**
|
||||
* Explain the scoring value for the input doc.
|
||||
*/
|
||||
public Explanation explain(int doc) {
|
||||
return new Explanation(floatVal(doc), toString(doc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: for test purposes only, return the inner array of values, or null if not applicable.
|
||||
* <p>
|
||||
* Allows tests to verify that loaded values are:
|
||||
* <ol>
|
||||
* <li>indeed cached/reused.</li>
|
||||
* <li>stored in the expected size/type (byte/short/int/float).</li>
|
||||
* </ol>
|
||||
* Note: implementations of DocValues must override this method for
|
||||
* these test elements to be tested; otherwise the test would not fail, just
|
||||
* print a warning.
|
||||
*/
|
||||
Object getInnerArray() {
|
||||
throw new UnsupportedOperationException("this optional method is for test purposes only");
|
||||
}
|
||||
|
||||
// --- some simple statistics on values
|
||||
private float minVal = Float.NaN;
|
||||
private float maxVal = Float.NaN;
|
||||
private float avgVal = Float.NaN;
|
||||
private boolean computed=false;
|
||||
// compute optional values
|
||||
private void compute() {
|
||||
if (computed) {
|
||||
return;
|
||||
}
|
||||
float sum = 0;
|
||||
int n = 0;
|
||||
while (true) {
|
||||
float val;
|
||||
try {
|
||||
val = floatVal(n);
|
||||
} catch (ArrayIndexOutOfBoundsException e) {
|
||||
break;
|
||||
}
|
||||
sum += val;
|
||||
minVal = Float.isNaN(minVal) ? val : Math.min(minVal, val);
|
||||
maxVal = Float.isNaN(maxVal) ? val : Math.max(maxVal, val);
|
||||
++n;
|
||||
}
|
||||
|
||||
avgVal = n == 0 ? Float.NaN : sum / n;
|
||||
computed = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the minimum of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value.
|
||||
* <p>
|
||||
* This operation is optional
|
||||
* </p>
|
||||
*
|
||||
* @return the minimum of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value.
|
||||
*/
|
||||
public float getMinValue() {
|
||||
compute();
|
||||
return minVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value.
|
||||
* <p>
|
||||
* This operation is optional
|
||||
* </p>
|
||||
*
|
||||
* @return the maximum of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value.
|
||||
*/
|
||||
public float getMaxValue() {
|
||||
compute();
|
||||
return maxVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the average of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value.
|
||||
* <p>
|
||||
* This operation is optional
|
||||
* </p>
|
||||
*
|
||||
* @return the average of all values or <code>Float.NaN</code> if this
|
||||
* DocValues instance does not contain any value
|
||||
*/
|
||||
public float getAverageValue() {
|
||||
compute();
|
||||
return avgVal;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,109 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
/**
|
||||
* Expert: A base class for ValueSource implementations that retrieve values for
|
||||
* a single field from the {@link org.apache.lucene.search.FieldCache FieldCache}.
|
||||
* <p>
|
||||
* Fields used herein must be indexed (doesn't matter if these fields are stored or not).
|
||||
* <p>
|
||||
* It is assumed that each such indexed field is untokenized, or at least has a single token in a document.
|
||||
* For documents with multiple tokens of the same field, behavior is undefined (It is likely that current
|
||||
* code would use the value of one of these tokens, but this is not guaranteed).
|
||||
* <p>
|
||||
* Documents with no tokens in this field are assigned the <code>Zero</code> value.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
public abstract class FieldCacheSource extends ValueSource {
|
||||
private String field;
|
||||
|
||||
/**
|
||||
* Create a cached field source for the input field.
|
||||
*/
|
||||
public FieldCacheSource(String field) {
|
||||
this.field=field;
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader.AtomicReaderContext) */
|
||||
@Override
|
||||
public final DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
return getCachedFieldValues(FieldCache.DEFAULT, field, context.reader);
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return cached DocValues for input field and reader.
|
||||
* @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing)
|
||||
* @param field Field for which values are required.
|
||||
* @see ValueSource
|
||||
*/
|
||||
public abstract DocValues getCachedFieldValues(FieldCache cache, String field, IndexReader reader) throws IOException;
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (!(o instanceof FieldCacheSource)) {
|
||||
return false;
|
||||
}
|
||||
FieldCacheSource other = (FieldCacheSource) o;
|
||||
return
|
||||
this.field.equals(other.field) &&
|
||||
cachedFieldSourceEquals(other);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return
|
||||
field.hashCode() +
|
||||
cachedFieldSourceHashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal.
|
||||
* @see Object#equals(java.lang.Object)
|
||||
*/
|
||||
public abstract boolean cachedFieldSourceEquals(FieldCacheSource other);
|
||||
|
||||
/**
|
||||
* Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field
|
||||
* and the cache (those are taken care of elsewhere).
|
||||
* @see Object#hashCode()
|
||||
*/
|
||||
public abstract int cachedFieldSourceHashCode();
|
||||
}
|
|
@ -1,125 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A query that scores each document as the value of the numeric input field.
|
||||
* <p>
|
||||
* The query matches all documents, and scores each document according to the numeric
|
||||
* value of that field.
|
||||
* <p>
|
||||
* It is assumed, and expected, that:
|
||||
* <ul>
|
||||
* <li>The field used here is indexed, and has exactly
|
||||
* one token in every scored document.</li>
|
||||
* <li>Best if this field is untokenized.</li>
|
||||
* <li>That token is parseable to the selected type.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Combining this query in a FunctionQuery allows much freedom in affecting document scores.
|
||||
* Note, that with this freedom comes responsibility: it is more than likely that the
|
||||
* default Lucene scoring is superior in quality to scoring modified as explained here.
|
||||
* However, in some cases, and certainly for research experiments, this capability may prove useful.
|
||||
* <p>
|
||||
* When constructing this query, select the appropriate type. That type should match the data stored in the
|
||||
* field. So in fact the "right" type should be selected before indexing. Type selection
|
||||
* has effect on the RAM usage:
|
||||
* <ul>
|
||||
* <li>{@link Type#BYTE} consumes 1 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#SHORT} consumes 2 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#INT} consumes 4 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#FLOAT} consumes 4 * maxDocs bytes.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <b>Caching:</b>
|
||||
* Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader.
|
||||
* To take advantage of this, it is extremely important to reuse index-readers or index-searchers,
|
||||
* otherwise, for instance if for each query a new index reader is opened, large penalties would be
|
||||
* paid for loading the field values into memory over and over again!
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class FieldScoreQuery extends ValueSourceQuery {
|
||||
|
||||
/**
|
||||
* Type of score field, indicating how field values are interpreted/parsed.
|
||||
* <p>
|
||||
* The type selected at search time should match the data stored in the field.
|
||||
* Different types have different RAM requirements:
|
||||
* <ul>
|
||||
* <li>{@link #BYTE} consumes 1 * maxDocs bytes.</li>
|
||||
* <li>{@link #SHORT} consumes 2 * maxDocs bytes.</li>
|
||||
* <li>{@link #INT} consumes 4 * maxDocs bytes.</li>
|
||||
* <li>{@link #FLOAT} consumes 4 * maxDocs bytes.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public static class Type {
|
||||
|
||||
/** field values are interpreted as numeric byte values. */
|
||||
public static final Type BYTE = new Type("byte");
|
||||
|
||||
/** field values are interpreted as numeric short values. */
|
||||
public static final Type SHORT = new Type("short");
|
||||
|
||||
/** field values are interpreted as numeric int values. */
|
||||
public static final Type INT = new Type("int");
|
||||
|
||||
/** field values are interpreted as numeric float values. */
|
||||
public static final Type FLOAT = new Type("float");
|
||||
|
||||
private String typeName;
|
||||
private Type (String name) {
|
||||
this.typeName = name;
|
||||
}
|
||||
/*(non-Javadoc) @see java.lang.Object#toString() */
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName()+"::"+typeName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
|
||||
* <p>
|
||||
* The <code>type</code> param tells how to parse the field string values into a numeric score value.
|
||||
* @param field the numeric field to be used.
|
||||
* @param type the type of the field: either
|
||||
* {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}.
|
||||
*/
|
||||
public FieldScoreQuery(String field, Type type) {
|
||||
super(getValueSource(field,type));
|
||||
}
|
||||
|
||||
// create the appropriate (cached) field value source.
|
||||
private static ValueSource getValueSource(String field, Type type) {
|
||||
if (type == Type.BYTE) {
|
||||
return new ByteFieldSource(field);
|
||||
}
|
||||
if (type == Type.SHORT) {
|
||||
return new ShortFieldSource(field);
|
||||
}
|
||||
if (type == Type.INT) {
|
||||
return new IntFieldSource(field);
|
||||
}
|
||||
if (type == Type.FLOAT) {
|
||||
return new FloatFieldSource(field);
|
||||
}
|
||||
throw new IllegalArgumentException(type+" is not a known Field Score Query Type!");
|
||||
}
|
||||
|
||||
}
|
|
@ -1,109 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains float field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getFloats()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
public class FloatFieldSource extends FieldCacheSource {
|
||||
private FieldCache.FloatParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached float field source with default string-to-float parser.
|
||||
*/
|
||||
public FloatFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached float field source with a specific string-to-float parser.
|
||||
*/
|
||||
public FloatFieldSource(String field, FieldCache.FloatParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "float(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final float[] arr = cache.getFloats(reader, field, parser);
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
@Override
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != FloatFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
FloatFieldSource other = (FloatFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
@Override
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Float.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
}
|
|
@ -1,115 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains int field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getInts()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
public class IntFieldSource extends FieldCacheSource {
|
||||
private FieldCache.IntParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached int field source with default string-to-int parser.
|
||||
*/
|
||||
public IntFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached int field source with a specific string-to-int parser.
|
||||
*/
|
||||
public IntFieldSource(String field, FieldCache.IntParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "int(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final int[] arr = cache.getInts(reader, field, parser);
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
@Override
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != IntFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
IntFieldSource other = (IntFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
@Override
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Integer.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,137 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
|
||||
/** This class wraps another ValueSource, but protects
|
||||
* against accidental double RAM usage in FieldCache when
|
||||
* a composite reader is passed to {@link #getValues}.
|
||||
*
|
||||
* <p><b>NOTE</b>: this class adds a CPU penalty to every
|
||||
* lookup, as it must resolve the incoming document to the
|
||||
* right sub-reader using a binary search.</p>
|
||||
*
|
||||
* @deprecated (4.0) This class is temporary, to ease the
|
||||
* migration to segment-based searching. Please change your
|
||||
* code to not pass composite readers to these APIs. */
|
||||
|
||||
@Deprecated
|
||||
public final class MultiValueSource extends ValueSource {
|
||||
|
||||
final ValueSource other;
|
||||
public MultiValueSource(ValueSource other) {
|
||||
this.other = other;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
// Already an atomic reader -- just delegate
|
||||
return other.getValues(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(ReaderContext context) throws IOException {
|
||||
if (context.isAtomic) {
|
||||
return getValues((AtomicReaderContext) context);
|
||||
}
|
||||
return new MultiDocValues(ReaderUtil.leaves(context));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return other.description();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o instanceof MultiValueSource) {
|
||||
return ((MultiValueSource) o).other.equals(other);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return 31 * other.hashCode();
|
||||
}
|
||||
|
||||
private final class MultiDocValues extends DocValues {
|
||||
|
||||
final DocValues[] docValues;
|
||||
final AtomicReaderContext[] leaves;
|
||||
|
||||
MultiDocValues(AtomicReaderContext[] leaves) throws IOException {
|
||||
this.leaves = leaves;
|
||||
docValues = new DocValues[leaves.length];
|
||||
for(int i=0;i<leaves.length;i++) {
|
||||
docValues[i] = other.getValues(leaves[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].floatVal(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].intVal(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long longVal(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].longVal(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double doubleVal(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].doubleVal(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String strVal(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].strVal(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].toString(doc-leaves[n].docBase);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(int doc) {
|
||||
final int n = ReaderUtil.subIndex(doc, leaves);
|
||||
return docValues[n].explain(doc-leaves[n].docBase);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,116 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.FieldCache.DocTermsIndex;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains the ordinal of the field value from the default Lucene
|
||||
* {@link org.apache.lucene.search.FieldCache Fieldcache} using getStringIndex().
|
||||
* <p>
|
||||
* The native lucene index order is used to assign an ordinal value for each field value.
|
||||
* <p
|
||||
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
|
||||
* <p>
|
||||
* Example:
|
||||
* <br>If there were only three field values: "apple","banana","pear"
|
||||
* <br>then ord("apple")=1, ord("banana")=2, ord("pear")=3
|
||||
* <p>
|
||||
* WARNING:
|
||||
* ord() depends on the position in an index and can thus change
|
||||
* when other documents are inserted or deleted,
|
||||
* or if a MultiSearcher is used.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
|
||||
public class OrdFieldSource extends ValueSource {
|
||||
protected String field;
|
||||
|
||||
/**
|
||||
* Constructor for a certain field.
|
||||
* @param field field whose values order is used.
|
||||
*/
|
||||
public OrdFieldSource(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "ord(" + field + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
final DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader, field);
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return termsIndex.getOrd(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
|
||||
@Override
|
||||
public String strVal(int doc) {
|
||||
// the string value of the ordinal, not the string itself
|
||||
return Integer.toString(termsIndex.getOrd(doc));
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return termsIndex;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) return true;
|
||||
if (o == null) return false;
|
||||
if (o.getClass() != OrdFieldSource.class) return false;
|
||||
OrdFieldSource other = (OrdFieldSource)o;
|
||||
return this.field.equals(other.field);
|
||||
}
|
||||
|
||||
private static final int hcode = OrdFieldSource.class.hashCode();
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hcode + field.hashCode();
|
||||
}
|
||||
}
|
|
@ -1,124 +0,0 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains the ordinal of the field value from the default Lucene
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache} using getStringIndex()
|
||||
* and reverses the order.
|
||||
* <p>
|
||||
* The native lucene index order is used to assign an ordinal value for each field value.
|
||||
* <p>
|
||||
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
|
||||
* <br>
|
||||
* Example of reverse ordinal (rord):
|
||||
* <br>If there were only three field values: "apple","banana","pear"
|
||||
* <br>then rord("apple")=3, rord("banana")=2, ord("pear")=1
|
||||
* <p>
|
||||
* WARNING:
|
||||
* rord() depends on the position in an index and can thus change
|
||||
* when other documents are inserted or deleted,
|
||||
* or if a MultiSearcher is used.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
|
||||
public class ReverseOrdFieldSource extends ValueSource {
|
||||
public String field;
|
||||
|
||||
/**
|
||||
* Contructor for a certain field.
|
||||
* @param field field whose values reverse order is used.
|
||||
*/
|
||||
public ReverseOrdFieldSource(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "rord("+field+')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
final FieldCache.DocTermsIndex termsIndex = FieldCache.DEFAULT.getTermsIndex(context.reader, field);
|
||||
|
||||
final int end = termsIndex.numOrd();
|
||||
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return (end - termsIndex.getOrd(doc));
|
||||
}
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
return end - termsIndex.getOrd(doc);
|
||||
}
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
|
||||
@Override
|
||||
public String strVal(int doc) {
|
||||
// the string value of the ordinal, not the string itself
|
||||
return Integer.toString(intVal(doc));
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + strVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return termsIndex;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) return true;
|
||||
if (o == null) return false;
|
||||
if (o.getClass() != ReverseOrdFieldSource.class) return false;
|
||||
ReverseOrdFieldSource other = (ReverseOrdFieldSource)o;
|
||||
return this.field.equals(other.field);
|
||||
}
|
||||
|
||||
private static final int hcode = ReverseOrdFieldSource.class.hashCode();
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return hcode + field.hashCode();
|
||||
}
|
||||
}
|
|
@ -1,115 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains short field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getShorts()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* <p><b>NOTE</b>: with the switch in 2.9 to segment-based
|
||||
* searching, if {@link #getValues} is invoked with a
|
||||
* composite (multi-segment) reader, this can easily cause
|
||||
* double RAM usage for the values in the FieldCache. It's
|
||||
* best to switch your application to pass only atomic
|
||||
* (single segment) readers to this API.</p>
|
||||
*/
|
||||
public class ShortFieldSource extends FieldCacheSource {
|
||||
private FieldCache.ShortParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached short field source with default string-to-short parser.
|
||||
*/
|
||||
public ShortFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached short field source with a specific string-to-short parser.
|
||||
*/
|
||||
public ShortFieldSource(String field, FieldCache.ShortParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
@Override
|
||||
public String description() {
|
||||
return "short(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final short[] arr = cache.getShorts(reader, field, parser);
|
||||
return new DocValues() {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
@Override
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
@Override
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
@Override
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != ShortFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
ShortFieldSource other = (ShortFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
@Override
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Short.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,86 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.CompositeReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: source of values for basic function queries.
|
||||
* <P>At its default/simplest form, values - one per doc - are used as the score of that doc.
|
||||
* <P>Values are instantiated as
|
||||
* {@link org.apache.lucene.search.function.DocValues DocValues} for a particular reader.
|
||||
* <P>ValueSource implementations differ in RAM requirements: it would always be a factor
|
||||
* of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
*
|
||||
*/
|
||||
public abstract class ValueSource {
|
||||
|
||||
/**
|
||||
* Return the DocValues used by the function query.
|
||||
* @param context the IndexReader used to read these values.
|
||||
* If any caching is involved, that caching would also be IndexReader based.
|
||||
* @throws IOException for any error.
|
||||
*/
|
||||
public abstract DocValues getValues(AtomicReaderContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Return the DocValues used by the function query.
|
||||
* @deprecated (4.0) This method is temporary, to ease the migration to segment-based
|
||||
* searching. Please change your code to not pass {@link CompositeReaderContext} to these
|
||||
* APIs. Use {@link #getValues(IndexReader.AtomicReaderContext)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public DocValues getValues(ReaderContext context) throws IOException {
|
||||
return getValues((AtomicReaderContext) context);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* description of field, used in explain()
|
||||
*/
|
||||
public abstract String description();
|
||||
|
||||
/* (non-Javadoc) @see java.lang.Object#toString() */
|
||||
@Override
|
||||
public String toString() {
|
||||
return description();
|
||||
}
|
||||
|
||||
/**
|
||||
* Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(Object)}.
|
||||
* @see Object#equals(Object)
|
||||
*/
|
||||
@Override
|
||||
public abstract boolean equals(Object o);
|
||||
|
||||
/**
|
||||
* Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}.
|
||||
* @see Object#hashCode()
|
||||
*/
|
||||
@Override
|
||||
public abstract int hashCode();
|
||||
|
||||
}
|
|
@ -1,205 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Expert: A Query that sets the scores of document to the
|
||||
* values obtained from a {@link org.apache.lucene.search.function.ValueSource ValueSource}.
|
||||
* <p>
|
||||
* This query provides a score for <em>each and every</em> undeleted document in the index.
|
||||
* <p>
|
||||
* The value source can be based on a (cached) value of an indexed field, but it
|
||||
* can also be based on an external source, e.g. values read from an external database.
|
||||
* <p>
|
||||
* Score is set as: Score(doc,query) = query.getBoost()<sup>2</sup> * valueSource(doc).
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ValueSourceQuery extends Query {
|
||||
ValueSource valSrc;
|
||||
|
||||
/**
|
||||
* Create a value source query
|
||||
* @param valSrc provides the values defines the function to be used for scoring
|
||||
*/
|
||||
public ValueSourceQuery(ValueSource valSrc) {
|
||||
this.valSrc=valSrc;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader) */
|
||||
@Override
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
return this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#extractTerms(Set) */
|
||||
@Override
|
||||
public void extractTerms(Set<Term> terms) {
|
||||
// no terms involved here
|
||||
}
|
||||
|
||||
class ValueSourceWeight extends Weight {
|
||||
float queryNorm;
|
||||
float queryWeight;
|
||||
|
||||
public ValueSourceWeight(IndexSearcher searcher) {
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getQuery() */
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return ValueSourceQuery.this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getValue() */
|
||||
@Override
|
||||
public float getValue() {
|
||||
return queryWeight;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
|
||||
@Override
|
||||
public float sumOfSquaredWeights() throws IOException {
|
||||
queryWeight = getBoost();
|
||||
return queryWeight * queryWeight;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
|
||||
@Override
|
||||
public void normalize(float norm) {
|
||||
this.queryNorm = norm;
|
||||
queryWeight *= this.queryNorm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
|
||||
return new ValueSourceScorer(context, this);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int) */
|
||||
@Override
|
||||
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
|
||||
DocValues vals = valSrc.getValues(context);
|
||||
float sc = queryWeight * vals.floatVal(doc);
|
||||
|
||||
Explanation result = new ComplexExplanation(
|
||||
true, sc, ValueSourceQuery.this.toString() + ", product of:");
|
||||
|
||||
result.addDetail(vals.explain(doc));
|
||||
result.addDetail(new Explanation(getBoost(), "boost"));
|
||||
result.addDetail(new Explanation(queryNorm,"queryNorm"));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A scorer that (simply) matches all documents, and scores each document with
|
||||
* the value of the value source in effect. As an example, if the value source
|
||||
* is a (cached) field source, then value of that field in that document will
|
||||
* be used. (assuming field is indexed for this doc, with a single token.)
|
||||
*/
|
||||
private class ValueSourceScorer extends Scorer {
|
||||
private final float qWeight;
|
||||
private final DocValues vals;
|
||||
private final Bits delDocs;
|
||||
private final int maxDoc;
|
||||
private int doc = -1;
|
||||
|
||||
// constructor
|
||||
private ValueSourceScorer(AtomicReaderContext context, ValueSourceWeight w) throws IOException {
|
||||
super(w);
|
||||
final IndexReader reader = context.reader;
|
||||
qWeight = w.getValue();
|
||||
// this is when/where the values are first created.
|
||||
vals = valSrc.getValues(context);
|
||||
delDocs = reader.getDeletedDocs();
|
||||
maxDoc = reader.maxDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
doc++;
|
||||
while (delDocs != null && doc < maxDoc && delDocs.get(doc)) {
|
||||
doc++;
|
||||
}
|
||||
if (doc == maxDoc) {
|
||||
doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
doc = target - 1;
|
||||
return nextDoc();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return qWeight * vals.floatVal(doc);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher) {
|
||||
return new ValueSourceQuery.ValueSourceWeight(searcher);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return valSrc.toString() + ToStringUtils.boost(getBoost());
|
||||
}
|
||||
|
||||
/** Returns true if <code>o</code> is equal to this. */
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!super.equals(o))
|
||||
return false;
|
||||
if (getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
ValueSourceQuery other = (ValueSourceQuery)o;
|
||||
return this.getBoost() == other.getBoost()
|
||||
&& this.valSrc.equals(other.valSrc);
|
||||
}
|
||||
|
||||
/** Returns a hash code value for this object. */
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return (getClass().hashCode() + valSrc.hashCode()) ^ Float.floatToIntBits(getBoost());
|
||||
}
|
||||
|
||||
}
|
|
@ -1,191 +0,0 @@
|
|||
<HTML>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>org.apache.lucene.search.function</TITLE>
|
||||
</HEAD>
|
||||
<BODY>
|
||||
<DIV>
|
||||
Programmatic control over document scores.
|
||||
</DIV>
|
||||
<DIV>
|
||||
The <code>function</code> package provides tight control over document scores.
|
||||
</DIV>
|
||||
<DIV>
|
||||
@lucene.experimental
|
||||
</DIV>
|
||||
<DIV>
|
||||
Two types of queries are available in this package:
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
<b>Custom Score queries</b> - allowing to set the score
|
||||
of a matching document as a mathematical expression over scores
|
||||
of that document by contained (sub) queries.
|
||||
</li>
|
||||
<li>
|
||||
<b>Field score queries</b> - allowing to base the score of a
|
||||
document on <b>numeric values</b> of <b>indexed fields</b>.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV> </DIV>
|
||||
<DIV>
|
||||
<b>Some possible uses of these queries:</b>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
Normalizing the document scores by values indexed in a special field -
|
||||
for instance, experimenting with a different doc length normalization.
|
||||
</li>
|
||||
<li>
|
||||
Introducing some static scoring element, to the score of a document, -
|
||||
for instance using some topological attribute of the links to/from a document.
|
||||
</li>
|
||||
<li>
|
||||
Computing the score of a matching document as an arbitrary odd function of
|
||||
its score by a certain query.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>Performance and Quality Considerations:</b>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
When scoring by values of indexed fields,
|
||||
these values are loaded into memory.
|
||||
Unlike the regular scoring, where the required information is read from
|
||||
disk as necessary, here field values are loaded once and cached by Lucene in memory
|
||||
for further use, anticipating reuse by further queries. While all this is carefully
|
||||
cached with performance in mind, it is recommended to
|
||||
use these features only when the default Lucene scoring does
|
||||
not match your "special" application needs.
|
||||
</li>
|
||||
<li>
|
||||
Use only with carefully selected fields, because in most cases,
|
||||
search quality with regular Lucene scoring
|
||||
would outperform that of scoring by field values.
|
||||
</li>
|
||||
<li>
|
||||
Values of fields used for scoring should match.
|
||||
Do not apply on a field containing arbitrary (long) text.
|
||||
Do not mix values in the same field if that field is used for scoring.
|
||||
</li>
|
||||
<li>
|
||||
Smaller (shorter) field tokens mean less RAM (something always desired).
|
||||
When using <a href=FieldScoreQuery.html>FieldScoreQuery</a>,
|
||||
select the shortest <a href=FieldScoreQuery.html#Type>FieldScoreQuery.Type</a>
|
||||
that is sufficient for the used field values.
|
||||
</li>
|
||||
<li>
|
||||
Reusing IndexReaders/IndexSearchers is essential, because the caching of field tokens
|
||||
is based on an IndexReader. Whenever a new IndexReader is used, values currently in the cache
|
||||
cannot be used and new values must be loaded from disk. So replace/refresh readers/searchers in
|
||||
a controlled manner.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>History and Credits:</b>
|
||||
<ul>
|
||||
<li>
|
||||
A large part of the code of this package originated from Yonik's FunctionQuery code that was
|
||||
imported from <a href="http://lucene.apache.org/solr">Solr</a>
|
||||
(see <a href="http://issues.apache.org/jira/browse/LUCENE-446">LUCENE-446</a>).
|
||||
</li>
|
||||
<li>
|
||||
The idea behind CustomScoreQuery is borrowed from
|
||||
the "Easily create queries that transform sub-query scores arbitrarily" contribution by Mike Klaas
|
||||
(see <a href="http://issues.apache.org/jira/browse/LUCENE-850">LUCENE-850</a>)
|
||||
though the implementation and API here are different.
|
||||
</li>
|
||||
</ul>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>Code sample:</b>
|
||||
<P>
|
||||
Note: code snippets here should work, but they were never really compiled... so,
|
||||
tests sources under TestCustomScoreQuery, TestFieldScoreQuery and TestOrdValues
|
||||
may also be useful.
|
||||
<ol>
|
||||
<li>
|
||||
Using field (byte) values as scores:
|
||||
<p>
|
||||
Indexing:
|
||||
<pre class="prettyprint">
|
||||
f = new Field("score", "7", Field.Store.NO, Field.Index.UN_TOKENIZED);
|
||||
f.setOmitNorms(true);
|
||||
d1.add(f);
|
||||
</pre>
|
||||
<p>
|
||||
Search:
|
||||
<pre class="prettyprint">
|
||||
Query q = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
|
||||
</pre>
|
||||
Document d1 above would get a score of 7.
|
||||
</li>
|
||||
<p>
|
||||
<li>
|
||||
Manipulating scores
|
||||
<p>
|
||||
Dividing the original score of each document by the square root of its docid
|
||||
(just to demonstrate what it takes to manipulate scores this way)
|
||||
<pre class="prettyprint">
|
||||
Query q = queryParser.parse("my query text");
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return subQueryScore / Math.sqrt(docid);
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
For more informative debug info on the custom query, also override the name() method:
|
||||
<pre class="prettyprint">
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return subQueryScore / Math.sqrt(docid);
|
||||
}
|
||||
public String name() {
|
||||
return "1/sqrt(docid)";
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
Taking the square root of the original score and multiplying it by a "short field driven score", ie, the
|
||||
short value that was indexed for the scored doc in a certain field:
|
||||
<pre class="prettyprint">
|
||||
Query q = queryParser.parse("my query text");
|
||||
FieldScoreQuery qf = new FieldScoreQuery("shortScore", FieldScoreQuery.Type.SHORT);
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q,qf) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return Math.sqrt(subQueryScore) * valSrcScore;
|
||||
}
|
||||
public String name() {
|
||||
return "shortVal*sqrt(score)";
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
</BODY>
|
||||
</HTML>
|
|
@ -1,166 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Ignore;
|
||||
|
||||
/**
|
||||
* Setup for function tests
|
||||
*/
|
||||
@Ignore
|
||||
public abstract class FunctionTestSetup extends LuceneTestCase {
|
||||
|
||||
/**
|
||||
* Actual score computation order is slightly different than assumptios
|
||||
* this allows for a small amount of variation
|
||||
*/
|
||||
protected static float TEST_SCORE_TOLERANCE_DELTA = 0.001f;
|
||||
|
||||
protected static final int N_DOCS = 17; // select a primary number > 2
|
||||
|
||||
protected static final String ID_FIELD = "id";
|
||||
protected static final String TEXT_FIELD = "text";
|
||||
protected static final String INT_FIELD = "iii";
|
||||
protected static final String FLOAT_FIELD = "fff";
|
||||
|
||||
private static final String DOC_TEXT_LINES[] = {
|
||||
"Well, this is just some plain text we use for creating the ",
|
||||
"test documents. It used to be a text from an online collection ",
|
||||
"devoted to first aid, but if there was there an (online) lawyers ",
|
||||
"first aid collection with legal advices, \"it\" might have quite ",
|
||||
"probably advised one not to include \"it\"'s text or the text of ",
|
||||
"any other online collection in one's code, unless one has money ",
|
||||
"that one don't need and one is happy to donate for lawyers ",
|
||||
"charity. Anyhow at some point, rechecking the usage of this text, ",
|
||||
"it became uncertain that this text is free to use, because ",
|
||||
"the web site in the disclaimer of he eBook containing that text ",
|
||||
"was not responding anymore, and at the same time, in projGut, ",
|
||||
"searching for first aid no longer found that eBook as well. ",
|
||||
"So here we are, with a perhaps much less interesting ",
|
||||
"text for the test, but oh much much safer. ",
|
||||
};
|
||||
|
||||
protected static Directory dir;
|
||||
protected static Analyzer anlzr;
|
||||
|
||||
@AfterClass
|
||||
public static void afterClassFunctionTestSetup() throws Exception {
|
||||
dir.close();
|
||||
dir = null;
|
||||
anlzr = null;
|
||||
}
|
||||
|
||||
protected static void createIndex(boolean doMultiSegment) throws Exception {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp");
|
||||
}
|
||||
// prepare a small index with just a few documents.
|
||||
dir = newDirectory();
|
||||
anlzr = new MockAnalyzer(random);
|
||||
IndexWriterConfig iwc = newIndexWriterConfig( TEST_VERSION_CURRENT, anlzr).setMergePolicy(newLogMergePolicy());
|
||||
if (doMultiSegment) {
|
||||
iwc.setMaxBufferedDocs(_TestUtil.nextInt(random, 2, 7));
|
||||
}
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random, dir, iwc);
|
||||
iw.w.setInfoStream(VERBOSE ? System.out : null);
|
||||
// add docs not exactly in natural ID order, to verify we do check the order of docs by scores
|
||||
int remaining = N_DOCS;
|
||||
boolean done[] = new boolean[N_DOCS];
|
||||
int i = 0;
|
||||
while (remaining > 0) {
|
||||
if (done[i]) {
|
||||
throw new Exception("to set this test correctly N_DOCS=" + N_DOCS + " must be primary and greater than 2!");
|
||||
}
|
||||
addDoc(iw, i);
|
||||
done[i] = true;
|
||||
i = (i + 4) % N_DOCS;
|
||||
remaining --;
|
||||
}
|
||||
if (!doMultiSegment) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp optimize");
|
||||
}
|
||||
iw.optimize();
|
||||
}
|
||||
iw.close();
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: setUp done close");
|
||||
}
|
||||
}
|
||||
|
||||
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
|
||||
Document d = new Document();
|
||||
Fieldable f;
|
||||
int scoreAndID = i + 1;
|
||||
|
||||
f = newField(ID_FIELD, id2String(scoreAndID), Field.Store.YES, Field.Index.NOT_ANALYZED); // for debug purposes
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), Field.Store.NO, Field.Index.ANALYZED); // for regular search
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = newField(INT_FIELD, "" + scoreAndID, Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = newField(FLOAT_FIELD, scoreAndID + ".000", Field.Store.NO, Field.Index.NOT_ANALYZED); // for function scoring
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
iw.addDocument(d);
|
||||
log("added: " + d);
|
||||
}
|
||||
|
||||
// 17 --> ID00017
|
||||
protected static String id2String(int scoreAndID) {
|
||||
String s = "000000000" + scoreAndID;
|
||||
int n = ("" + N_DOCS).length() + 3;
|
||||
int k = s.length() - n;
|
||||
return "ID" + s.substring(k);
|
||||
}
|
||||
|
||||
// some text line for regular search
|
||||
private static String textLine(int docNum) {
|
||||
return DOC_TEXT_LINES[docNum % DOC_TEXT_LINES.length];
|
||||
}
|
||||
|
||||
// extract expected doc score from its ID Field: "ID7" --> 7.0
|
||||
protected static float expectedFieldScore(String docIDFieldVal) {
|
||||
return Float.parseFloat(docIDFieldVal.substring(2));
|
||||
}
|
||||
|
||||
// debug messages (change DBG to true for anything to print)
|
||||
protected static void log(Object o) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(o.toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,97 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Holds all implementations of classes in the o.a.l.s.function package as a
|
||||
* back-compatibility test. It does not run any tests per-se, however if
|
||||
* someone adds a method to an interface or abstract method to an abstract
|
||||
* class, one of the implementations here will fail to compile and so we know
|
||||
* back-compat policy was violated.
|
||||
*/
|
||||
final class JustCompileSearchFunction {
|
||||
|
||||
private static final String UNSUPPORTED_MSG = "unsupported: used for back-compat testing only !";
|
||||
|
||||
static final class JustCompileDocValues extends DocValues {
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final class JustCompileFieldCacheSource extends FieldCacheSource {
|
||||
|
||||
public JustCompileFieldCacheSource(String field) {
|
||||
super(field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource other) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int cachedFieldSourceHashCode() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getCachedFieldValues(FieldCache cache, String field,
|
||||
IndexReader reader) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final class JustCompileValueSource extends ValueSource {
|
||||
@Override
|
||||
public String description() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,113 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* DocValues TestCase
|
||||
*/
|
||||
public class TestDocValues extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testGetMinValue() {
|
||||
float[] innerArray = new float[] { 1.0f, 2.0f, -1.0f, 100.0f };
|
||||
DocValuesTestImpl docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("-1.0f is the min value in the source array", -1.0f, docValues
|
||||
.getMinValue(), 0);
|
||||
|
||||
// test with without values - NaN
|
||||
innerArray = new float[] {};
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertTrue("max is NaN - no values in inner array", Float.isNaN(docValues
|
||||
.getMinValue()));
|
||||
}
|
||||
@Test
|
||||
public void testGetMaxValue() {
|
||||
float[] innerArray = new float[] { 1.0f, 2.0f, -1.0f, 10.0f };
|
||||
DocValuesTestImpl docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("10.0f is the max value in the source array", 10.0f, docValues
|
||||
.getMaxValue(), 0);
|
||||
|
||||
innerArray = new float[] { -3.0f, -1.0f, -100.0f };
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("-1.0f is the max value in the source array", -1.0f, docValues
|
||||
.getMaxValue(), 0);
|
||||
|
||||
innerArray = new float[] { -3.0f, -1.0f, 100.0f, Float.MAX_VALUE,
|
||||
Float.MAX_VALUE - 1 };
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals(Float.MAX_VALUE + " is the max value in the source array",
|
||||
Float.MAX_VALUE, docValues.getMaxValue(), 0);
|
||||
|
||||
// test with without values - NaN
|
||||
innerArray = new float[] {};
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertTrue("max is NaN - no values in inner array", Float.isNaN(docValues
|
||||
.getMaxValue()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetAverageValue() {
|
||||
float[] innerArray = new float[] { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
DocValuesTestImpl docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("the average is 1.0f", 1.0f, docValues.getAverageValue(), 0);
|
||||
|
||||
innerArray = new float[] { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f };
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("the average is 3.5f", 3.5f, docValues.getAverageValue(), 0);
|
||||
|
||||
// test with negative values
|
||||
innerArray = new float[] { -1.0f, 2.0f };
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertEquals("the average is 0.5f", 0.5f, docValues.getAverageValue(), 0);
|
||||
|
||||
// test with without values - NaN
|
||||
innerArray = new float[] {};
|
||||
docValues = new DocValuesTestImpl(innerArray);
|
||||
assertTrue("the average is NaN - no values in inner array", Float
|
||||
.isNaN(docValues.getAverageValue()));
|
||||
}
|
||||
|
||||
static class DocValuesTestImpl extends DocValues {
|
||||
float[] innerArray;
|
||||
|
||||
DocValuesTestImpl(float[] innerArray) {
|
||||
this.innerArray = innerArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.lucene.search.function.DocValues#floatVal(int)
|
||||
*/
|
||||
@Override
|
||||
public float floatVal(int doc) {
|
||||
return innerArray[doc];
|
||||
}
|
||||
|
||||
/**
|
||||
* @see org.apache.lucene.search.function.DocValues#toString(int)
|
||||
*/
|
||||
@Override
|
||||
public String toString(int doc) {
|
||||
return Integer.toString(doc);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -1,246 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test FieldScoreQuery search.
|
||||
* <p>
|
||||
* Tests here create an index with a few documents, each having
|
||||
* an int value indexed field and a float value indexed field.
|
||||
* The values of these fields are later used for scoring.
|
||||
* <p>
|
||||
* The rank tests use Hits to verify that docs are ordered (by score) as expected.
|
||||
* <p>
|
||||
* The exact score tests use TopDocs top to verify the exact score.
|
||||
*/
|
||||
public class TestFieldScoreQuery extends FunctionTestSetup {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
createIndex(true);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankByte () throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankShort () throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankInt () throws Exception {
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankFloat () throws Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestRank(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs in expected order.
|
||||
private void doTestRank (String field, FieldScoreQuery.Type tp) throws Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
Query q = new FieldScoreQuery(field,tp);
|
||||
log("test: "+q);
|
||||
QueryUtils.check(random, q,s);
|
||||
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length);
|
||||
String prevID = "ID"+(N_DOCS+1); // greater than all ids of docs in this test
|
||||
for (int i=0; i<h.length; i++) {
|
||||
String resID = s.doc(h[i].doc).get(ID_FIELD);
|
||||
log(i+". score="+h[i].score+" - "+resID);
|
||||
log(s.explain(q,h[i].doc));
|
||||
assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
|
||||
prevID = resID;
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreByte () throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreShort () throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreInt () throws Exception {
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreFloat () throws Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestExactScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs with expected score.
|
||||
private void doTestExactScore (String field, FieldScoreQuery.Type tp) throws Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
Query q = new FieldScoreQuery(field,tp);
|
||||
TopDocs td = s.search(q,null,1000);
|
||||
assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
for (ScoreDoc aSd : sd) {
|
||||
float score = aSd.score;
|
||||
log(s.explain(q, aSd.doc));
|
||||
String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
|
||||
float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
|
||||
assertEquals("score of " + id + " shuould be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
@Test
|
||||
public void testCachingByte () throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
@Test
|
||||
public void testCachingShort () throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
@Test
|
||||
public void testCachingInt () throws Exception {
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
@Test
|
||||
public void testCachingFloat () throws Exception {
|
||||
// INT field values can be parsed as float
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestCaching(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
|
||||
private void doTestCaching (String field, FieldScoreQuery.Type tp) throws Exception {
|
||||
// prepare expected array types for comparison
|
||||
HashMap<FieldScoreQuery.Type,Object> expectedArrayTypes = new HashMap<FieldScoreQuery.Type,Object>();
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.BYTE, new byte[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.SHORT, new short[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.INT, new int[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.FLOAT, new float[0]);
|
||||
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
Object[] innerArray = new Object[s.getIndexReader().getSequentialSubReaders().length];
|
||||
|
||||
boolean warned = false; // print warning once.
|
||||
for (int i=0; i<10; i++) {
|
||||
FieldScoreQuery q = new FieldScoreQuery(field,tp);
|
||||
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length);
|
||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
|
||||
for (int j = 0; j < leaves.length; j++) {
|
||||
AtomicReaderContext leaf = leaves[j];
|
||||
try {
|
||||
if (i == 0) {
|
||||
innerArray[j] = q.valSrc.getValues(leaf).getInnerArray();
|
||||
log(i + ". compare: " + innerArray[j].getClass() + " to "
|
||||
+ expectedArrayTypes.get(tp).getClass());
|
||||
assertEquals(
|
||||
"field values should be cached in the correct array type!",
|
||||
innerArray[j].getClass(), expectedArrayTypes.get(tp).getClass());
|
||||
} else {
|
||||
log(i + ". compare: " + innerArray[j] + " to "
|
||||
+ q.valSrc.getValues(leaf).getInnerArray());
|
||||
assertSame("field values should be cached and reused!", innerArray[j],
|
||||
q.valSrc.getValues(leaf).getInnerArray());
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
if (!warned) {
|
||||
System.err.println("WARNING: " + testName()
|
||||
+ " cannot fully test values of " + q);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
s.close();
|
||||
// verify new values are reloaded (not reused) for a new reader
|
||||
s = new IndexSearcher(dir, true);
|
||||
FieldScoreQuery q = new FieldScoreQuery(field,tp);
|
||||
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length);
|
||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
|
||||
for (int j = 0; j < leaves.length; j++) {
|
||||
AtomicReaderContext leaf = leaves[j];
|
||||
try {
|
||||
log("compare: " + innerArray + " to "
|
||||
+ q.valSrc.getValues(leaf).getInnerArray());
|
||||
assertNotSame(
|
||||
"cached field values should not be reused if reader as changed!",
|
||||
innerArray, q.valSrc.getValues(leaf).getInnerArray());
|
||||
} catch (UnsupportedOperationException e) {
|
||||
if (!warned) {
|
||||
System.err.println("WARNING: " + testName()
|
||||
+ " cannot fully test values of " + q);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
private String testName() {
|
||||
return getClass().getName()+"."+ getName();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.document.*;
|
||||
|
||||
public class TestValueSource extends LuceneTestCase {
|
||||
|
||||
public void testMultiValueSource() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
|
||||
((LogMergePolicy) w.getConfig().getMergePolicy()).setMergeFactor(10);
|
||||
Document doc = new Document();
|
||||
Field f = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED);
|
||||
doc.add(f);
|
||||
|
||||
for(int i=0;i<17;i++) {
|
||||
f.setValue(""+i);
|
||||
w.addDocument(doc);
|
||||
w.commit();
|
||||
}
|
||||
|
||||
IndexReader r = IndexReader.open(w, true);
|
||||
w.close();
|
||||
|
||||
assertTrue("reader=" + r, r.getSequentialSubReaders().length > 1);
|
||||
|
||||
ValueSource s1 = new IntFieldSource("field");
|
||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext());
|
||||
DocValues v1 = null;
|
||||
DocValues v2 = new MultiValueSource(s1).getValues(r.getTopReaderContext());
|
||||
int leafOrd = -1;
|
||||
for(int i=0;i<r.maxDoc();i++) {
|
||||
int subIndex = ReaderUtil.subIndex(i, leaves);
|
||||
if (subIndex != leafOrd) {
|
||||
leafOrd = subIndex;
|
||||
v1 = s1.getValues(leaves[leafOrd]);
|
||||
}
|
||||
assertEquals(v1.intVal(i - leaves[leafOrd].docBase), i);
|
||||
assertEquals(v2.intVal(i), i);
|
||||
}
|
||||
|
||||
FieldCache.DEFAULT.purgeAllCaches();
|
||||
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.search.function;
|
||||
package org.apache.lucene.queries.function.valuesource;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -17,10 +17,13 @@ package org.apache.lucene.search.function;
|
|||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.values.IndexDocValues;
|
||||
import org.apache.lucene.index.values.ValueType;
|
||||
import org.apache.lucene.queries.function.DocValues;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
|
||||
/**
|
||||
* Expert: obtains numeric field values from a {@link IndexDocValues} field.
|
||||
|
@ -39,13 +42,14 @@ public class NumericIndexDocValueSource extends ValueSource {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocValues getValues(AtomicReaderContext context) throws IOException {
|
||||
final IndexDocValues.Source source = context.reader.docValues(field)
|
||||
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
|
||||
final IndexDocValues.Source source = readerContext.reader.docValues(field)
|
||||
.getSource();
|
||||
ValueType type = source.type();
|
||||
switch (type) {
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
// TODO (chrism) Change to use FloatDocValues and IntDocValues
|
||||
return new DocValues() {
|
||||
|
||||
@Override
|
|
@ -110,7 +110,7 @@ public class OrdFieldSource extends ValueSource {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
return o.getClass() == OrdFieldSource.class && this.field.equals(((OrdFieldSource)o).field);
|
||||
return o != null && o.getClass() == OrdFieldSource.class && this.field.equals(((OrdFieldSource)o).field);
|
||||
}
|
||||
|
||||
private static final int hcode = OrdFieldSource.class.hashCode();
|
||||
|
|
|
@ -79,7 +79,7 @@ public class ReverseOrdFieldSource extends ValueSource {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o.getClass() != ReverseOrdFieldSource.class) return false;
|
||||
if (o == null || (o.getClass() != ReverseOrdFieldSource.class)) return false;
|
||||
ReverseOrdFieldSource other = (ReverseOrdFieldSource)o;
|
||||
return this.field.equals(other.field);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.queries;
|
|||
|
||||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.FunctionTestSetup;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
||||
|
@ -51,11 +52,8 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
@Test
|
||||
public void testCustomScoreByte() throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
ByteValuesCreator valuesCreator = new ByteValuesCreator(INT_FIELD, null, CachedArrayCreator.CACHE_VALUES_AND_BITS);
|
||||
FunctionQuery functionQuery = new FunctionQuery(new ByteFieldSource(valuesCreator));
|
||||
|
||||
doTestCustomScore(functionQuery, 1.0);
|
||||
doTestCustomScore(functionQuery, 2.0);
|
||||
doTestCustomScore(BYTE_VALUESOURCE, 1.0);
|
||||
doTestCustomScore(BYTE_VALUESOURCE, 2.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -64,11 +62,8 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
@Test
|
||||
public void testCustomScoreShort() throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
ShortValuesCreator valuesCreator = new ShortValuesCreator(INT_FIELD, null, CachedArrayCreator.CACHE_VALUES_AND_BITS);
|
||||
FunctionQuery functionQuery = new FunctionQuery(new ShortFieldSource(valuesCreator));
|
||||
|
||||
doTestCustomScore(functionQuery, 1.0);
|
||||
doTestCustomScore(functionQuery, 3.0);
|
||||
doTestCustomScore(SHORT_VALUESOURCE, 1.0);
|
||||
doTestCustomScore(SHORT_VALUESOURCE, 3.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -76,11 +71,8 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
*/
|
||||
@Test
|
||||
public void testCustomScoreInt() throws Exception {
|
||||
IntValuesCreator valuesCreator = new IntValuesCreator(INT_FIELD, null, CachedArrayCreator.CACHE_VALUES_AND_BITS);
|
||||
FunctionQuery functionQuery = new FunctionQuery(new IntFieldSource(valuesCreator));
|
||||
|
||||
doTestCustomScore(functionQuery, 1.0);
|
||||
doTestCustomScore(functionQuery, 4.0);
|
||||
doTestCustomScore(INT_VALUESOURCE, 1.0);
|
||||
doTestCustomScore(INT_VALUESOURCE, 4.0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -90,17 +82,14 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
public void testCustomScoreFloat() throws Exception {
|
||||
// INT field can be parsed as float
|
||||
FloatValuesCreator valuesCreator = new FloatValuesCreator(INT_FIELD, null, CachedArrayCreator.CACHE_VALUES_AND_BITS);
|
||||
FunctionQuery functionQuery = new FunctionQuery(new FloatFieldSource(valuesCreator));
|
||||
FloatFieldSource fieldSource = new FloatFieldSource(valuesCreator);
|
||||
|
||||
doTestCustomScore(functionQuery, 1.0);
|
||||
doTestCustomScore(functionQuery, 5.0);
|
||||
doTestCustomScore(INT_AS_FLOAT_VALUESOURCE, 1.0);
|
||||
doTestCustomScore(INT_AS_FLOAT_VALUESOURCE, 5.0);
|
||||
|
||||
// same values, but in float format
|
||||
valuesCreator = new FloatValuesCreator(FLOAT_FIELD, null, CachedArrayCreator.CACHE_VALUES_AND_BITS);
|
||||
functionQuery = new FunctionQuery(new FloatFieldSource(valuesCreator));
|
||||
|
||||
doTestCustomScore(functionQuery, 1.0);
|
||||
doTestCustomScore(functionQuery, 6.0);
|
||||
doTestCustomScore(FLOAT_VALUESOURCE, 1.0);
|
||||
doTestCustomScore(FLOAT_VALUESOURCE, 6.0);
|
||||
}
|
||||
|
||||
// must have static class otherwise serialization tests fail
|
||||
|
@ -250,7 +239,8 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
|
|||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs with expected score.
|
||||
private void doTestCustomScore(FunctionQuery functionQuery, double dboost) throws Exception {
|
||||
private void doTestCustomScore(ValueSource valueSource, double dboost) throws Exception {
|
||||
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
||||
float boost = (float) dboost;
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, TEXT_FIELD, anlzr);
|
||||
|
|
|
@ -7,6 +7,11 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
|
||||
import org.apache.lucene.search.cache.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
@ -48,6 +53,14 @@ public abstract class FunctionTestSetup extends LuceneTestCase {
|
|||
protected static final String INT_FIELD = "iii";
|
||||
protected static final String FLOAT_FIELD = "fff";
|
||||
|
||||
private static final int CREATOR_FLAGS = CachedArrayCreator.CACHE_VALUES_AND_BITS;
|
||||
|
||||
protected ValueSource BYTE_VALUESOURCE = new ByteFieldSource(new ByteValuesCreator(INT_FIELD, null, CREATOR_FLAGS));
|
||||
protected ValueSource SHORT_VALUESOURCE = new ShortFieldSource(new ShortValuesCreator(INT_FIELD, null, CREATOR_FLAGS));
|
||||
protected ValueSource INT_VALUESOURCE = new IntFieldSource(new IntValuesCreator(INT_FIELD, null, CREATOR_FLAGS));
|
||||
protected ValueSource INT_AS_FLOAT_VALUESOURCE = new FloatFieldSource(new FloatValuesCreator(INT_FIELD, null, CREATOR_FLAGS));
|
||||
protected ValueSource FLOAT_VALUESOURCE = new FloatFieldSource(new FloatValuesCreator(FLOAT_FIELD, null, CREATOR_FLAGS));
|
||||
|
||||
private static final String DOC_TEXT_LINES[] = {
|
||||
"Well, this is just some plain text we use for creating the ",
|
||||
"test documents. It used to be a text from an online collection ",
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
package org.apache.lucene.queries.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.queries.function.FunctionQuery;
|
||||
import org.apache.lucene.queries.function.ValueSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ByteFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.FloatFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.IntFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ShortFieldSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.cache.*;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Test FieldScoreQuery search.
|
||||
* <p>
|
||||
* Tests here create an index with a few documents, each having
|
||||
* an int value indexed field and a float value indexed field.
|
||||
* The values of these fields are later used for scoring.
|
||||
* <p>
|
||||
* The rank tests use Hits to verify that docs are ordered (by score) as expected.
|
||||
* <p>
|
||||
* The exact score tests use TopDocs top to verify the exact score.
|
||||
*/
|
||||
public class TestFieldScoreQuery extends FunctionTestSetup {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
createIndex(true);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankByte () throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestRank(BYTE_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankShort () throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestRank(SHORT_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankInt () throws Exception {
|
||||
doTestRank(INT_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns docs in expected order. */
|
||||
@Test
|
||||
public void testRankFloat () throws Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestRank(INT_AS_FLOAT_VALUESOURCE);
|
||||
// same values, but in flot format
|
||||
doTestRank(FLOAT_VALUESOURCE);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs in expected order.
|
||||
private void doTestRank (ValueSource valueSource) throws Exception {
|
||||
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
log("test: "+ functionQuery);
|
||||
QueryUtils.check(random, functionQuery,s);
|
||||
ScoreDoc[] h = s.search(functionQuery, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length);
|
||||
String prevID = "ID"+(N_DOCS+1); // greater than all ids of docs in this test
|
||||
for (int i=0; i<h.length; i++) {
|
||||
String resID = s.doc(h[i].doc).get(ID_FIELD);
|
||||
log(i+". score="+h[i].score+" - "+resID);
|
||||
log(s.explain(functionQuery,h[i].doc));
|
||||
assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
|
||||
prevID = resID;
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreByte () throws Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestExactScore(BYTE_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreShort () throws Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestExactScore(SHORT_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreInt () throws Exception {
|
||||
doTestExactScore(INT_VALUESOURCE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns the expected scores. */
|
||||
@Test
|
||||
public void testExactScoreFloat () throws Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestExactScore(INT_AS_FLOAT_VALUESOURCE);
|
||||
// same values, but in flot format
|
||||
doTestExactScore(FLOAT_VALUESOURCE);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs with expected score.
|
||||
private void doTestExactScore (ValueSource valueSource) throws Exception {
|
||||
FunctionQuery functionQuery = new FunctionQuery(valueSource);
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
TopDocs td = s.search(functionQuery,null,1000);
|
||||
assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
for (ScoreDoc aSd : sd) {
|
||||
float score = aSd.score;
|
||||
log(s.explain(functionQuery, aSd.doc));
|
||||
String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
|
||||
float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
|
||||
assertEquals("score of " + id + " shuould be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package org.apache.lucene.search.function;
|
||||
package org.apache.lucene.queries.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -19,6 +19,8 @@ package org.apache.lucene.search.function;
|
|||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.queries.function.valuesource.OrdFieldSource;
|
||||
import org.apache.lucene.queries.function.valuesource.ReverseOrdFieldSource;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -63,12 +65,12 @@ public class TestOrdValues extends FunctionTestSetup {
|
|||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
ValueSource vs;
|
||||
if (inOrder) {
|
||||
vs = new MultiValueSource(new OrdFieldSource(field));
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new MultiValueSource(new ReverseOrdFieldSource(field));
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
|
||||
Query q = new ValueSourceQuery(vs);
|
||||
Query q = new FunctionQuery(vs);
|
||||
log("test: " + q);
|
||||
QueryUtils.check(random, q, s);
|
||||
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
|
||||
|
@ -117,7 +119,7 @@ public class TestOrdValues extends FunctionTestSetup {
|
|||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
Query q = new ValueSourceQuery(vs);
|
||||
Query q = new FunctionQuery(vs);
|
||||
TopDocs td = s.search(q, null, 1000);
|
||||
assertEquals("All docs should be matched!", N_DOCS, td.totalHits);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
|
@ -136,125 +138,6 @@ public class TestOrdValues extends FunctionTestSetup {
|
|||
s.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test caching OrdFieldSource
|
||||
*/
|
||||
@Test
|
||||
public void testCachingOrd() throws CorruptIndexException, Exception {
|
||||
doTestCaching(ID_FIELD, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test caching for ReverseOrdFieldSource
|
||||
*/
|
||||
@Test
|
||||
public void testCachingReverseOrd() throws CorruptIndexException, Exception {
|
||||
doTestCaching(ID_FIELD, false);
|
||||
}
|
||||
|
||||
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
|
||||
private void doTestCaching(String field, boolean inOrder) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir, true);
|
||||
Object innerArray = null;
|
||||
|
||||
boolean warned = false; // print warning once
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
ValueSource vs;
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
ValueSourceQuery q = new ValueSourceQuery(vs);
|
||||
ScoreDoc[] h = s.search(q, null, 1000).scoreDocs;
|
||||
try {
|
||||
assertEquals("All docs should be matched!", N_DOCS, h.length);
|
||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
|
||||
|
||||
for (AtomicReaderContext leaf : leaves) {
|
||||
if (i == 0) {
|
||||
innerArray = q.valSrc.getValues(leaf).getInnerArray();
|
||||
} else {
|
||||
log(i + ". compare: " + innerArray + " to " + q.valSrc.getValues(leaf).getInnerArray());
|
||||
assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(leaf).getInnerArray());
|
||||
}
|
||||
}
|
||||
} catch (UnsupportedOperationException e) {
|
||||
if (!warned) {
|
||||
System.err.println("WARNING: " + testName() + " cannot fully test values of " + q);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ValueSource vs;
|
||||
ValueSourceQuery q;
|
||||
ScoreDoc[] h;
|
||||
|
||||
// verify that different values are loaded for a different field
|
||||
String field2 = INT_FIELD;
|
||||
assertFalse(field.equals(field2)); // otherwise this test is meaningless.
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field2);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field2);
|
||||
}
|
||||
q = new ValueSourceQuery(vs);
|
||||
h = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!", N_DOCS, h.length);
|
||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(s.getTopReaderContext());
|
||||
|
||||
for (AtomicReaderContext leaf : leaves) {
|
||||
try {
|
||||
log("compare (should differ): " + innerArray + " to "
|
||||
+ q.valSrc.getValues(leaf).getInnerArray());
|
||||
assertNotSame(
|
||||
"different values should be loaded for a different field!",
|
||||
innerArray, q.valSrc.getValues(leaf).getInnerArray());
|
||||
} catch (UnsupportedOperationException e) {
|
||||
if (!warned) {
|
||||
System.err.println("WARNING: " + testName()
|
||||
+ " cannot fully test values of " + q);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
s.close();
|
||||
// verify new values are reloaded (not reused) for a new reader
|
||||
s = new IndexSearcher(dir, true);
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
q = new ValueSourceQuery(vs);
|
||||
h = s.search(q, null, 1000).scoreDocs;
|
||||
assertEquals("All docs should be matched!", N_DOCS, h.length);
|
||||
leaves = ReaderUtil.leaves(s.getTopReaderContext());
|
||||
|
||||
for (AtomicReaderContext leaf : leaves) {
|
||||
try {
|
||||
log("compare (should differ): " + innerArray + " to "
|
||||
+ q.valSrc.getValues(leaf).getInnerArray());
|
||||
assertNotSame(
|
||||
"cached field values should not be reused if reader as changed!",
|
||||
innerArray, q.valSrc.getValues(leaf).getInnerArray());
|
||||
} catch (UnsupportedOperationException e) {
|
||||
if (!warned) {
|
||||
System.err.println("WARNING: " + testName()
|
||||
+ " cannot fully test values of " + q);
|
||||
warned = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
s.close();
|
||||
}
|
||||
|
||||
private String testName() {
|
||||
return getClass().getName() + "." + getName();
|
||||
}
|
||||
|
||||
// LUCENE-1250
|
||||
public void testEqualsNull() throws Exception {
|
||||
OrdFieldSource ofs = new OrdFieldSource("f");
|
Loading…
Reference in New Issue