diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b3caabde88d..3f2b32c5468 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -39,6 +39,12 @@ Other ======================= Lucene 8.2.0 ======================= +New Features + +* LUCENE-8803: Provide a FeatureSortField to allow sorting search hits by descending value of a + feature. This is exposed via the factory method FeatureField#newFeatureSort. + (Colin Goodheart-Smithe via Adrien Grand) + Bug Fixes * LUCENE-8785: Ensure new threadstates are locked before retrieving the number of active threadstates. diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java index 33255638e6b..d060829463f 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java +++ b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java @@ -30,7 +30,9 @@ import org.apache.lucene.index.TermStates; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; @@ -196,7 +198,7 @@ public final class FeatureField extends Field { private static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15; - private static float decodeFeatureValue(float freq) { + static float decodeFeatureValue(float freq) { if (freq > MAX_FREQ) { // This is never used in practice but callers of the SimScorer API might // occasionally call it on eg. 
Float.MAX_VALUE to compute the max score @@ -518,4 +520,22 @@ public final class FeatureField extends Field { float avgFreq = (float) ((double) states.totalTermFreq() / states.docFreq()); return decodeFeatureValue(avgFreq); } + + /** + * Creates a SortField for sorting by the value of a feature. + *

+ * This sort orders documents by descending value of a feature. The value returned in {@link FieldDoc} for + * the hits contains a Float instance with the feature value. + *

+ * If a document is missing the field, then it is treated as having a value of 0.0f. + *

+ * + * @param field field name. Must not be null. + * @param featureName feature name. Must not be null. + * @return SortField ordering documents by the value of the feature + * @throws NullPointerException if {@code field} or {@code featureName} is null. + */ + public static SortField newFeatureSort(String field, String featureName) { + return new FeatureSortField(field, featureName); + } } diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java new file mode 100644 index 00000000000..1a6df723240 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.document; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.search.SimpleFieldComparator; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; + +/** + * Sorts using the value of a specified feature name from a {@link FeatureField}. + */ +final class FeatureSortField extends SortField { + + private final String featureName; + + /** + * Creates a {@link FeatureSortField} that can be used to sort hits by + * the value of a particular feature in a {@link FeatureField}. + * + * @param featureName The name of the feature to use for the sort value + */ + public FeatureSortField(String field, String featureName) { + super(Objects.requireNonNull(field), SortField.Type.CUSTOM); + this.featureName = Objects.requireNonNull(featureName); + } + + @Override + public FieldComparator getComparator(int numHits, int sortPos) { + return new FeatureComparator(numHits, getField(), featureName); + } + + @Override + public void setMissingValue(Object missingValue) { + throw new IllegalArgumentException("Missing value not supported for FeatureSortField"); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + featureName.hashCode(); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + FeatureSortField other = (FeatureSortField) obj; + return Objects.equals(featureName, other.featureName); + } + + @Override + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append("