diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b3caabde88d..3f2b32c5468 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -39,6 +39,12 @@ Other ======================= Lucene 8.2.0 ======================= +New Features + +* LUCENE-8803: Provide a FeatureSortfield to allow sorting search hits by descending value of a + feature. This is exposed via the factory method FeatureField#newFeatureSort. + (Colin Goodheart-Smithe via Adrien Grand) + Bug Fixes * LUCENE-8785: Ensure new threadstates are locked before retrieving the number of active threadstates. diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java index 33255638e6b..d060829463f 100644 --- a/lucene/core/src/java/org/apache/lucene/document/FeatureField.java +++ b/lucene/core/src/java/org/apache/lucene/document/FeatureField.java @@ -30,7 +30,9 @@ import org.apache.lucene.index.TermStates; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Explanation; +import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.Similarity.SimScorer; @@ -196,7 +198,7 @@ public final class FeatureField extends Field { private static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15; - private static float decodeFeatureValue(float freq) { + static float decodeFeatureValue(float freq) { if (freq > MAX_FREQ) { // This is never used in practice but callers of the SimScorer API might // occasionally call it on eg. 
Float.MAX_VALUE to compute the max score @@ -518,4 +520,22 @@ public final class FeatureField extends Field { float avgFreq = (float) ((double) states.totalTermFreq() / states.docFreq()); return decodeFeatureValue(avgFreq); } + + /** + * Creates a SortField for sorting by the value of a feature. + *
+ * This sort orders documents by descending value of a feature. The value returned in {@link FieldDoc} for + * the hits contains a Float instance with the feature value. + *
+ * If a document is missing the field, then it is treated as having a value of {@code 0.0f}.
+ *
+ *
+ * @param field field name. Must not be null.
+ * @param featureName feature name. Must not be null.
+ * @return SortField ordering documents by the value of the feature
+ * @throws NullPointerException if {@code field} or {@code featureName} is null.
+ */
+ public static SortField newFeatureSort(String field, String featureName) {
+   // Both arguments are null-checked by the FeatureSortField constructor.
+   final SortField featureSort = new FeatureSortField(field, featureName);
+   return featureSort;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java b/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java
new file mode 100644
index 00000000000..1a6df723240
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/FeatureSortField.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.Objects;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.SimpleFieldComparator;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Sorts using the value of a specified feature name from a {@link FeatureField}.
+ */
+final class FeatureSortField extends SortField {
+
+ private final String featureName;
+
+ /**
+  * Creates a {@link FeatureSortField} that can be used to sort hits by
+  * the value of a particular feature in a {@link FeatureField}.
+  *
+  * @param field The name of the field that holds the feature. Must not be null.
+  * @param featureName The name of the feature to use for the sort value. Must not be null.
+  * @throws NullPointerException if {@code field} or {@code featureName} is null.
+  */
+ public FeatureSortField(String field, String featureName) {
+   // Name the offending argument so a null is easy to trace back to the caller.
+   super(Objects.requireNonNull(field, "field must not be null"), SortField.Type.CUSTOM);
+   this.featureName = Objects.requireNonNull(featureName, "featureName must not be null");
+ }
+
+ /**
+  * Returns a new {@code FeatureComparator} built from {@code numHits}, this sort's
+  * field name and the configured feature name.
+  *
+  * @param numHits passed through to the comparator
+  * @param sortPos not used by this implementation
+  * @return a new comparator instance on every call
+  */
+ @Override
+ public FieldComparator<?> getComparator(int numHits, int sortPos) {
+   return new FeatureComparator(numHits, getField(), featureName);
+ }
+
+ /**
+  * Always rejects the call: this sort field does not accept a caller-supplied missing value.
+  *
+  * @param missingValue ignored
+  * @throws IllegalArgumentException on every invocation
+  */
+ @Override
+ public void setMissingValue(Object missingValue) {
+   final String reason = "Missing value not supported for FeatureSortField";
+   throw new IllegalArgumentException(reason);
+ }
+
+ /**
+  * Combines the superclass hash with the hash of the feature name, using 31 as the multiplier.
+  */
+ @Override
+ public int hashCode() {
+   return 31 * super.hashCode() + featureName.hashCode();
+ }
+
+ /**
+  * Two instances are equal when the superclass considers them equal, they have exactly
+  * the same runtime class, and their feature names match.
+  */
+ @Override
+ public boolean equals(Object obj) {
+   if (this == obj) {
+     return true;
+   }
+   // The superclass check runs first, exactly as before; the class check only runs if it passes.
+   if (!super.equals(obj) || getClass() != obj.getClass()) {
+     return false;
+   }
+   final FeatureSortField that = (FeatureSortField) obj;
+   return Objects.equals(featureName, that.featureName);
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder builder = new StringBuilder();
+ builder.append("