Remove unnecessary fields loop from extractWeightedSpanTerms() (#12965)

2024-01-08 22:01:56 +01:00 · 2024-01-08 22:01:56 +01:00 · a32f6acadf
parent 376bd24693
commit a32f6acadf
1 changed files with 43 additions and 73 deletions
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -100,8 +100,8 @@ import org.apache.lucene.util.IOUtils;
 public class WeightedSpanTermExtractor {
  private String fieldName;
-  private TokenStream tokenStream; // set subsequent to getWeightedSpanTerms* methods
+  private TokenStream tokenStream; // set after getWeightedSpanTerms* methods
-  private String defaultField;
+  private final String defaultField;
  private boolean expandMultiTermQuery;
  private boolean cachedTokenStream;
  private boolean wrapToCaching = true;
@@ -244,7 +244,6 @@ public class WeightedSpanTermExtractor {
          && (!expandMultiTermQuery || !fieldNameComparator(((MultiTermQuery) query).getField()))) {
        return;
      }
      Query origQuery = query;
      final IndexReader reader = getLeafContext().reader();
      Query rewritten;
      if (query instanceof MultiTermQuery) {
@@ -252,12 +251,11 @@ public class WeightedSpanTermExtractor {
            MultiTermQuery.SCORING_BOOLEAN_REWRITE.rewrite(
                new IndexSearcher(reader), (MultiTermQuery) query);
      } else {
-        rewritten = origQuery.rewrite(new IndexSearcher(reader));
+        rewritten = query.rewrite(new IndexSearcher(reader));
      }
-      if (rewritten != origQuery) {
+      if (rewritten != query) {
        // only rewrite once and then flatten again - the rewritten query could have a special
-        // treatment
+        // treatment if this method is overwritten in a subclass or above in the next recursion
        // if this method is overwritten in a subclass or above in the next recursion
        extract(rewritten, boost, terms);
      } else {
        extractUnknownQuery(query, terms);
@@ -293,67 +291,49 @@ public class WeightedSpanTermExtractor {
   */
  protected void extractWeightedSpanTerms(
      Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery, float boost) throws IOException {
    Set<String> fieldNames;
-    if (fieldName == null) {
+    Set<String> queryFieldNames = new HashSet<>();
-      fieldNames = new HashSet<>();
+    collectSpanQueryFields(spanQuery, queryFieldNames);
-      collectSpanQueryFields(spanQuery, fieldNames);
+    if (fieldName != null
-    } else {
+        && queryFieldNames.contains(fieldName) == false
-      fieldNames = new HashSet<>(1);
+        && (defaultField == null || queryFieldNames.contains(defaultField) == false)) {
-      fieldNames.add(fieldName);
+      return;
    }
    // To support the use of the default field name
    if (defaultField != null) {
      fieldNames.add(defaultField);
    }
    Map<String, SpanQuery> queries = new HashMap<>();
    Set<Term> nonWeightedTerms = new HashSet<>();
    final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
    final IndexSearcher searcher = new IndexSearcher(getLeafContext());
    searcher.setQueryCache(null);
-    if (mustRewriteQuery) {
+    final SpanQuery query = mustRewriteQuery ? (SpanQuery) searcher.rewrite(spanQuery) : spanQuery;
-      final SpanQuery rewrittenQuery = (SpanQuery) searcher.rewrite(spanQuery);
+
-      for (final String field : fieldNames) {
+    final Set<Term> nonWeightedTerms = new HashSet<>();
-        queries.put(field, rewrittenQuery);
+    query.visit(QueryVisitor.termCollector(nonWeightedTerms));
-      }
+    if (nonWeightedTerms.isEmpty()) {
-      rewrittenQuery.visit(QueryVisitor.termCollector(nonWeightedTerms));
+      return;
    } else {
      spanQuery.visit(QueryVisitor.termCollector(nonWeightedTerms));
    }
-    List<PositionSpan> spanPositions = new ArrayList<>();
+    final List<PositionSpan> spanPositions = new ArrayList<>();
-    for (final String field : fieldNames) {
+    LeafReaderContext context = getLeafContext();
-      final SpanQuery q;
+    SpanWeight w =
-      if (mustRewriteQuery) {
+        (SpanWeight)
-        q = queries.get(field);
+            searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1);
-      } else {
+    final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
-        q = spanQuery;
+    if (spans == null) {
-      }
+      return;
-      LeafReaderContext context = getLeafContext();
+    }
      SpanWeight w =
          (SpanWeight) searcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE_NO_SCORES, 1);
      Bits acceptDocs = context.reader().getLiveDocs();
      final Spans spans = w.getSpans(context, SpanWeight.Postings.POSITIONS);
      if (spans == null) {
        return;
      }
-      // collect span positions
+    final Bits acceptDocs = context.reader().getLiveDocs();
-      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
+    // collect span positions
-        if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
+    while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
-          continue;
+      if (acceptDocs != null && acceptDocs.get(spans.docID()) == false) {
-        }
+        continue;
-        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
+      }
-          spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
+      while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
-        }
+        spanPositions.add(new PositionSpan(spans.startPosition(), spans.endPosition() - 1));
      }
    }
-    if (spanPositions.size() == 0) {
+    if (spanPositions.isEmpty()) {
      // no spans found
      return;
    }
@@ -401,11 +381,9 @@ public class WeightedSpanTermExtractor {
  /** Necessary to implement matches for queries against <code>defaultField</code> */
  protected boolean fieldNameComparator(String fieldNameToCheck) {
-    boolean rv =
+    return fieldName == null
-        fieldName == null
+        || fieldName.equals(fieldNameToCheck)
-            || fieldName.equals(fieldNameToCheck)
+        || (defaultField != null && defaultField.equals(fieldNameToCheck));
            || (defaultField != null && defaultField.equals(fieldNameToCheck));
    return rv;
  }
  protected LeafReaderContext getLeafContext() throws IOException {
@@ -555,11 +533,7 @@ public class WeightedSpanTermExtractor {
  public Map<String, WeightedSpanTerm> getWeightedSpanTermsWithScores(
      Query query, float boost, TokenStream tokenStream, String fieldName, IndexReader reader)
      throws IOException {
-    if (fieldName != null) {
+    this.fieldName = fieldName;
      this.fieldName = fieldName;
    } else {
      this.fieldName = null;
    }
    this.tokenStream = tokenStream;
    Map<String, WeightedSpanTerm> terms = new PositionCheckingMap<>();
@@ -640,7 +614,6 @@ public class WeightedSpanTermExtractor {
   * This class makes sure that if both position sensitive and insensitive versions of the same term
   * are added, the position insensitive one wins.
   */
  @SuppressWarnings("serial")
  protected static class PositionCheckingMap<K> extends HashMap<K, WeightedSpanTerm> {
    @Override
@@ -650,15 +623,12 @@ public class WeightedSpanTermExtractor {
    }
    @Override
-    public WeightedSpanTerm put(K key, WeightedSpanTerm value) {
+    public WeightedSpanTerm put(K key, WeightedSpanTerm newTerm) {
-      WeightedSpanTerm prev = super.put(key, value);
+      WeightedSpanTerm prevTerm = super.put(key, newTerm);
-      if (prev == null) return prev;
+      if (prevTerm != null && prevTerm.positionSensitive == false) {
      WeightedSpanTerm prevTerm = prev;
      WeightedSpanTerm newTerm = value;
      if (!prevTerm.positionSensitive) {
        newTerm.positionSensitive = false;
      }
-      return prev;
+      return prevTerm;
    }
  }