LUCENE-8312: Leverage impacts to speed up SynonymQuery.

2018-05-25 08:47:47 +02:00 · 2018-05-25 08:47:47 +02:00 · 0a1de2c4a5
parent 24d79de796
commit 0a1de2c4a5
10 changed files with 756 additions and 214 deletions
--- a/lucene/core/src/java/org/apache/lucene/document/FeatureQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/document/FeatureQuery.java
@ -30,8 +30,8 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.ImpactsDISI;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MaxScoreCache;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Scorer;
@ -133,9 +133,9 @@ final class FeatureQuery extends Query {
          return null;
        }
-        SimScorer scorer = function.scorer(boost);
+        final SimScorer scorer = function.scorer(boost);
-        ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
+        final ImpactsEnum impacts = termsEnum.impacts(PostingsEnum.FREQS);
-        MaxScoreCache maxScoreCache = new MaxScoreCache(impacts, scorer);
+        final ImpactsDISI impactsDisi = new ImpactsDISI(impacts, impacts, scorer);
        return new Scorer(this) {
@ -151,19 +151,23 @@ final class FeatureQuery extends Query {
          @Override
          public DocIdSetIterator iterator() {
-            return impacts;
+            return impactsDisi;
          }
          @Override
          public int advanceShallow(int target) throws IOException {
-            return maxScoreCache.advanceShallow(target);
+            return impactsDisi.advanceShallow(target);
          }
          @Override
          public float getMaxScore(int upTo) throws IOException {
-            return maxScoreCache.getMaxScore(upTo);
+            return impactsDisi.getMaxScore(upTo);
          }
          @Override
          public void setMinCompetitiveScore(float minScore) {
            impactsDisi.setMinCompetitiveScore(minScore);
          }
        };
      }
--- a/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ImpactsEnum.java
@ -16,41 +16,14 @@
 */
 package org.apache.lucene.index;
 import java.io.IOException;
 /**
 * Extension of {@link PostingsEnum} which also provides information about
 * upcoming impacts.
 * @lucene.experimental
 */
-public abstract class ImpactsEnum extends PostingsEnum {
+public abstract class ImpactsEnum extends PostingsEnum implements ImpactsSource {
  /** Sole constructor. */
  protected ImpactsEnum() {}
  /**
   * Shallow-advance to {@code target}. This is cheaper than calling
   * {@link #advance(int)} and allows further calls to {@link #getImpacts()}
   * to ignore doc IDs that are less than {@code target} in order to get more
   * precise information about impacts.
   * This method may not be called on targets that are less than the current
   * {@link #docID()}.
   * After this method has been called, {@link #nextDoc()} may not be called
   * if the current doc ID is less than {@code target - 1} and
   * {@link #advance(int)} may not be called on targets that are less than
   * {@code target}.
   */
  public abstract void advanceShallow(int target) throws IOException;
  /**
   * Get information about upcoming impacts for doc ids that are greater than
   * or equal to the maximum of {@link #docID()} and the last target that was
   * passed to {@link #advanceShallow(int)}.
   * This method may not be called on an unpositioned iterator on which
   * {@link #advanceShallow(int)} has never been called.
   * NOTE: advancing this iterator may invalidate the returned impacts, so they
   * should not be used after the iterator has been advanced.
   */
  public abstract Impacts getImpacts() throws IOException;
 }
--- a/lucene/core/src/java/org/apache/lucene/index/ImpactsSource.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ImpactsSource.java
@ -0,0 +1,54 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.index;
 import java.io.IOException;
 import org.apache.lucene.search.DocIdSetIterator;
 /**
 * Source of {@link Impacts}.
 * @lucene.internal
 */
 public interface ImpactsSource {
  /**
   * Shallow-advance to {@code target}. This is cheaper than calling
   * {@link DocIdSetIterator#advance(int)} and allows further calls to
   * {@link #getImpacts()} to ignore doc IDs that are less than {@code target}
   * in order to get more precise information about impacts.
   * This method may not be called on targets that are less than the current
   * {@link DocIdSetIterator#docID()}.
   * After this method has been called, {@link DocIdSetIterator#nextDoc()} may
   * not be called if the current doc ID is less than {@code target - 1} and
   * {@link DocIdSetIterator#advance(int)} may not be called on targets that
   * are less than {@code target}.
   */
  void advanceShallow(int target) throws IOException;
  /**
   * Get information about upcoming impacts for doc ids that are greater than
   * or equal to the maximum of {@link DocIdSetIterator#docID()} and the last
   * target that was passed to {@link #advanceShallow(int)}.
   * This method may not be called on an unpositioned iterator on which
   * {@link #advanceShallow(int)} has never been called.
   * NOTE: advancing this iterator may invalidate the returned impacts, so they
   * should not be used after the iterator has been advanced.
   */
  Impacts getImpacts() throws IOException;
 }
--- a/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ImpactsDISI.java
@ -0,0 +1,149 @@
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.search;
 import java.io.IOException;
 import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
 /**
 * {@link DocIdSetIterator} that skips non-competitive docs thanks to the
 * indexed impacts. Call {@link #setMinCompetitiveScore(float)} in order to
 * give this iterator the ability to skip low-scoring documents.
 * @lucene.internal
 */
 public final class ImpactsDISI extends DocIdSetIterator {
  private final DocIdSetIterator in;
  private final ImpactsSource impactsSource;
  private final MaxScoreCache maxScoreCache;
  private final float globalMaxScore;
  private float minCompetitiveScore = 0;
  private int upTo = DocIdSetIterator.NO_MORE_DOCS;
  private float maxScore = Float.MAX_VALUE;
  /**
   * Sole constructor.
   * @param in            wrapped iterator
   * @param impactsSource source of impacts
   * @param scorer        scorer
   */
  public ImpactsDISI(DocIdSetIterator in, ImpactsSource impactsSource, SimScorer scorer) {
    this.in = in;
    this.impactsSource = impactsSource;
    this.maxScoreCache = new MaxScoreCache(impactsSource, scorer);
    this.globalMaxScore = scorer.score(Float.MAX_VALUE, 1L);
  }
  /**
   * Set the minimum competitive score.
   * @see Scorer#setMinCompetitiveScore(float)
   */
  public void setMinCompetitiveScore(float minCompetitiveScore) {
    assert minCompetitiveScore >= this.minCompetitiveScore;
    if (minCompetitiveScore > this.minCompetitiveScore) {
      this.minCompetitiveScore = minCompetitiveScore;
      // force upTo and maxScore to be recomputed so that we will skip documents
      // if the current block of documents is not competitive - only if the min
      // competitive score actually increased
      upTo = -1;
    }
  }
  /**
   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
   * wrapped {@link ImpactsEnum}.
   * @see Scorer#advanceShallow(int)
   */
  public int advanceShallow(int target) throws IOException {
    impactsSource.advanceShallow(target);
    Impacts impacts = impactsSource.getImpacts();
    return impacts.getDocIdUpTo(0);
  }
  /**
   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
   * wrapped {@link ImpactsEnum} and {@link Scorer}.
   * @see Scorer#getMaxScore(int)
   */
  public float getMaxScore(int upTo) throws IOException {
    final int level = maxScoreCache.getLevel(upTo);
    if (level == -1) {
      return globalMaxScore;
    } else {
      return maxScoreCache.getMaxScoreForLevel(level);
    }
  }
  private int advanceTarget(int target) throws IOException {
    if (target <= upTo) {
      // we are still in the current block, which is considered competitive
      // according to impacts, no skipping
      return target;
    }
    upTo = advanceShallow(target);
    maxScore = maxScoreCache.getMaxScoreForLevel(0);
    while (true) {
      assert upTo >= target;
      if (maxScore >= minCompetitiveScore) {
        return target;
      }
      if (upTo == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      }
      final int skipUpTo = maxScoreCache.getSkipUpTo(minCompetitiveScore);
      if (skipUpTo == -1) { // no further skipping
        target = upTo + 1;
      } else if (skipUpTo == NO_MORE_DOCS) {
        return NO_MORE_DOCS;
      } else {
        target = skipUpTo + 1;
      }
      upTo = advanceShallow(target);
      maxScore = maxScoreCache.getMaxScoreForLevel(0);
    }
  }
  @Override
  public int advance(int target) throws IOException {
    return in.advance(advanceTarget(target));
  }
  @Override
  public int nextDoc() throws IOException {
    return advance(in.docID() + 1);
  }
  @Override
  public int docID() {
    return in.docID();
  }
  @Override
  public long cost() {
    return in.cost();
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java
@ -22,7 +22,7 @@ import java.util.List;
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.Impacts;
-import org.apache.lucene.index.ImpactsEnum;
+import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.ArrayUtil;
@ -30,22 +30,21 @@ import org.apache.lucene.util.ArrayUtil;
 * Compute maximum scores based on {@link Impacts} and keep them in a cache in
 * order not to run expensive similarity score computations multiple times on
 * the same data.
 * @lucene.internal
 */
-public final class MaxScoreCache {
+final class MaxScoreCache {
-  private final ImpactsEnum impactsEnum;
+  private final ImpactsSource impactsSource;
  private final SimScorer scorer;
  private final float globalMaxScore;
  private float[] maxScoreCache;
  private int[] maxScoreCacheUpTo;
  /**
   * Sole constructor.
   */
-  public MaxScoreCache(ImpactsEnum impactsEnum, SimScorer scorer) {
+  public MaxScoreCache(ImpactsSource impactsSource, SimScorer scorer) {
-    this.impactsEnum = impactsEnum;
+    this.impactsSource = impactsSource;
    this.scorer = scorer;
    globalMaxScore = scorer.score(Integer.MAX_VALUE, 1L);
    maxScoreCache = new float[0];
    maxScoreCacheUpTo = new int[0];
  }
@ -71,8 +70,8 @@ public final class MaxScoreCache {
   * Return the first level that includes all doc IDs up to {@code upTo},
   * or -1 if there is no such level.
   */
-  private int getLevel(int upTo) throws IOException {
+  int getLevel(int upTo) throws IOException {
-    final Impacts impacts = impactsEnum.getImpacts();
+    final Impacts impacts = impactsSource.getImpacts();
    for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
      final int impactsUpTo = impacts.getDocIdUpTo(level);
      if (upTo <= impactsUpTo) {
@ -86,7 +85,7 @@ public final class MaxScoreCache {
   * Return the maximum score for the given {@code level}.
   */
  float getMaxScoreForLevel(int level) throws IOException {
-    final Impacts impacts = impactsEnum.getImpacts();
+    final Impacts impacts = impactsSource.getImpacts();
    ensureCacheSize(level + 1);
    final int levelUpTo = impacts.getDocIdUpTo(level);
    if (maxScoreCacheUpTo[level] < levelUpTo) {
@ -100,8 +99,7 @@ public final class MaxScoreCache {
   * Return the maximum level at which scores are all less than {@code minScore},
   * or -1 if none.
   */
-  int getSkipLevel(float minScore) throws IOException {
+  private int getSkipLevel(Impacts impacts, float minScore) throws IOException {
    final Impacts impacts = impactsEnum.getImpacts();
    final int numLevels = impacts.numLevels();
    for (int level = 0; level < numLevels; ++level) {
      if (getMaxScoreForLevel(level) >= minScore) {
@ -112,27 +110,17 @@ public final class MaxScoreCache {
  }
  /**
-   * Implement the contract of {@link Scorer#advanceShallow(int)} based on the
+   * Return the an inclusive upper bound of documents that all have a score that
-   * wrapped {@link ImpactsEnum}.
+   * is less than {@code minScore}, or {@code -1} if the current document may
-   * @see Scorer#advanceShallow(int)
+   * be competitive.
   */
-  public int advanceShallow(int target) throws IOException {
+  int getSkipUpTo(float minScore) throws IOException {
-    impactsEnum.advanceShallow(target);
+    final Impacts impacts = impactsSource.getImpacts();
-    Impacts impacts = impactsEnum.getImpacts();
+    final int level = getSkipLevel(impacts, minScore);
-    return impacts.getDocIdUpTo(0);
+    if (level == -1) {
      return -1;
    }
    return impacts.getDocIdUpTo(level);
  }
  /**
   * Implement the contract of {@link Scorer#getMaxScore(int)} based on the
   * wrapped {@link ImpactsEnum} and {@link Scorer}.
   * @see Scorer#getMaxScore(int)
   */
  public float getMaxScore(int upTo) throws IOException {
    final int level = getLevel(upTo);
    if (level == -1) {
      return globalMaxScore;
    } else {
      return getMaxScoreForLevel(level);
    }
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java
@ -21,12 +21,19 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Objects;
 import java.util.Set;
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermStates;
@ -34,6 +41,7 @@ import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 /**
 * A query that treats multiple terms as synonyms.
@ -112,7 +120,7 @@ public final class SynonymQuery extends Query {
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
    if (scoreMode.needsScores()) {
-      return new SynonymWeight(this, searcher, boost);
+      return new SynonymWeight(this, searcher, scoreMode, boost);
    } else {
      // if scores are not needed, let BooleanWeight deal with optimizing that case.
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
@ -127,9 +135,12 @@ public final class SynonymQuery extends Query {
    private final TermStates termStates[];
    private final Similarity similarity;
    private final Similarity.SimScorer simWeight;
    private final ScoreMode scoreMode;
-    SynonymWeight(Query query, IndexSearcher searcher, float boost) throws IOException {
+    SynonymWeight(Query query, IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
      super(query);
      assert scoreMode.needsScores();
      this.scoreMode = scoreMode;
      CollectionStatistics collectionStats = searcher.collectionStatistics(terms[0].field());
      long docFreq = 0;
      long totalTermFreq = 0;
@ -176,8 +187,7 @@ public final class SynonymQuery extends Query {
        if (newDoc == doc) {
          final float freq;
          if (scorer instanceof SynonymScorer) {
-            SynonymScorer synScorer = (SynonymScorer) scorer;
+            freq = ((SynonymScorer) scorer).freq();
            freq = synScorer.tf(synScorer.getSubMatches());
          } else {
            assert scorer instanceof TermScorer;
            freq = ((TermScorer)scorer).freq();
@ -197,61 +207,277 @@ public final class SynonymQuery extends Query {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
-      // we use termscorers + disjunction as an impl detail
+      List<PostingsEnum> iterators = new ArrayList<>();
-      List<Scorer> subScorers = new ArrayList<>();
+      List<ImpactsEnum> impacts = new ArrayList<>();
      for (int i = 0; i < terms.length; i++) {
        TermState state = termStates[i].get(context);
        if (state != null) {
          TermsEnum termsEnum = context.reader().terms(terms[i].field()).iterator();
          termsEnum.seekExact(terms[i].bytes(), state);
-          LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
+          if (scoreMode == ScoreMode.TOP_SCORES) {
-          subScorers.add(new TermScorer(this, termsEnum, ScoreMode.COMPLETE, simScorer));
+            ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
-        }
+            iterators.add(impactsEnum);
-      }
+            impacts.add(impactsEnum);
      if (subScorers.isEmpty()) {
        return null;
      } else if (subScorers.size() == 1) {
        // we must optimize this case (term not in segment), disjunctionscorer requires >= 2 subs
        return subScorers.get(0);
          } else {
-        LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
+            PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.FREQS);
-        return new SynonymScorer(simScorer, this, subScorers);
+            iterators.add(postingsEnum);
            impacts.add(new SlowImpactsEnum(postingsEnum));
          }
        }
      }
      if (iterators.isEmpty()) {
        return null;
      }
      LeafSimScorer simScorer = new LeafSimScorer(simWeight, context.reader(), terms[0].field(), true);
      // we must optimize this case (term not in segment), disjunctions require >= 2 subs
      if (iterators.size() == 1) {
        if (scoreMode == ScoreMode.TOP_SCORES) {
          return new TermScorer(this, impacts.get(0), simScorer);
        } else {
          return new TermScorer(this, iterators.get(0), simScorer);
        }
      }
      // we use termscorers + disjunction as an impl detail
      DisiPriorityQueue queue = new DisiPriorityQueue(iterators.size());
      for (PostingsEnum postings : iterators) {
        queue.add(new DisiWrapper(new TermScorer(this, postings, simScorer)));
      }
      // Even though it is called approximation, it is accurate since none of
      // the sub iterators are two-phase iterators.
      DocIdSetIterator iterator = new DisjunctionDISIApproximation(queue);
      ImpactsSource impactsSource = mergeImpacts(impacts.toArray(new ImpactsEnum[0]));
      ImpactsDISI impactsDisi = new ImpactsDISI(iterator, impactsSource, simScorer.getSimScorer());
      if (scoreMode == ScoreMode.TOP_SCORES) {
        iterator = impactsDisi;
      }
      return new SynonymScorer(this, queue, iterator, impactsDisi, simScorer);
    }
    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  }
-  static class SynonymScorer extends DisjunctionScorer {
+  /**
-    private final LeafSimScorer similarity;
+   * Merge impacts for multiple synonyms.
   */
  static ImpactsSource mergeImpacts(ImpactsEnum[] impactsEnums) {
    return new ImpactsSource() {
      class SubIterator {
        final Iterator<Impact> iterator;
        int previousFreq;
        Impact current;
        SubIterator(Iterator<Impact> iterator) {
          this.iterator = iterator;
          this.current = iterator.next();
        }
        void next() {
          previousFreq = current.freq;
          if (iterator.hasNext() == false) {
            current = null;
          } else {
            current = iterator.next();
          }
        }
    SynonymScorer(LeafSimScorer similarity, Weight weight, List<Scorer> subScorers) {
      super(weight, subScorers, true);
      this.similarity = similarity;
      }
      @Override
-    protected float score(DisiWrapper topList) throws IOException {
+      public Impacts getImpacts() throws IOException {
-      return similarity.score(topList.doc, tf(topList));
+        final Impacts[] impacts = new Impacts[impactsEnums.length];
        // Use the impacts that have the lower next boundary as a lead.
        // It will decide on the number of levels and the block boundaries.
        Impacts tmpLead = null;
        for (int i = 0; i < impactsEnums.length; ++i) {
          impacts[i] = impactsEnums[i].getImpacts();
          if (tmpLead == null || impacts[i].getDocIdUpTo(0) < tmpLead.getDocIdUpTo(0)) {
            tmpLead = impacts[i];
          }
        }
        final Impacts lead = tmpLead;
        return new Impacts() {
          @Override
          public int numLevels() {
            // Delegate to the lead
            return lead.numLevels();
          }
          @Override
          public int getDocIdUpTo(int level) {
            // Delegate to the lead
            return lead.getDocIdUpTo(level);
          }
          /**
           * Return the minimum level whose impacts are valid up to {@code docIdUpTo},
           * or {@code -1} if there is no such level.
           */
          private int getLevel(Impacts impacts, int docIdUpTo) {
            for (int level = 0, numLevels = impacts.numLevels(); level < numLevels; ++level) {
              if (impacts.getDocIdUpTo(level) >= docIdUpTo) {
                return level;
              }
            }
            return -1;
          }
          @Override
          public List<Impact> getImpacts(int level) {
            final int docIdUpTo = getDocIdUpTo(level);
            List<List<Impact>> toMerge = new ArrayList<>();
            for (int i = 0; i < impactsEnums.length; ++i) {
              if (impactsEnums[i].docID() <= docIdUpTo) {
                int impactsLevel = getLevel(impacts[i], docIdUpTo);
                if (impactsLevel == -1) {
                  // One instance doesn't have impacts that cover up to docIdUpTo
                  // Return impacts that trigger the maximum score
                  return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
                }
                toMerge.add(impacts[i].getImpacts(impactsLevel));
              }
            }
            assert toMerge.size() > 0; // otherwise it would mean the docID is > docIdUpTo, which is wrong
            if (toMerge.size() == 1) {
              // common if one synonym is common and the other one is rare
              return toMerge.get(0);
            }
            PriorityQueue<SubIterator> pq = new PriorityQueue<SubIterator>(impacts.length) {
              @Override
              protected boolean lessThan(SubIterator a, SubIterator b) {
                if (a.current == null) { // means iteration is finished
                  return false;
                }
                if (b.current == null) {
                  return true;
                }
                return Long.compareUnsigned(a.current.norm, b.current.norm) < 0;
              }
            };
            for (List<Impact> impacts : toMerge) {
              pq.add(new SubIterator(impacts.iterator()));
            }
            List<Impact> mergedImpacts = new ArrayList<>();
            // Idea: merge impacts by norm. The tricky thing is that we need to
            // consider norm values that are not in the impacts too. For
            // instance if the list of impacts is [{freq=2,norm=10}, {freq=4,norm=12}],
            // there might well be a document that has a freq of 2 and a length of 11,
            // which was just not added to the list of impacts because {freq=2,norm=10}
            // is more competitive. So the way it works is that we track the sum of
            // the term freqs that we have seen so far in order to account for these
            // implicit impacts.
            long sumTf = 0;
            SubIterator top = pq.top();
            do {
              final long norm = top.current.norm;
              do {
                sumTf += top.current.freq - top.previousFreq;
                top.next();
                top = pq.updateTop();
              } while (top.current != null && top.current.norm == norm);
              final int freqUpperBound = (int) Math.min(Integer.MAX_VALUE, sumTf);
              if (mergedImpacts.isEmpty()) {
                mergedImpacts.add(new Impact(freqUpperBound, norm));
              } else {
                Impact prevImpact = mergedImpacts.get(mergedImpacts.size() - 1);
                assert Long.compareUnsigned(prevImpact.norm, norm) < 0;
                if (freqUpperBound > prevImpact.freq) {
                  mergedImpacts.add(new Impact(freqUpperBound, norm));
                } // otherwise the previous impact is already more competitive
              }
            } while (top.current != null);
            return mergedImpacts;
          }
        };
      }
      @Override
      public void advanceShallow(int target) throws IOException {
        for (ImpactsEnum impactsEnum : impactsEnums) {
          if (impactsEnum.docID() < target) {
            impactsEnum.advanceShallow(target);
          }
        }
      }
    };
  }
  private static class SynonymScorer extends Scorer {
    private final DisiPriorityQueue queue;
    private final DocIdSetIterator iterator;
    private final ImpactsDISI impactsDisi;
    private final LeafSimScorer simScorer;
    SynonymScorer(Weight weight, DisiPriorityQueue queue, DocIdSetIterator iterator,
        ImpactsDISI impactsDisi, LeafSimScorer simScorer) {
      super(weight);
      this.queue = queue;
      this.iterator = iterator;
      this.impactsDisi = impactsDisi;
      this.simScorer = simScorer;
    }
    @Override
    public int docID() {
      return iterator.docID();
    }
    int freq() throws IOException {
      DisiWrapper w = queue.topList();
      int freq = ((PostingsEnum) w.iterator).freq();
      for (w = w.next; w != null; w = w.next) {
        freq += ((PostingsEnum) w.iterator).freq();
        if (freq < 0) { // overflow
          return Integer.MAX_VALUE;
        }
      }
      return freq;
    }
    @Override
    public float score() throws IOException {
      return simScorer.score(iterator.docID(), freq());
    }
    @Override
    public DocIdSetIterator iterator() {
      return iterator;
    }
    @Override
    public float getMaxScore(int upTo) throws IOException {
-      // TODO: merge impacts to get better score upper bounds
+      return impactsDisi.getMaxScore(upTo);
      return similarity.getSimScorer().score(Float.MAX_VALUE, 1L);
    }
-    /** combines TF of all subs. */
+    @Override
-    final int tf(DisiWrapper topList) throws IOException {
+    public int advanceShallow(int target) throws IOException {
-      int tf = 0;
+      return impactsDisi.advanceShallow(target);
      for (DisiWrapper w = topList; w != null; w = w.next) {
        tf += ((TermScorer)w.scorer).freq();
    }
-      return tf;
+
    @Override
    public void setMinCompetitiveScore(float minScore) {
      impactsDisi.setMinCompetitiveScore(minScore);
    }
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
@ -111,7 +111,11 @@ public class TermQuery extends Query {
        return null;
      }
      LeafSimScorer scorer = new LeafSimScorer(simScorer, context.reader(), term.field(), scoreMode.needsScores());
-      return new TermScorer(this, termsEnum, scoreMode, scorer);
+      if (scoreMode == ScoreMode.TOP_SCORES) {
        return new TermScorer(this, termsEnum.impacts(PostingsEnum.FREQS), scorer);
      } else {
        return new TermScorer(this, termsEnum.postings(null, PostingsEnum.FREQS), scorer);
      }
    }
    @Override
--- a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
@ -19,11 +19,9 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SlowImpactsEnum;
 import org.apache.lucene.index.TermsEnum;
 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
 */
@ -32,101 +30,29 @@ final class TermScorer extends Scorer {
  private final ImpactsEnum impactsEnum;
  private final DocIdSetIterator iterator;
  private final LeafSimScorer docScorer;
-  private final MaxScoreCache maxScoreCache;
+  private final ImpactsDISI impactsDisi;
  private float minCompetitiveScore;
  /**
-   * Construct a <code>TermScorer</code>.
+   * Construct a {@link TermScorer} that will iterate all documents.
   *
   * @param weight
   *          The weight of the <code>Term</code> in the query.
   * @param te
   *          A {@link TermsEnum} positioned on the expected term.
   * @param docScorer
   *          A {@link LeafSimScorer} for the appropriate field.
   */
-  TermScorer(Weight weight, TermsEnum te, ScoreMode scoreMode, LeafSimScorer docScorer) throws IOException {
+  TermScorer(Weight weight, PostingsEnum postingsEnum, LeafSimScorer docScorer) {
    super(weight);
-    this.docScorer = docScorer;
+    iterator = this.postingsEnum = postingsEnum;
    if (scoreMode == ScoreMode.TOP_SCORES) {
      impactsEnum = te.impacts(PostingsEnum.FREQS);
      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
      postingsEnum = impactsEnum;
      iterator = new DocIdSetIterator() {
        int upTo = -1;
        float maxScore;
        private int advanceTarget(int target) throws IOException {
          if (minCompetitiveScore == 0) {
            // no potential for skipping
            return target;
          }
          if (target > upTo) {
            impactsEnum.advanceShallow(target);
            Impacts impacts = impactsEnum.getImpacts();
            upTo = impacts.getDocIdUpTo(0);
            maxScore = maxScoreCache.getMaxScoreForLevel(0);
          }
          while (true) {
            assert upTo >= target;
            if (maxScore >= minCompetitiveScore) {
              return target;
            }
            if (upTo == NO_MORE_DOCS) {
              return NO_MORE_DOCS;
            }
            impactsEnum.advanceShallow(upTo + 1);
            Impacts impacts = impactsEnum.getImpacts();
            final int level = maxScoreCache.getSkipLevel(minCompetitiveScore);
            if (level >= 0) {
              // we can skip more docs
              int newUpTo = impacts.getDocIdUpTo(level);
              if (newUpTo == NO_MORE_DOCS) {
                return NO_MORE_DOCS;
              }
              target = newUpTo + 1;
              impactsEnum.advanceShallow(target);
              impacts = impactsEnum.getImpacts();
            } else {
              target = upTo + 1;
            }
            upTo = impacts.getDocIdUpTo(0);
            maxScore = maxScoreCache.getMaxScoreForLevel(0);
          }
        }
        @Override
        public int advance(int target) throws IOException {
          return impactsEnum.advance(advanceTarget(target));
        }
        @Override
        public int nextDoc() throws IOException {
          return advance(impactsEnum.docID() + 1);
        }
        @Override
        public int docID() {
          return impactsEnum.docID();
        }
        @Override
        public long cost() {
          return impactsEnum.cost();
        }
      };
    } else {
      postingsEnum = te.postings(null, scoreMode.needsScores() ? PostingsEnum.FREQS : PostingsEnum.NONE);
    impactsEnum = new SlowImpactsEnum(postingsEnum);
-      maxScoreCache = new MaxScoreCache(impactsEnum, docScorer.getSimScorer());
+    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
-      iterator = postingsEnum;
+    this.docScorer = docScorer;
  }
  /**
   * Construct a {@link TermScorer} that will use impacts to skip blocks of
   * non-competitive documents.
   */
  TermScorer(Weight weight, ImpactsEnum impactsEnum, LeafSimScorer docScorer) {
    super(weight);
    postingsEnum = this.impactsEnum = impactsEnum;
    impactsDisi = new ImpactsDISI(impactsEnum, impactsEnum, docScorer.getSimScorer());
    iterator = impactsDisi;
    this.docScorer = docScorer;
  }
  @Override
@ -151,17 +77,17 @@ final class TermScorer extends Scorer {
  @Override
  public int advanceShallow(int target) throws IOException {
-    return maxScoreCache.advanceShallow(target);
+    return impactsDisi.advanceShallow(target);
  }
  @Override
  public float getMaxScore(int upTo) throws IOException {
-    return maxScoreCache.getMaxScore(upTo);
+    return impactsDisi.getMaxScore(upTo);
  }
  @Override
  public void setMinCompetitiveScore(float minScore) {
-    this.minCompetitiveScore = minScore;
+    impactsDisi.setMinCompetitiveScore(minScore);
  }
  /** Returns a string representation of this <code>TermScorer</code>. */
--- a/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSynonymQuery.java
@ -18,15 +18,27 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.Impact;
 import org.apache.lucene.index.Impacts;
 import org.apache.lucene.index.ImpactsEnum;
 import org.apache.lucene.index.ImpactsSource;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 public class TestSynonymQuery extends LuceneTestCase {
@ -54,6 +66,11 @@ public class TestSynonymQuery extends LuceneTestCase {
  }
  public void testScores() throws IOException {
    doTestScores(false);
    doTestScores(true);
  }
  private void doTestScores(boolean trackTotalHits) throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
@ -71,8 +88,12 @@ public class TestSynonymQuery extends LuceneTestCase {
    IndexSearcher searcher = newSearcher(reader);
    SynonymQuery query = new SynonymQuery(new Term("f", "a"), new Term("f", "b"));
-    TopDocs topDocs = searcher.search(query, 20);
+    TopScoreDocCollector collector = TopScoreDocCollector.create(20, null, trackTotalHits);
    searcher.search(query, collector);
    TopDocs topDocs = collector.topDocs();
    if (trackTotalHits) {
      assertEquals(11, topDocs.totalHits);
    }
    // All docs must have the same score
    for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
      assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
@ -83,4 +104,206 @@ public class TestSynonymQuery extends LuceneTestCase {
    dir.close();
  }
  public void testMergeImpacts() throws IOException {
    DummyImpactsEnum impacts1 = new DummyImpactsEnum();
    impacts1.reset(42,
        new Impact[][] {
          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) },
          new Impact[] { new Impact(5, 11), new Impact(8, 13),  new Impact(12, 14) }
        },
        new int[] {
            110,
            945
        });
    DummyImpactsEnum impacts2 = new DummyImpactsEnum();
    impacts2.reset(45,
        new Impact[][] {
          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
        },
        new int[] {
            90,
            1000
        });
    ImpactsSource mergedImpacts = SynonymQuery.mergeImpacts(new ImpactsEnum[] { impacts1, impacts2 });
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(5, 10), new Impact(7, 12), new Impact(14, 13) },
          new Impact[] { new Impact(Integer.MAX_VALUE, 1) }
        },
        new int[] {
            90,
            1000
        },
        mergedImpacts.getImpacts());
    // docID is > the first doIdUpTo of impacts1
    impacts2.reset(112,
        new Impact[][] {
          new Impact[] { new Impact(2, 10), new Impact(6, 13) },
          new Impact[] { new Impact(3, 9), new Impact(5, 11), new Impact(7, 13) }
        },
        new int[] {
            150,
            1000
        });
    assertEquals(
        new Impact[][] {
          new Impact[] { new Impact(3, 10), new Impact(5, 12), new Impact(8, 13) }, // same as impacts1
          new Impact[] { new Impact(3, 9), new Impact(10, 11), new Impact(15, 13), new Impact(19, 14) }
        },
        new int[] {
            110,
            945
        },
        mergedImpacts.getImpacts());
  }
  private static void assertEquals(Impact[][] impacts, int[] docIdUpTo, Impacts actual) {
    assertEquals(impacts.length, actual.numLevels());
    for (int i = 0; i < impacts.length; ++i) {
      assertEquals(docIdUpTo[i], actual.getDocIdUpTo(i));
      assertEquals(Arrays.asList(impacts[i]), actual.getImpacts(i));
    }
  }
  private static class DummyImpactsEnum extends ImpactsEnum {
    private int docID;
    private Impact[][] impacts;
    private int[] docIdUpTo;
    void reset(int docID, Impact[][] impacts, int[] docIdUpTo) {
      this.docID = docID;
      this.impacts = impacts;
      this.docIdUpTo = docIdUpTo;
    }
    @Override
    public void advanceShallow(int target) throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public Impacts getImpacts() throws IOException {
      return new Impacts() {
        @Override
        public int numLevels() {
          return impacts.length;
        }
        @Override
        public int getDocIdUpTo(int level) {
          return docIdUpTo[level];
        }
        @Override
        public List<Impact> getImpacts(int level) {
          return Arrays.asList(impacts[level]);
        }
      };
    }
    @Override
    public int freq() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int nextPosition() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int startOffset() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int endOffset() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public BytesRef getPayload() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int docID() {
      return docID;
    }
    @Override
    public int nextDoc() throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public int advance(int target) throws IOException {
      throw new UnsupportedOperationException();
    }
    @Override
    public long cost() {
      throw new UnsupportedOperationException();
    }
  }
  public void testRandomTopDocs() throws IOException {
    Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
    int numDocs = atLeast(128 * 8 * 8 * 3); // make sure some terms have skip data
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      int numValues = random().nextInt(1 << random().nextInt(5));
      int start = random().nextInt(10);
      for (int j = 0; j < numValues; ++j) {
        int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
        for (int k = 0; k < freq; ++k) {
          doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
        }
      }
      w.addDocument(doc);
    }
    IndexReader reader = DirectoryReader.open(w);
    w.close();
    IndexSearcher searcher = newSearcher(reader);
    for (int term1 = 0; term1 < 15; ++term1) {
      int term2;
      do {
        term2 = random().nextInt(15);
      } while (term1 == term2);
      Query query = new SynonymQuery(
          new Term[] {
              new Term("foo", Integer.toString(term1)),
              new Term("foo", Integer.toString(term2))});
      TopScoreDocCollector collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
      TopScoreDocCollector collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
      searcher.search(query, collector1);
      searcher.search(query, collector2);
      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
      int filterTerm = random().nextInt(15);
      Query filteredQuery = new BooleanQuery.Builder()
          .add(query, Occur.MUST)
          .add(new TermQuery(new Term("foo", Integer.toString(filterTerm))), Occur.FILTER)
          .build();
      collector1 = TopScoreDocCollector.create(10, null, true); // COMPLETE
      collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
      searcher.search(filteredQuery, collector1);
      searcher.search(filteredQuery, collector2);
      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
    }
    reader.close();
    dir.close();
  }
 }
--- a/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTermScorer.java
@ -24,7 +24,7 @@ import java.util.List;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.FilterLeafReader;
@ -39,6 +39,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.similarities.ClassicSimilarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 public class TestTermScorer extends LuceneTestCase {
  protected Directory directory;
@ -218,7 +219,10 @@ public class TestTermScorer extends LuceneTestCase {
      int numValues = random().nextInt(1 << random().nextInt(5));
      int start = random().nextInt(10);
      for (int j = 0; j < numValues; ++j) {
-        doc.add(new StringField("foo", Integer.toString(start + j), Store.NO));
+        int freq = TestUtil.nextInt(random(), 1, 1 << random().nextInt(3));
        for (int k = 0; k < freq; ++k) {
          doc.add(new TextField("foo", Integer.toString(start + j), Store.NO));
        }
      }
      w.addDocument(doc);
    }
@ -234,7 +238,7 @@ public class TestTermScorer extends LuceneTestCase {
      searcher.search(query, collector1);
      searcher.search(query, collector2);
-      assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
      int filterTerm = random().nextInt(15);
      Query filteredQuery = new BooleanQuery.Builder()
@ -246,19 +250,10 @@ public class TestTermScorer extends LuceneTestCase {
      collector2 = TopScoreDocCollector.create(10, null, false); // TOP_SCORES
      searcher.search(filteredQuery, collector1);
      searcher.search(filteredQuery, collector2);
-      assertTopDocsEquals(collector1.topDocs(), collector2.topDocs());
+      CheckHits.checkEqual(query, collector1.topDocs().scoreDocs, collector2.topDocs().scoreDocs);
    }
    reader.close();
    dir.close();
  }
  private static void assertTopDocsEquals(TopDocs td1, TopDocs td2) {
    assertEquals(td1.scoreDocs.length, td2.scoreDocs.length);
    for (int i = 0; i < td1.scoreDocs.length; ++i) {
      ScoreDoc sd1 = td1.scoreDocs[i];
      ScoreDoc sd2 = td2.scoreDocs[i];
      assertEquals(sd1.doc, sd2.doc);
      assertEquals(sd1.score, sd2.score, 0f);
    }
  }
 }