Speedup sandbox/DocValuesTermsQuery (#12129)

* Optimize the common case where docs have only a single value for the field
* In the multivalued case, stop reading a document's ordinals once one exceeds the maximum ordinal in the query's set (ordinals come back in sorted order)
* Implement ScorerSupplier, so that a (potentially large) number of ordinal lookups isn't performed just to get the cost()
* Graduate to Sorted(Set)DocValuesField.newSlowSetQuery, complementing newSlowRangeQuery and newSlowExactQuery

Like the other slow queries in these classes, it's currently recommended only in combination with points, e.g. IndexOrDocValuesQuery(new PointInSetQuery, newSlowSetQuery); see the usage sketch below.
Robert Muir 2023-02-06 12:47:53 -05:00 committed by GitHub
parent 10d9c7440b
commit 0bc4135695
7 changed files with 312 additions and 339 deletions
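
A usage sketch of that recommended pattern (hedged: the field name "id", the values, and the assumption that the field is indexed both as a BinaryPoint and as a SortedDocValuesField are illustrative, not part of this commit):

import java.nio.charset.StandardCharsets;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

// Points-based set query: can lead iteration efficiently when it is the selective clause.
Query indexQuery =
    BinaryPoint.newSetQuery(
        "id", "4".getBytes(StandardCharsets.UTF_8), "7".getBytes(StandardCharsets.UTF_8));
// Doc-values set query from this commit: cheap to verify a small candidate set.
Query dvQuery = SortedDocValuesField.newSlowSetQuery("id", new BytesRef("4"), new BytesRef("7"));
// IndexOrDocValuesQuery picks whichever execution strategy is cheaper per segment.
Query query = new IndexOrDocValuesQuery(indexQuery, dvQuery);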

SortedDocValuesField.java

@@ -88,4 +88,16 @@ public class SortedDocValuesField extends Field {
public static Query newSlowExactQuery(String field, BytesRef value) {
return newSlowRangeQuery(field, value, value, true, true);
}
+ /**
+  * Create a query matching any of the specified values.
+  *
+  * <p><b>NOTE</b>: Such queries cannot efficiently advance to the next match, which makes them
+  * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped
+  * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on points, such as
+  * {@link BinaryPoint#newSetQuery}.
+  */
+ public static Query newSlowSetQuery(String field, BytesRef... values) {
+   return new SortedSetDocValuesSetQuery(field, values.clone());
+ }
}
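
A note on why the single-valued field can share one query class (a sketch of the assumed mechanics, not code from this commit): DocValues.getSortedSet exposes a SORTED field as a singleton SORTED_SET, and DocValues.unwrapSingleton recovers the single-valued view — exactly the fast path the new query takes.

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;

// leafReader and "field" are placeholders for illustration.
SortedSetDocValues values = DocValues.getSortedSet(leafReader, "field");
SortedDocValues singleton = DocValues.unwrapSingleton(values); // non-null when the field is single-valued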

SortedSetDocValuesField.java

@@ -90,4 +90,18 @@ public class SortedSetDocValuesField extends Field {
public static Query newSlowExactQuery(String field, BytesRef value) {
return newSlowRangeQuery(field, value, value, true, true);
}
+ /**
+  * Create a query matching any of the specified values.
+  *
+  * <p>This query also works with fields that have indexed {@link SortedDocValuesField}s.
+  *
+  * <p><b>NOTE</b>: Such queries cannot efficiently advance to the next match, which makes them
+  * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped
+  * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on points, such as
+  * {@link BinaryPoint#newSetQuery}.
+  */
+ public static Query newSlowSetQuery(String field, BytesRef... values) {
+   return new SortedSetDocValuesSetQuery(field, values.clone());
+ }
}
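
A minimal indexing-plus-search sketch for the multivalued case (the "tags" field, its values, and the writer are illustrative assumptions):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

Document doc = new Document();
doc.add(new SortedSetDocValuesField("tags", new BytesRef("red")));
doc.add(new SortedSetDocValuesField("tags", new BytesRef("blue")));
writer.addDocument(doc); // writer: a hypothetical IndexWriter

// Matches any document whose "tags" contains at least one of the given values.
Query q = SortedSetDocValuesField.newSlowSetQuery("tags", new BytesRef("blue"), new BytesRef("green"));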

SortedSetDocValuesSetQuery.java (new file)

@@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/** Similar to SortedSetDocValuesRangeQuery but for a set */
final class SortedSetDocValuesSetQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(SortedSetDocValuesSetQuery.class);
private final String field;
private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData
SortedSetDocValuesSetQuery(String field, BytesRef terms[]) {
this.field = Objects.requireNonNull(field);
Objects.requireNonNull(terms);
ArrayUtil.timSort(terms);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRef previous = null;
for (BytesRef term : terms) {
if (term.equals(previous) == false) {
builder.add(field, term);
}
previous = term;
}
termData = builder.finish();
termDataHashCode = termData.hashCode();
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(SortedSetDocValuesSetQuery other) {
// termData might be heavy to compare so check the hash code first
return termDataHashCode == other.termDataHashCode && termData.equals(other.termData);
}
@Override
public int hashCode() {
return Objects.hash(classHash(), termDataHashCode);
}
@Override
public String toString(String defaultField) {
StringBuilder builder = new StringBuilder();
boolean first = true;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
if (!first) {
builder.append(' ');
}
first = false;
builder.append(new Term(iterator.field(), term).toString());
}
return builder.toString();
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES
+ RamUsageEstimator.sizeOfObject(field)
+ RamUsageEstimator.sizeOfObject(termData);
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this;
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null;
}
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
// implement ScorerSupplier, since we do some expensive stuff to make a scorer
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
final LongBitSet bits = new LongBitSet(values.getValueCount());
long maxOrd = -1;
TermIterator termIterator = termData.iterator();
for (BytesRef term = termIterator.next(); term != null; term = termIterator.next()) {
final long ord = values.lookupTerm(term);
if (ord >= 0) {
maxOrd = ord;
bits.set(ord);
}
}
// no terms matched in this segment
if (maxOrd < 0) {
return new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.empty());
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
final long max = maxOrd;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@Override
public boolean matches() throws IOException {
return bits.get(singleton.ordValue());
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
};
} else {
iterator =
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
long value = values.nextOrd();
if (value > max) {
return false; // values are sorted, terminate
} else if (bits.get(value)) {
return true;
}
}
return false;
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
};
}
return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
}
@Override
public long cost() {
return values.cost();
}
};
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
};
}
}
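
What the ScorerSupplier buys here, in sketch form (weight, leafContext, and leadCost are placeholders; this is how a consumer such as IndexOrDocValuesQuery is expected to use it, not code from this commit): cost() returns values.cost() immediately, and the per-term lookupTerm calls only run inside get(), so a caller that rejects this clause on cost never pays for the ordinal lookups.

ScorerSupplier supplier = weight.scorerSupplier(leafContext);
if (supplier != null) {
  long cost = supplier.cost(); // cheap: no ordinal lookups have happened yet
  if (cost <= leadCost) {
    Scorer scorer = supplier.get(leadCost); // terms are looked up against the dictionary only now
  }
}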

PrefixCodedTerms.java

@@ -200,6 +200,6 @@ public class PrefixCodedTerms implements Accountable {
}
PrefixCodedTerms other = (PrefixCodedTerms) obj;
- return delGen == other.delGen && this.content.equals(other.content);
+ return delGen == other.delGen && size() == other.size() && this.content.equals(other.content);
}
}

TestDocValuesTermsQuery.java → TestSortedSetDocValuesSetQuery.java (moved and renamed)

@@ -14,15 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
- package org.apache.lucene.sandbox.search;
+ package org.apache.lucene.document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.document.SortedDocValuesField;
- import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -31,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@@ -41,18 +38,74 @@ import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
- public class TestDocValuesTermsQuery extends LuceneTestCase {
+ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase {
+ public void testMissingTerms() throws Exception {
+   String fieldName = "field1";
+   Directory rd = newDirectory();
+   RandomIndexWriter w = new RandomIndexWriter(random(), rd);
+   for (int i = 0; i < 100; i++) {
+     Document doc = new Document();
+     int term = i * 10; // terms are units of 10;
+     doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
+     doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
+     w.addDocument(doc);
+   }
+   IndexReader reader = w.getReader();
+   w.close();
+   IndexSearcher searcher = newSearcher(reader);
+   int numDocs = reader.numDocs();
+   ScoreDoc[] results;
+   List<BytesRef> terms = new ArrayList<>();
+   terms.add(new BytesRef("5"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match nothing", 0, results.length);
+   terms = new ArrayList<>();
+   terms.add(new BytesRef("10"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match 1", 1, results.length);
+   terms = new ArrayList<>();
+   terms.add(new BytesRef("10"));
+   terms.add(new BytesRef("20"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match 2", 2, results.length);
+   reader.close();
+   rd.close();
+ }
public void testEquals() {
- assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar"));
assertEquals(
-     new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar", "bar"));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")));
assertEquals(
-     new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar"));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("bar")));
+ assertEquals(
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("baz")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz"), new BytesRef("bar")));
assertFalse(
-     new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar")));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"))
+         .equals(SortedDocValuesField.newSlowSetQuery("foo2", new BytesRef("bar"))));
assertFalse(
-     new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz")));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"))
+         .equals(SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz"))));
}
public void testDuelTermsQuery() throws IOException {
@@ -70,7 +123,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
- doc.add(new StringField(term.field(), term.text(), Store.NO));
+ doc.add(new StringField(term.field(), term.text(), Field.Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
@@ -101,12 +154,14 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
- List<String> bytesTerms = new ArrayList<>();
+ List<BytesRef> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
-   bytesTerms.add(term.text());
+   bytesTerms.add(term.bytes());
}
final Query q2 =
-     new BoostQuery(new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0])), boost);
+     new BoostQuery(
+         SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])),
+         boost);
assertSameMatches(searcher, q1, q2, true);
}
@@ -130,7 +185,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
- doc.add(new StringField(term.field(), term.text(), Store.NO));
+ doc.add(new StringField(term.field(), term.text(), Field.Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
@@ -161,12 +216,14 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
- List<String> bytesTerms = new ArrayList<>();
+ List<BytesRef> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
-   bytesTerms.add(term.text());
+   bytesTerms.add(term.bytes());
}
final Query q2 =
-     new BoostQuery(new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0])), boost);
+     new BoostQuery(
+         SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])),
+         boost);
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(q1, Occur.MUST);

DocValuesTermsQuery.java (deleted)

@@ -1,238 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.search;
import java.io.IOException;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A {@link Query} that only accepts documents whose term value in the specified field is contained
* in the provided set of allowed terms.
*
* <p>This is the same functionality as TermsQuery (from queries/), but because of drastically
* different implementations, they also have different performance characteristics, as described
* below.
*
* <p><b>NOTE</b>: be very careful using this query: it is typically much slower than using {@code
* TermsQuery}, but in certain specialized cases may be faster.
*
* <p>With each search, this query translates the specified set of Terms into a private {@link
* LongBitSet} keyed by term number per unique {@link IndexReader} (normally one reader per
* segment). Then, during matching, the term number for each docID is retrieved from the cache and
* then checked for inclusion using the {@link LongBitSet}. Since all testing is done using RAM
* resident data structures, performance should be very fast, most likely fast enough to not require
* further caching of the DocIdSet for each possible combination of terms. However, because docIDs
* are simply scanned linearly, an index with a great many small documents may find this linear scan
* too costly.
*
* <p>In contrast, TermsQuery builds up an {@link FixedBitSet}, keyed by docID, every time it's
* created, by enumerating through all matching docs using {@link
* org.apache.lucene.index.PostingsEnum} to seek and scan through each term's docID list. While
* there is no linear scan of all docIDs, besides the allocation of the underlying array in the
* {@link FixedBitSet}, this approach requires a number of "disk seeks" in proportion to the number
* of terms, which can be exceptionally costly when there are cache misses in the OS's IO cache.
*
* <p>Generally, this filter will be slower on the first invocation for a given field, but
* subsequent invocations, even if you change the allowed set of Terms, should be faster than
* TermsQuery, especially as the number of Terms being matched increases. If you are matching only a
* very small number of terms, and those terms in turn match a very small number of documents,
* TermsQuery may perform faster.
*
* <p>Which query is best is very application dependent.
*
* @lucene.experimental
*/
public class DocValuesTermsQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(DocValuesTermsQuery.class);
private final String field;
private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData
public DocValuesTermsQuery(String field, Collection<BytesRef> terms) {
this.field = Objects.requireNonNull(field);
Objects.requireNonNull(terms, "Collection of terms must not be null");
BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
ArrayUtil.timSort(sortedTerms);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRef previous = null;
for (BytesRef term : sortedTerms) {
if (term.equals(previous) == false) {
builder.add(field, term);
}
previous = term;
}
termData = builder.finish();
termDataHashCode = termData.hashCode();
}
public DocValuesTermsQuery(String field, BytesRef... terms) {
this(field, Arrays.asList(terms));
}
public DocValuesTermsQuery(String field, String... terms) {
this(
field,
new AbstractList<BytesRef>() {
@Override
public BytesRef get(int index) {
return new BytesRef(terms[index]);
}
@Override
public int size() {
return terms.length;
}
});
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(DocValuesTermsQuery other) {
// termData might be heavy to compare so check the hash code first
return termDataHashCode == other.termDataHashCode && termData.equals(other.termData);
}
@Override
public int hashCode() {
return 31 * classHash() + termDataHashCode;
}
@Override
public String toString(String defaultField) {
StringBuilder builder = new StringBuilder();
boolean first = true;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
if (!first) {
builder.append(' ');
}
first = false;
builder.append(new Term(iterator.field(), term).toString());
}
return builder.toString();
}
/**
* @return the name of the field searched by this query.
*/
public String getField() {
return field;
}
/**
* @return the terms looked up by this query, prefix-encoded.
*/
public PrefixCodedTerms getTerms() {
return termData;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES
+ RamUsageEstimator.sizeOfObject(field)
+ RamUsageEstimator.sizeOfObject(termData);
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
final LongBitSet bits = new LongBitSet(values.getValueCount());
boolean matchesAtLeastOneTerm = false;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
final long ord = values.lookupTerm(term);
if (ord >= 0) {
matchesAtLeastOneTerm = true;
bits.set(ord);
}
}
if (matchesAtLeastOneTerm == false) {
return null;
}
return new ConstantScoreScorer(
this,
score(),
scoreMode,
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
if (bits.get(values.nextOrd())) {
return true;
}
}
return false;
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
});
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
};
}
}

TestFieldCacheTermsFilter.java (deleted)

@@ -1,81 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.search;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
/**
* A basic unit test for FieldCacheTermsFilter
*
* @see DocValuesTermsQuery
*/
public class TestFieldCacheTermsFilter extends LuceneTestCase {
public void testMissingTerms() throws Exception {
String fieldName = "field1";
Directory rd = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), rd);
for (int i = 0; i < 100; i++) {
Document doc = new Document();
int term = i * 10; // terms are units of 10;
doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
w.addDocument(doc);
}
IndexReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
int numDocs = reader.numDocs();
ScoreDoc[] results;
List<String> terms = new ArrayList<>();
terms.add("5");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match nothing", 0, results.length);
terms = new ArrayList<>();
terms.add("10");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match 1", 1, results.length);
terms = new ArrayList<>();
terms.add("10");
terms.add("20");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match 2", 2, results.length);
reader.close();
rd.close();
}
}