From 5852a0fb8cadc70cfd5db52613e423a499abee37 Mon Sep 17 00:00:00 2001 From: Jakub Slowinski <32519034+slow-J@users.noreply.github.com> Date: Tue, 5 Dec 2023 22:35:37 +0000 Subject: [PATCH] Mark TermInSetQuery ctors with varargs terms as deprecated (#12864) --- lucene/CHANGES.txt | 3 +- .../apache/lucene/document/KeywordField.java | 19 ++++++ .../lucene/document/SortedDocValuesField.java | 16 +++++ .../document/SortedSetDocValuesField.java | 18 +++++ .../apache/lucene/search/TermInSetQuery.java | 11 +++- .../lucene/document/TestKeywordField.java | 10 +-- .../TestSortedSetDocValuesSetQuery.java | 65 ++++++++++--------- .../lucene/search/TestTermInSetQuery.java | 31 +++++---- .../MultipassTermFilteredPresearcher.java | 3 +- .../monitor/TestMultipassPresearcher.java | 17 +++-- .../lucene/monitor/TestTermPresearcher.java | 3 +- 11 files changed, 140 insertions(+), 56 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 78745e0fd94..9cf0002bd1d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -7,7 +7,8 @@ http://s.apache.org/luceneversions API Changes --------------------- -(No changes) +* GITHUB#12243: Mark TermInSetQuery ctors with varargs terms as @Deprecated. SortedSetDocValuesField#newSlowSetQuery, + SortedDocValuesField#newSlowSetQuery, KeywordField#newSetQuery now take a collection of terms as a param. (Jakub Slowinski) New Features --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java index 654c2dab9c4..ba4ef0be58e 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.document; +import java.util.Collection; import java.util.Objects; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -170,7 +171,9 @@ public class KeywordField extends Field { * @param values the set of values to match * @throws NullPointerException if {@code field} is null. * @return a query matching documents with this exact value + * @deprecated Use {@link #newSetQuery(String, Collection)} instead. */ + @Deprecated(forRemoval = true, since = "9.10") public static Query newSetQuery(String field, BytesRef... values) { Objects.requireNonNull(field, "field must not be null"); Objects.requireNonNull(values, "values must not be null"); @@ -179,6 +182,22 @@ public class KeywordField extends Field { return new IndexOrDocValuesQuery(indexQuery, dvQuery); } + /** + * Create a query for matching any of a set of provided {@link BytesRef} values. + * + * @param field field name. must not be {@code null}. + * @param values the set of values to match + * @throws NullPointerException if {@code field} is null. + * @return a query matching documents with this exact value + */ + public static Query newSetQuery(String field, Collection values) { + Objects.requireNonNull(field, "field must not be null"); + Objects.requireNonNull(values, "values must not be null"); + Query indexQuery = new TermInSetQuery(field, values); + Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + /** * Create a new {@link SortField} for {@link BytesRef} values. * diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java index e9e1b26521d..f0a7e555b9f 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedDocValuesField.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.document; +import java.util.Collection; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; @@ -98,8 +99,23 @@ public class SortedDocValuesField extends Field { * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as * {@link TermInSetQuery}. + * + * @deprecated Use {@link #newSlowSetQuery(String, Collection)} instead. */ + @Deprecated(forRemoval = true, since = "9.10") public static Query newSlowSetQuery(String field, BytesRef... values) { return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values); } + + /** + * Create a query matching any of the specified values. + * + *

NOTE: Such queries cannot efficiently advance to the next match, which makes them + * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped + * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as + * {@link TermInSetQuery}. + */ + public static Query newSlowSetQuery(String field, Collection values) { + return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values); + } } diff --git a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java index 14eaffe842f..340a0d966fe 100644 --- a/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java +++ b/lucene/core/src/java/org/apache/lucene/document/SortedSetDocValuesField.java @@ -16,6 +16,7 @@ */ package org.apache.lucene.document; +import java.util.Collection; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; @@ -102,8 +103,25 @@ public class SortedSetDocValuesField extends Field { * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as * {@link TermInSetQuery}. + * + * @deprecated Use {@link #newSlowSetQuery(String, Collection)} instead. */ + @Deprecated(forRemoval = true, since = "9.10") public static Query newSlowSetQuery(String field, BytesRef... values) { return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values); } + + /** + * Create a query matching any of the specified values. + * + *

This query also works with fields that have indexed {@link SortedDocValuesField}s. + * + *

NOTE: Such queries cannot efficiently advance to the next match, which makes them + * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped + * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as + * {@link TermInSetQuery}. + */ + public static Query newSlowSetQuery(String field, Collection values) { + return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values); + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index 51d44f254a8..f81131bfaa0 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -84,6 +84,10 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable { this(field, packTerms(field, terms)); } + /** + * @deprecated Use {@link #TermInSetQuery(String, Collection)} instead. + */ + @Deprecated(since = "9.10") public TermInSetQuery(String field, BytesRef... terms) { this(field, packTerms(field, Arrays.asList(terms))); } @@ -96,7 +100,12 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable { termDataHashCode = termData.hashCode(); } - /** Creates a new {@link TermInSetQuery} from the given array of terms. */ + /** + * Creates a new {@link TermInSetQuery} from the given array of terms. + * + * @deprecated Use {@link #TermInSetQuery(RewriteMethod, String, Collection)} instead. + */ + @Deprecated(since = "9.10") public TermInSetQuery(RewriteMethod rewriteMethod, String field, BytesRef... terms) { this(rewriteMethod, field, Arrays.asList(terms)); } diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java index 9654a04aa24..8e577fcaff3 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java @@ -17,7 +17,9 @@ package org.apache.lucene.document; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; +import java.util.List; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -126,15 +128,15 @@ public class TestKeywordField extends LuceneTestCase { } public void testValueClone() { - BytesRef[] values = new BytesRef[100]; + List values = new ArrayList<>(100); for (int i = 0; i < 100; i++) { String s = TestUtil.randomSimpleString(random(), 10, 20); - values[i] = new BytesRef(s); + values.add(new BytesRef(s)); } // Make sure we don't modify the input values array. - BytesRef[] expected = values.clone(); + List expected = new ArrayList<>(values); KeywordField.newSetQuery("f", values); - assertArrayEquals(expected, values); + assertEquals(expected, values); } } diff --git a/lucene/core/src/test/org/apache/lucene/document/TestSortedSetDocValuesSetQuery.java b/lucene/core/src/test/org/apache/lucene/document/TestSortedSetDocValuesSetQuery.java index 4b3d2ad994c..59a7ffbb135 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestSortedSetDocValuesSetQuery.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestSortedSetDocValuesSetQuery.java @@ -61,29 +61,20 @@ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase { List terms = new ArrayList<>(); terms.add(new BytesRef("5")); results = - searcher.search( - SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])), - numDocs) - .scoreDocs; + searcher.search(SortedDocValuesField.newSlowSetQuery(fieldName, terms), numDocs).scoreDocs; assertEquals("Must match nothing", 0, results.length); terms = new ArrayList<>(); terms.add(new BytesRef("10")); results = - searcher.search( - SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])), - numDocs) - .scoreDocs; + searcher.search(SortedDocValuesField.newSlowSetQuery(fieldName, terms), numDocs).scoreDocs; assertEquals("Must match 1", 1, results.length); terms = new ArrayList<>(); terms.add(new BytesRef("10")); terms.add(new BytesRef("20")); results = - searcher.search( - SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])), - numDocs) - .scoreDocs; + searcher.search(SortedDocValuesField.newSlowSetQuery(fieldName, terms), numDocs).scoreDocs; assertEquals("Must match 2", 2, results.length); reader.close(); @@ -91,21 +82,39 @@ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase { } public void testEquals() { + List bar = new ArrayList<>(); + bar.add(new BytesRef("bar")); + + List barbar = new ArrayList<>(); + barbar.add(new BytesRef("bar")); + barbar.add(new BytesRef("bar")); + + List barbaz = new ArrayList<>(); + barbaz.add(new BytesRef("bar")); + barbaz.add(new BytesRef("baz")); + + List bazbar = new ArrayList<>(); + bazbar.add(new BytesRef("baz")); + bazbar.add(new BytesRef("bar")); + + List baz = new ArrayList<>(); + baz.add(new BytesRef("baz")); + assertEquals( - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")), - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"))); + SortedDocValuesField.newSlowSetQuery("foo", bar), + SortedDocValuesField.newSlowSetQuery("foo", bar)); assertEquals( - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")), - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("bar"))); + SortedDocValuesField.newSlowSetQuery("foo", bar), + SortedDocValuesField.newSlowSetQuery("foo", barbar)); assertEquals( - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("baz")), - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz"), new BytesRef("bar"))); - assertFalse( - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")) - .equals(SortedDocValuesField.newSlowSetQuery("foo2", new BytesRef("bar")))); - assertFalse( - SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")) - .equals(SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz")))); + SortedDocValuesField.newSlowSetQuery("foo", barbaz), + SortedDocValuesField.newSlowSetQuery("foo", bazbar)); + assertNotEquals( + SortedDocValuesField.newSlowSetQuery("foo", bar), + SortedDocValuesField.newSlowSetQuery("foo2", bar)); + assertNotEquals( + SortedDocValuesField.newSlowSetQuery("foo", bar), + SortedDocValuesField.newSlowSetQuery("foo", baz)); } public void testDuelTermsQuery() throws IOException { @@ -159,9 +168,7 @@ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase { bytesTerms.add(term.bytes()); } final Query q2 = - new BoostQuery( - SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])), - boost); + new BoostQuery(SortedDocValuesField.newSlowSetQuery("f", bytesTerms), boost); assertSameMatches(searcher, q1, q2, true); } @@ -221,9 +228,7 @@ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase { bytesTerms.add(term.bytes()); } final Query q2 = - new BoostQuery( - SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])), - boost); + new BoostQuery(SortedDocValuesField.newSlowSetQuery("f", bytesTerms), boost); BooleanQuery.Builder bq1 = new BooleanQuery.Builder(); bq1.add(q1, Occur.MUST); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java index a62d7f8fc4d..091c94ad038 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java @@ -193,40 +193,42 @@ public class TestTermInSetQuery extends LuceneTestCase { } } - TermInSetQuery tq1 = new TermInSetQuery("thing", newBytesRef("apple")); - TermInSetQuery tq2 = new TermInSetQuery("thing", newBytesRef("orange")); + TermInSetQuery tq1 = new TermInSetQuery("thing", List.of(newBytesRef("apple"))); + TermInSetQuery tq2 = new TermInSetQuery("thing", List.of(newBytesRef("orange"))); assertFalse(tq1.hashCode() == tq2.hashCode()); // different fields with the same term should have differing hashcodes - tq1 = new TermInSetQuery("thing", newBytesRef("apple")); - tq2 = new TermInSetQuery("thing2", newBytesRef("apple")); + tq1 = new TermInSetQuery("thing", List.of(newBytesRef("apple"))); + tq2 = new TermInSetQuery("thing2", List.of(newBytesRef("apple"))); assertFalse(tq1.hashCode() == tq2.hashCode()); } public void testSimpleEquals() { // Two terms with the same hash code assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode()); - TermInSetQuery left = new TermInSetQuery("id", newBytesRef("AaAaAa"), newBytesRef("AaAaBB")); - TermInSetQuery right = new TermInSetQuery("id", newBytesRef("AaAaAa"), newBytesRef("BBBBBB")); + TermInSetQuery left = + new TermInSetQuery("id", List.of(newBytesRef("AaAaAa"), newBytesRef("AaAaBB"))); + TermInSetQuery right = + new TermInSetQuery("id", List.of(newBytesRef("AaAaAa"), newBytesRef("BBBBBB"))); assertFalse(left.equals(right)); } public void testToString() { TermInSetQuery termsQuery = - new TermInSetQuery("field1", newBytesRef("a"), newBytesRef("b"), newBytesRef("c")); + new TermInSetQuery("field1", List.of(newBytesRef("a"), newBytesRef("b"), newBytesRef("c"))); assertEquals("field1:(a b c)", termsQuery.toString()); } public void testDedup() { - Query query1 = new TermInSetQuery("foo", newBytesRef("bar")); - Query query2 = new TermInSetQuery("foo", newBytesRef("bar"), newBytesRef("bar")); + Query query1 = new TermInSetQuery("foo", List.of(newBytesRef("bar"))); + Query query2 = new TermInSetQuery("foo", List.of(newBytesRef("bar"), newBytesRef("bar"))); QueryUtils.checkEqual(query1, query2); } public void testOrderDoesNotMatter() { // order of terms if different - Query query1 = new TermInSetQuery("foo", newBytesRef("bar"), newBytesRef("baz")); - Query query2 = new TermInSetQuery("foo", newBytesRef("baz"), newBytesRef("bar")); + Query query1 = new TermInSetQuery("foo", List.of(newBytesRef("bar"), newBytesRef("baz"))); + Query query2 = new TermInSetQuery("foo", List.of(newBytesRef("baz"), newBytesRef("bar"))); QueryUtils.checkEqual(query1, query2); } @@ -346,12 +348,13 @@ public class TestTermInSetQuery extends LuceneTestCase { public void testBinaryToString() { TermInSetQuery query = - new TermInSetQuery("field", newBytesRef(new byte[] {(byte) 0xff, (byte) 0xfe})); + new TermInSetQuery("field", List.of(newBytesRef(new byte[] {(byte) 0xff, (byte) 0xfe}))); assertEquals("field:([ff fe])", query.toString()); } public void testIsConsideredCostlyByQueryCache() throws IOException { - TermInSetQuery query = new TermInSetQuery("foo", newBytesRef("bar"), newBytesRef("baz")); + TermInSetQuery query = + new TermInSetQuery("foo", List.of(newBytesRef("bar"), newBytesRef("baz"))); UsageTrackingQueryCachingPolicy policy = new UsageTrackingQueryCachingPolicy(); assertFalse(policy.shouldCache(query)); policy.onUse(query); @@ -362,7 +365,7 @@ public class TestTermInSetQuery extends LuceneTestCase { public void testVisitor() { // singleton reports back to consumeTerms() - TermInSetQuery singleton = new TermInSetQuery("field", newBytesRef("term1")); + TermInSetQuery singleton = new TermInSetQuery("field", List.of(newBytesRef("term1"))); singleton.visit( new QueryVisitor() { @Override diff --git a/lucene/monitor/src/java/org/apache/lucene/monitor/MultipassTermFilteredPresearcher.java b/lucene/monitor/src/java/org/apache/lucene/monitor/MultipassTermFilteredPresearcher.java index ed2a1856e68..47cf6c8cadc 100644 --- a/lucene/monitor/src/java/org/apache/lucene/monitor/MultipassTermFilteredPresearcher.java +++ b/lucene/monitor/src/java/org/apache/lucene/monitor/MultipassTermFilteredPresearcher.java @@ -17,6 +17,7 @@ package org.apache.lucene.monitor; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -125,7 +126,7 @@ public class MultipassTermFilteredPresearcher extends TermFilteredPresearcher { BooleanQuery.Builder child = new BooleanQuery.Builder(); for (String field : terms.keySet()) { child.add( - new TermInSetQuery(field(field, i), collectedTerms.get(field)), + new TermInSetQuery(field(field, i), Arrays.asList(collectedTerms.get(field))), BooleanClause.Occur.SHOULD); } parent.add(child.build(), BooleanClause.Occur.MUST); diff --git a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMultipassPresearcher.java b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMultipassPresearcher.java index 54ac305d3e5..711c3b6dab5 100644 --- a/lucene/monitor/src/test/org/apache/lucene/monitor/TestMultipassPresearcher.java +++ b/lucene/monitor/src/test/org/apache/lucene/monitor/TestMultipassPresearcher.java @@ -18,6 +18,7 @@ package org.apache.lucene.monitor; import java.io.IOException; +import java.util.List; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -111,25 +112,33 @@ public class TestMultipassPresearcher extends PresearcherTestBase { must( new BooleanQuery.Builder() .add( - should(new TermInSetQuery("f_0", new BytesRef("test")))) + should( + new TermInSetQuery( + "f_0", List.of(new BytesRef("test"))))) .build())) .add( must( new BooleanQuery.Builder() .add( - should(new TermInSetQuery("f_1", new BytesRef("test")))) + should( + new TermInSetQuery( + "f_1", List.of(new BytesRef("test"))))) .build())) .add( must( new BooleanQuery.Builder() .add( - should(new TermInSetQuery("f_2", new BytesRef("test")))) + should( + new TermInSetQuery( + "f_2", List.of(new BytesRef("test"))))) .build())) .add( must( new BooleanQuery.Builder() .add( - should(new TermInSetQuery("f_3", new BytesRef("test")))) + should( + new TermInSetQuery( + "f_3", List.of(new BytesRef("test"))))) .build())) .build())) .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__")))) diff --git a/lucene/monitor/src/test/org/apache/lucene/monitor/TestTermPresearcher.java b/lucene/monitor/src/test/org/apache/lucene/monitor/TestTermPresearcher.java index f2bcaceda2a..0bd5eac4865 100644 --- a/lucene/monitor/src/test/org/apache/lucene/monitor/TestTermPresearcher.java +++ b/lucene/monitor/src/test/org/apache/lucene/monitor/TestTermPresearcher.java @@ -19,6 +19,7 @@ package org.apache.lucene.monitor; import java.io.IOException; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.index.DirectoryReader; @@ -144,7 +145,7 @@ public class TestTermPresearcher extends PresearcherTestBase { .add( should( new BooleanQuery.Builder() - .add(should(new TermInSetQuery("f", new BytesRef("test")))) + .add(should(new TermInSetQuery("f", List.of(new BytesRef("test"))))) .build())) .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__")))) .build();