From e4ac57746eb86846b3a53944c14e09873f793ff1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 19 Sep 2024 11:51:42 +0200 Subject: [PATCH] Add BytesRefIterator to TermInSetQuery (#13806) TermInSetQuery used to have an accessor to its terms that was removed in #12173 to prevent leaking internal encoding details. This introduces an accessor to the term data in the query that doesn't expose internals but merely allows iterating over the decoded BytesRef, making inspection of the query's content possible again. Closes #13804 --- lucene/CHANGES.txt | 2 ++ .../apache/lucene/search/TermInSetQuery.java | 18 +++++++++++------- .../lucene/search/TestTermInSetQuery.java | 16 ++++++++++++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index bcbc1e33b83..09096a48faa 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -295,6 +295,8 @@ Build API Changes --------------------- +* GITHUB#13806: Add TermInSetQuery#getBytesRefIterator to be able to iterate over query terms. (Christoph Büscher) + * GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov) * GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. 
(Peter Gromov) diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java index da01b24f0bd..c82df0ac1eb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java @@ -27,13 +27,7 @@ import org.apache.lucene.index.PrefixCodedTerms.TermIterator; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.BytesRefComparator; -import org.apache.lucene.util.RamUsageEstimator; -import org.apache.lucene.util.StringSorter; +import org.apache.lucene.util.*; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.ByteRunAutomaton; @@ -141,6 +135,16 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable { return termData.size(); } + /** + * Get an iterator over the encoded terms for query inspection. 
+ * + * @lucene.experimental + */ + public BytesRefIterator getBytesRefIterator() { + final TermIterator iterator = this.termData.iterator(); + return () -> iterator.next(); + } + @Override public void visit(QueryVisitor visitor) { if (visitor.acceptField(field) == false) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java index 7cfd0c5adde..b6503021617 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java @@ -52,6 +52,7 @@ import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.RamUsageTester; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefIterator; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.automaton.ByteRunAutomaton; @@ -527,4 +528,19 @@ public class TestTermInSetQuery extends LuceneTestCase { } }); } + + public void testTermsIterator() throws IOException { + TermInSetQuery empty = new TermInSetQuery("field", Collections.emptyList()); + BytesRefIterator it = empty.getBytesRefIterator(); + assertNull(it.next()); + + TermInSetQuery query = + new TermInSetQuery( + "field", List.of(newBytesRef("term1"), newBytesRef("term2"), newBytesRef("term3"))); + it = query.getBytesRefIterator(); + assertEquals(newBytesRef("term1"), it.next()); + assertEquals(newBytesRef("term2"), it.next()); + assertEquals(newBytesRef("term3"), it.next()); + assertNull(it.next()); + } }