Add BytesRefIterator to TermInSetQuery (#13806)

TermInSetQuery used to have an accessor to its terms that was removed in #12173
to avoid leaking internal encoding details. This introduces an accessor to the
term data in the query that doesn't expose internals but merely allows iterating
over the decoded BytesRefs, making inspection of the query's content possible again.

Closes #13804
This commit is contained in:
Christoph Büscher 2024-09-19 11:51:42 +02:00 committed by GitHub
parent 6d987e1ce1
commit e4ac57746e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 29 additions and 7 deletions

View File

@ -295,6 +295,8 @@ Build
API Changes
---------------------
* GITHUB#13806: Add TermInSetQuery#getBytesRefIterator to be able to iterate over query terms. (Christoph Büscher)
* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)
* GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)

View File

@ -27,13 +27,7 @@ import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefComparator;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringSorter;
import org.apache.lucene.util.*;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@ -141,6 +135,16 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
return termData.size();
}
/**
 * Returns an iterator over the terms held by this query, intended for query inspection.
 *
 * <p>Every call obtains a new iterator from the underlying term data. The result is exposed
 * only through the {@link BytesRefIterator} interface, so callers can step through the terms
 * but cannot reach the internal iterator type.
 *
 * @return an iterator whose {@code next()} delegates to the term data iterator
 * @lucene.experimental
 */
public BytesRefIterator getBytesRefIterator() {
  final TermIterator terms = termData.iterator();
  // Bind only next(), so the concrete TermIterator is not handed out to callers.
  return terms::next;
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field) == false) {

View File

@ -52,6 +52,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@ -527,4 +528,19 @@ public class TestTermInSetQuery extends LuceneTestCase {
}
});
}
/** Checks that {@code getBytesRefIterator()} walks the query terms and then returns null. */
public void testTermsIterator() throws IOException {
  // A query built from an empty term list is exhausted on the first call.
  BytesRefIterator noTerms =
      new TermInSetQuery("field", Collections.emptyList()).getBytesRefIterator();
  assertNull(noTerms.next());

  TermInSetQuery threeTerms =
      new TermInSetQuery(
          "field", List.of(newBytesRef("term1"), newBytesRef("term2"), newBytesRef("term3")));
  BytesRefIterator terms = threeTerms.getBytesRefIterator();
  // Each term is expected in turn, followed by null once the iterator is exhausted.
  for (String expected : new String[] {"term1", "term2", "term3"}) {
    assertEquals(newBytesRef(expected), terms.next());
  }
  assertNull(terms.next());
}
}