wip: add base test + minor tweaks

This commit is contained in:
Mateus Lucas 2024-10-19 14:44:33 -03:00
parent 1faf33a02a
commit aa9601a8dd
No known key found for this signature in database
GPG Key ID: 14DD2B2F5FD73F96
3 changed files with 60 additions and 25 deletions

View File

@ -405,12 +405,10 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
} }
protected String getPreTag(String[] preTags, int num) { protected String getPreTag(String[] preTags, int num) {
int n = num % preTags.length; return preTags[num];
return preTags[n];
} }
protected String getPostTag(String[] postTags, int num) { protected String getPostTag(String[] postTags, int num) {
int n = num % postTags.length; return postTags[num];
return postTags[n];
} }
} }

View File

@ -57,7 +57,9 @@ public class FieldQuery {
// fieldMatch==false, Map<null,setOfTermsInQueries> // fieldMatch==false, Map<null,setOfTermsInQueries>
Map<String, Set<String>> termSetMap = new HashMap<>(); Map<String, Set<String>> termSetMap = new HashMap<>();
int termOrPhraseNumber; // used for colored tag support // index of the original query term or phrase in the list of expanded terms or phrases
final Map<String, Integer> queryIndexHighlights = new HashMap<>();
int previousIndex = 0;
// The maximum number of different matching terms accumulated from any one MultiTermQuery // The maximum number of different matching terms accumulated from any one MultiTermQuery
private static final int MAX_MTQ_TERMS = 1024; private static final int MAX_MTQ_TERMS = 1024;
@ -65,18 +67,23 @@ public class FieldQuery {
public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch)
throws IOException { throws IOException {
this.fieldMatch = fieldMatch; this.fieldMatch = fieldMatch;
Set<Query> flatQueries = new LinkedHashSet<>(); final Set<Query> flatQueries = new LinkedHashSet<>();
IndexSearcher searcher; final IndexSearcher searcher = reader == null ? null : new IndexSearcher(reader);
if (reader == null) {
searcher = null; buildQueryIndexHighlights(query);
} else {
searcher = new IndexSearcher(reader);
}
flatten(query, searcher, flatQueries, 1f); flatten(query, searcher, flatQueries, 1f);
saveTerms(flatQueries, searcher); saveTerms(flatQueries, searcher);
Collection<Query> expandQueries = expand(flatQueries); Collection<Query> expandQueries = expand(flatQueries);
for (Query flatQuery : expandQueries) { for (Query flatQuery : expandQueries) {
int queryIndex;
if (this.queryIndexHighlights.containsKey(flatQuery.toString())) {
queryIndex = this.queryIndexHighlights.get(flatQuery.toString());
previousIndex = queryIndex;
} else {
queryIndex = previousIndex;
}
QueryPhraseMap rootMap = getRootMap(flatQuery); QueryPhraseMap rootMap = getRootMap(flatQuery);
rootMap.add(flatQuery, reader); rootMap.add(flatQuery, reader);
float boost = 1f; float boost = 1f;
@ -88,12 +95,21 @@ public class FieldQuery {
if (!phraseHighlight && flatQuery instanceof PhraseQuery) { if (!phraseHighlight && flatQuery instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) flatQuery; PhraseQuery pq = (PhraseQuery) flatQuery;
if (pq.getTerms().length > 1) { if (pq.getTerms().length > 1) {
for (Term term : pq.getTerms()) rootMap.addTerm(term, boost); for (Term term : pq.getTerms()) rootMap.addTerm(term, boost, queryIndex);
} }
} }
} }
} }
/**
 * Fills {@code queryIndexHighlights} from the direct clauses of a {@link BooleanQuery}.
 *
 * <p>Each clause's query is stored under its {@code toString()} form, mapped to the clause's
 * zero-based position in {@code clauses()}. Clauses whose queries render to the same string share
 * one entry, and the later position wins. Any other kind of query leaves the map untouched.
 *
 * @param query the query handed to the constructor
 */
private void buildQueryIndexHighlights(Query query) {
  if (!(query instanceof BooleanQuery booleanQuery)) {
    return;
  }
  int position = 0;
  for (BooleanClause clause : booleanQuery.clauses()) {
    queryIndexHighlights.put(clause.query().toString(), position);
    position++;
  }
}
/** /**
* For backwards compatibility you can initialize FieldQuery without an IndexReader, which is only * For backwards compatibility you can initialize FieldQuery without an IndexReader, which is only
* required to support MultiTermQuery * required to support MultiTermQuery
@ -372,10 +388,6 @@ public class FieldQuery {
return rootMaps.get(fieldMatch ? fieldName : null); return rootMaps.get(fieldMatch ? fieldName : null);
} }
int nextTermOrPhraseNumber() {
return termOrPhraseNumber++;
}
/** Internal structure of a query for highlighting: represents a nested query structure */ /** Internal structure of a query for highlighting: represents a nested query structure */
public static class QueryPhraseMap { public static class QueryPhraseMap {
@ -390,9 +402,9 @@ public class FieldQuery {
this.fieldQuery = fieldQuery; this.fieldQuery = fieldQuery;
} }
void addTerm(Term term, float boost) { void addTerm(Term term, float boost, int queryIndex) {
QueryPhraseMap map = getOrNewMap(subMap, term.text()); QueryPhraseMap map = getOrNewMap(subMap, term.text());
map.markTerminal(boost); map.markTerminal(boost, queryIndex);
} }
private QueryPhraseMap getOrNewMap(Map<String, QueryPhraseMap> subMap, String term) { private QueryPhraseMap getOrNewMap(Map<String, QueryPhraseMap> subMap, String term) {
@ -405,6 +417,12 @@ public class FieldQuery {
} }
void add(Query query, IndexReader reader) { void add(Query query, IndexReader reader) {
int highlightsLength = fieldQuery.queryIndexHighlights.size();
int queryIndex = Math.min(fieldQuery.previousIndex + 1, highlightsLength - 1);
if (fieldQuery.queryIndexHighlights.containsKey(query.toString())) {
queryIndex = fieldQuery.queryIndexHighlights.get(query.toString());
}
float boost = 1f; float boost = 1f;
while (query instanceof BoostQuery) { while (query instanceof BoostQuery) {
BoostQuery bq = (BoostQuery) query; BoostQuery bq = (BoostQuery) query;
@ -412,7 +430,7 @@ public class FieldQuery {
boost = bq.getBoost(); boost = bq.getBoost();
} }
if (query instanceof TermQuery) { if (query instanceof TermQuery) {
addTerm(((TermQuery) query).getTerm(), boost); addTerm(((TermQuery) query).getTerm(), boost, queryIndex);
} else if (query instanceof PhraseQuery) { } else if (query instanceof PhraseQuery) {
PhraseQuery pq = (PhraseQuery) query; PhraseQuery pq = (PhraseQuery) query;
Term[] terms = pq.getTerms(); Term[] terms = pq.getTerms();
@ -422,7 +440,7 @@ public class FieldQuery {
qpm = getOrNewMap(map, term.text()); qpm = getOrNewMap(map, term.text());
map = qpm.subMap; map = qpm.subMap;
} }
qpm.markTerminal(pq.getSlop(), boost); qpm.markTerminal(pq.getSlop(), boost, queryIndex);
} else } else
throw new RuntimeException("query \"" + query.toString() + "\" must be flatten first."); throw new RuntimeException("query \"" + query.toString() + "\" must be flatten first.");
} }
@ -431,15 +449,15 @@ public class FieldQuery {
return subMap.get(term); return subMap.get(term);
} }
private void markTerminal(float boost) { private void markTerminal(float boost, int queryIndex) {
markTerminal(0, boost); markTerminal(0, boost, queryIndex);
} }
private void markTerminal(int slop, float boost) { private void markTerminal(int slop, float boost, int queryIndex) {
this.terminal = true; this.terminal = true;
this.slop = slop; this.slop = slop;
this.boost = boost; this.boost = boost;
this.termOrPhraseNumber = fieldQuery.nextTermOrPhraseNumber(); this.termOrPhraseNumber = queryIndex;
} }
public boolean isTerminal() { public boolean isTerminal() {

View File

@ -23,6 +23,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.BoostQuery;
@ -36,6 +38,7 @@ import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap;
import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
public class TestFieldQuery extends AbstractTestCase { public class TestFieldQuery extends AbstractTestCase {
@ -953,4 +956,20 @@ public class TestFieldQuery extends AbstractTestCase {
fq.flatten(query, searcher, flatQueries, 1f); fq.flatten(query, searcher, flatQueries, 1f);
assertCollectionQueries(flatQueries, tq(boost, "A")); assertCollectionQueries(flatQueries, tq(boost, "A"));
} }
/**
 * Builds a top-level boolean query with three SHOULD clauses (the query produced by {@code
 * createBooleanQuery} for "A B", then the phrase "C B" with slop 0 and again with slop 2),
 * constructs a {@link FieldQuery} from it, and checks that {@code previousIndex} ends up as 2.
 *
 * <p>NOTE(review): 2 is the zero-based position of the last clause, not the clause count (3),
 * although the method name speaks of the original query "size" -- confirm which is intended.
 */
public void testTermOrPhraseNumberShouldBeSameAsOriginalQuerySize() throws IOException {
// Arrange
final String field = "field";
final QueryParser queryParser = new QueryParser(field, new MockAnalyzer(random()));
final BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
queryBuilder.add(queryParser.createBooleanQuery(field, "A B"), Occur.SHOULD);
queryBuilder.add(queryParser.createPhraseQuery(field, "C B", 0), Occur.SHOULD);
queryBuilder.add(queryParser.createPhraseQuery(field, "C B", 2), Occur.SHOULD);
// Act
final FieldQuery fieldQuery = new FieldQuery(queryBuilder.build(), true, true);
// Assert
assertEquals(2, fieldQuery.previousIndex);
}
} }