LUCENE-2013: SpanRegexQuery does not work with QueryScorer.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2009-10-29 16:41:04 +00:00
parent 1b38f9c24d
commit 88d3d41992
4 changed files with 116 additions and 9 deletions

View File

@ -160,6 +160,9 @@ Bug fixes
* LUCENE-2004: Fix Constants.LUCENE_MAIN_VERSION to not be inlined
by client code. (Uwe Schindler)
* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
(Benjamin Keil via Mark Miller)
New features

View File

@ -27,18 +27,26 @@
<property name="memory.jar" location="${common.dir}/build/contrib/memory/lucene-memory-${version}.jar"/>
<available property="memory.jar.present" type="file" file="${memory.jar}"/>
<property name="regex.jar" location="${common.dir}/build/contrib/regex/lucene-regex-${version}.jar"/>
<available property="regex.jar.present" type="file" file="${regex.jar}"/>
<path id="classpath">
<pathelement path="${lucene.jar}"/>
<pathelement path="${memory.jar}"/>
<pathelement path="${regex.jar}"/>
<pathelement path="${project.classpath}"/>
</path>
<target name="compile-core" depends="build-memory, common.compile-core" />
<target name="compile-core" depends="build-memory, build-regex, common.compile-core" />
<target name="build-memory" unless="memory.jar.present">
<echo>Highlighter building dependency ${memory.jar}</echo>
<ant antfile="../memory/build.xml" target="default" inheritall="false" dir="../memory" />
</target>
<target name="build-regex" unless="regex.jar.present">
<echo>Highlighter building dependency ${regex.jar}</echo>
<ant antfile="../regex/build.xml" target="default" inheritall="false" dir="../regex" />
</target>
</project>

View File

@ -32,7 +32,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
@ -220,16 +223,11 @@ public class WeightedSpanTermExtractor {
* @throws IOException
*/
private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<Term>();
spanQuery.extractTerms(nonWeightedTerms);
Set<String> fieldNames;
if (fieldName == null) {
fieldNames = new HashSet<String>();
for (final Term queryTerm : nonWeightedTerms) {
fieldNames.add(queryTerm.field());
}
collectSpanQueryFields(spanQuery, fieldNames);
} else {
fieldNames = new HashSet<String>(1);
fieldNames.add(fieldName);
@ -238,13 +236,33 @@ public class WeightedSpanTermExtractor {
if (defaultField != null) {
fieldNames.add(defaultField);
}
Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
Set<Term> nonWeightedTerms = new HashSet<Term>();
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
if (mustRewriteQuery) {
for (final String field : fieldNames) {
final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field));
queries.put(field, rewrittenQuery);
rewrittenQuery.extractTerms(nonWeightedTerms);
}
} else {
spanQuery.extractTerms(nonWeightedTerms);
}
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
for (final String field : fieldNames) {
IndexReader reader = getReaderForField(field);
Spans spans = spanQuery.getSpans(reader);
final Spans spans;
if (mustRewriteQuery) {
spans = queries.get(field).getSpans(reader);
} else {
spans = spanQuery.getSpans(reader);
}
// collect span positions
while (spans.next()) {
@ -429,6 +447,57 @@ public class WeightedSpanTermExtractor {
return terms;
}
private void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
if (spanQuery instanceof FieldMaskingSpanQuery) {
collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
} else if (spanQuery instanceof SpanFirstQuery) {
collectSpanQueryFields(((SpanFirstQuery)spanQuery).getMatch(), fieldNames);
} else if (spanQuery instanceof SpanNearQuery) {
for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
collectSpanQueryFields(clause, fieldNames);
}
} else if (spanQuery instanceof SpanNotQuery) {
collectSpanQueryFields(((SpanNotQuery)spanQuery).getInclude(), fieldNames);
} else if (spanQuery instanceof SpanOrQuery) {
for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
collectSpanQueryFields(clause, fieldNames);
}
} else {
fieldNames.add(spanQuery.getField());
}
}
private boolean mustRewriteQuery(SpanQuery spanQuery) {
if (!expandMultiTermQuery) {
return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
} else if (spanQuery instanceof FieldMaskingSpanQuery) {
return mustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery());
} else if (spanQuery instanceof SpanFirstQuery) {
return mustRewriteQuery(((SpanFirstQuery)spanQuery).getMatch());
} else if (spanQuery instanceof SpanNearQuery) {
for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
if (mustRewriteQuery(clause)) {
return true;
}
}
return false;
} else if (spanQuery instanceof SpanNotQuery) {
SpanNotQuery spanNotQuery = (SpanNotQuery)spanQuery;
return mustRewriteQuery(spanNotQuery.getInclude()) || mustRewriteQuery(spanNotQuery.getExclude());
} else if (spanQuery instanceof SpanOrQuery) {
for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
if (mustRewriteQuery(clause)) {
return true;
}
}
return false;
} else if (spanQuery instanceof SpanTermQuery) {
return false;
} else {
return true;
}
}
/**
* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.

View File

@ -70,8 +70,10 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
import org.apache.lucene.search.regex.SpanRegexQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
@ -306,6 +308,31 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void testSpanRegexQuery() throws Exception {
query = new SpanOrQuery(new SpanQuery [] {
new SpanRegexQuery(new Term(FIELD_NAME, "ken.*")) });
searcher = new IndexSearcher(ramDir, true);
hits = searcher.search(query, 100);
int maxNumFragmentsRequired = 2;
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
highlighter.setTextFragmenter(new SimpleFragmenter(40));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"...");
System.out.println("\t" + result);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 5);
}
public void testNumericRangeQuery() throws Exception {
// doesn't currently highlight, but make sure it doesn't cause exception either
query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);