mirror of https://github.com/apache/lucene.git
LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@831024 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1b38f9c24d
commit
88d3d41992
|
@ -161,6 +161,9 @@ Bug fixes
|
|||
* LUCENE-2004: Fix Constants.LUCENE_MAIN_VERSION to not be inlined
|
||||
by client code. (Uwe Schindler)
|
||||
|
||||
* LUCENE-2013: SpanRegexQuery does not work with QueryScorer.
|
||||
(Benjamin Keil via Mark Miller)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-1933: Provide a convenience AttributeFactory that creates a
|
||||
|
|
|
@ -28,17 +28,25 @@
|
|||
<property name="memory.jar" location="${common.dir}/build/contrib/memory/lucene-memory-${version}.jar"/>
|
||||
<available property="memory.jar.present" type="file" file="${memory.jar}"/>
|
||||
|
||||
<property name="regex.jar" location="${common.dir}/build/contrib/regex/lucene-regex-${version}.jar"/>
|
||||
<available property="regex.jar.present" type="file" file="${regex.jar}"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${lucene.jar}"/>
|
||||
<pathelement path="${memory.jar}"/>
|
||||
<pathelement path="${regex.jar}"/>
|
||||
<pathelement path="${project.classpath}"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="build-memory, common.compile-core" />
|
||||
<target name="compile-core" depends="build-memory, build-regex, common.compile-core" />
|
||||
|
||||
<target name="build-memory" unless="memory.jar.present">
|
||||
<echo>Highlighter building dependency ${memory.jar}</echo>
|
||||
<ant antfile="../memory/build.xml" target="default" inheritall="false" dir="../memory" />
|
||||
</target>
|
||||
|
||||
<target name="build-regex" unless="regex.jar.present">
|
||||
<echo>Highlighter building dependency ${regex.jar}</echo>
|
||||
<ant antfile="../regex/build.xml" target="default" inheritall="false" dir="../regex" />
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
@ -32,7 +32,10 @@ import org.apache.lucene.index.IndexReader;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.memory.MemoryIndex;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanFirstQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
@ -220,16 +223,11 @@ public class WeightedSpanTermExtractor {
|
|||
* @throws IOException
|
||||
*/
|
||||
private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
|
||||
Set<Term> nonWeightedTerms = new HashSet<Term>();
|
||||
spanQuery.extractTerms(nonWeightedTerms);
|
||||
|
||||
Set<String> fieldNames;
|
||||
|
||||
if (fieldName == null) {
|
||||
fieldNames = new HashSet<String>();
|
||||
for (final Term queryTerm : nonWeightedTerms) {
|
||||
fieldNames.add(queryTerm.field());
|
||||
}
|
||||
collectSpanQueryFields(spanQuery, fieldNames);
|
||||
} else {
|
||||
fieldNames = new HashSet<String>(1);
|
||||
fieldNames.add(fieldName);
|
||||
|
@ -239,12 +237,32 @@ public class WeightedSpanTermExtractor {
|
|||
fieldNames.add(defaultField);
|
||||
}
|
||||
|
||||
Map<String, SpanQuery> queries = new HashMap<String, SpanQuery>();
|
||||
|
||||
Set<Term> nonWeightedTerms = new HashSet<Term>();
|
||||
final boolean mustRewriteQuery = mustRewriteQuery(spanQuery);
|
||||
if (mustRewriteQuery) {
|
||||
for (final String field : fieldNames) {
|
||||
final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getReaderForField(field));
|
||||
queries.put(field, rewrittenQuery);
|
||||
rewrittenQuery.extractTerms(nonWeightedTerms);
|
||||
}
|
||||
} else {
|
||||
spanQuery.extractTerms(nonWeightedTerms);
|
||||
}
|
||||
|
||||
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
|
||||
|
||||
for (final String field : fieldNames) {
|
||||
|
||||
IndexReader reader = getReaderForField(field);
|
||||
Spans spans = spanQuery.getSpans(reader);
|
||||
final Spans spans;
|
||||
if (mustRewriteQuery) {
|
||||
spans = queries.get(field).getSpans(reader);
|
||||
} else {
|
||||
spans = spanQuery.getSpans(reader);
|
||||
}
|
||||
|
||||
|
||||
// collect span positions
|
||||
while (spans.next()) {
|
||||
|
@ -429,6 +447,57 @@ public class WeightedSpanTermExtractor {
|
|||
return terms;
|
||||
}
|
||||
|
||||
private void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
|
||||
if (spanQuery instanceof FieldMaskingSpanQuery) {
|
||||
collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
|
||||
} else if (spanQuery instanceof SpanFirstQuery) {
|
||||
collectSpanQueryFields(((SpanFirstQuery)spanQuery).getMatch(), fieldNames);
|
||||
} else if (spanQuery instanceof SpanNearQuery) {
|
||||
for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
|
||||
collectSpanQueryFields(clause, fieldNames);
|
||||
}
|
||||
} else if (spanQuery instanceof SpanNotQuery) {
|
||||
collectSpanQueryFields(((SpanNotQuery)spanQuery).getInclude(), fieldNames);
|
||||
} else if (spanQuery instanceof SpanOrQuery) {
|
||||
for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
|
||||
collectSpanQueryFields(clause, fieldNames);
|
||||
}
|
||||
} else {
|
||||
fieldNames.add(spanQuery.getField());
|
||||
}
|
||||
}
|
||||
|
||||
private boolean mustRewriteQuery(SpanQuery spanQuery) {
|
||||
if (!expandMultiTermQuery) {
|
||||
return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
|
||||
} else if (spanQuery instanceof FieldMaskingSpanQuery) {
|
||||
return mustRewriteQuery(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery());
|
||||
} else if (spanQuery instanceof SpanFirstQuery) {
|
||||
return mustRewriteQuery(((SpanFirstQuery)spanQuery).getMatch());
|
||||
} else if (spanQuery instanceof SpanNearQuery) {
|
||||
for (final SpanQuery clause : ((SpanNearQuery)spanQuery).getClauses()) {
|
||||
if (mustRewriteQuery(clause)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else if (spanQuery instanceof SpanNotQuery) {
|
||||
SpanNotQuery spanNotQuery = (SpanNotQuery)spanQuery;
|
||||
return mustRewriteQuery(spanNotQuery.getInclude()) || mustRewriteQuery(spanNotQuery.getExclude());
|
||||
} else if (spanQuery instanceof SpanOrQuery) {
|
||||
for (final SpanQuery clause : ((SpanOrQuery)spanQuery).getClauses()) {
|
||||
if (mustRewriteQuery(clause)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} else if (spanQuery instanceof SpanTermQuery) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This class makes sure that if both position sensitive and insensitive
|
||||
* versions of the same term are added, the position insensitive one wins.
|
||||
|
|
|
@ -70,8 +70,10 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
|
||||
import org.apache.lucene.search.regex.SpanRegexQuery;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanNotQuery;
|
||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -306,6 +308,31 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
|
||||
}
|
||||
|
||||
public void testSpanRegexQuery() throws Exception {
|
||||
query = new SpanOrQuery(new SpanQuery [] {
|
||||
new SpanRegexQuery(new Term(FIELD_NAME, "ken.*")) });
|
||||
searcher = new IndexSearcher(ramDir, true);
|
||||
hits = searcher.search(query, 100);
|
||||
int maxNumFragmentsRequired = 2;
|
||||
|
||||
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
|
||||
Highlighter highlighter = new Highlighter(this, scorer);
|
||||
|
||||
for (int i = 0; i < hits.totalHits; i++) {
|
||||
String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
|
||||
TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
|
||||
|
||||
highlighter.setTextFragmenter(new SimpleFragmenter(40));
|
||||
|
||||
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
|
||||
"...");
|
||||
System.out.println("\t" + result);
|
||||
}
|
||||
|
||||
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
|
||||
numHighlights == 5);
|
||||
}
|
||||
|
||||
public void testNumericRangeQuery() throws Exception {
|
||||
// doesn't currently highlight, but make sure it doesn't cause exception either
|
||||
query = NumericRangeQuery.newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true);
|
||||
|
|
Loading…
Reference in New Issue