diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java new file mode 100644 index 00000000000..8124ae81159 --- /dev/null +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java @@ -0,0 +1,31 @@ +package org.apache.lucene.search.highlight; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Utility class to record Positions Spans + * @lucene.internal + */ +public class PositionSpan { + int start; + int end; + + public PositionSpan(int start, int end) { + this.start = start; + this.end = end; + } +} \ No newline at end of file diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java index 504a3e075c8..59fff98857b 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java @@ -207,8 +207,7 @@ public class QueryScorer implements Scorer { } private TokenStream initExtractor(TokenStream tokenStream) throws IOException { - WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor() - : new WeightedSpanTermExtractor(defaultField); + WeightedSpanTermExtractor qse = newTermExtractor(defaultField); qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze); qse.setExpandMultiTermQuery(expandMultiTermQuery); qse.setWrapIfNotCachingTokenFilter(wrapToCaching); @@ -225,6 +224,11 @@ public class QueryScorer implements Scorer { return null; } + + protected WeightedSpanTermExtractor newTermExtractor(String defaultField) { + return defaultField == null ? new WeightedSpanTermExtractor() + : new WeightedSpanTermExtractor(defaultField); + } /* * (non-Javadoc) diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java index 50a8ad8adc9..eadff70aad7 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java @@ -89,16 +89,7 @@ public class WeightedSpanTerm extends WeightedTerm{ public List getPositionSpans() { return positionSpans; } + } -// Utility class to store a Span -class PositionSpan { - int start; - int end; - - public PositionSpan(int start, int end) { - this.start = start; - this.end = end; - } -} diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 8124ed14e0c..cf9c352b7d8 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -90,7 +90,7 @@ public class WeightedSpanTermExtractor { * Map to place created WeightedSpanTerms in * @throws IOException */ - private void extract(Query query, Map terms) throws IOException { + protected void extract(Query query, Map terms) throws IOException { if (query instanceof BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); @@ -206,6 +206,12 @@ public class WeightedSpanTermExtractor { extractWeightedSpanTerms(terms, sp); } } + extractUnknownQuery(query, terms); + } + + protected void extractUnknownQuery(Query query, + Map terms) throws IOException { + // for sub-classing to extract custom queries } /** @@ -217,7 +223,7 @@ public class WeightedSpanTermExtractor { * SpanQuery to extract Terms from * @throws IOException */ - private void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException { + protected void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException { Set fieldNames; if (fieldName == null) { @@ -305,7 +311,7 @@ public class WeightedSpanTermExtractor { * Query to extract Terms from * @throws IOException */ - private void extractWeightedTerms(Map terms, Query query) throws IOException { + protected void extractWeightedTerms(Map terms, Query query) throws IOException { Set nonWeightedTerms = new HashSet(); query.extractTerms(nonWeightedTerms); @@ -321,13 +327,13 @@ public class WeightedSpanTermExtractor { /** * Necessary to implement matches for queries against defaultField */ - private boolean fieldNameComparator(String fieldNameToCheck) { + protected boolean fieldNameComparator(String fieldNameToCheck) { boolean rv = fieldName == null || fieldName.equals(fieldNameToCheck) || (defaultField != null && defaultField.equals(fieldNameToCheck)); return rv; } - private AtomicReaderContext getLeafContextForField(String field) throws IOException { + protected AtomicReaderContext getLeafContextForField(String field) throws IOException { if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) { tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); cachedTokenStream = true; @@ -449,7 +455,7 @@ public class WeightedSpanTermExtractor { return terms; } - private void collectSpanQueryFields(SpanQuery spanQuery, Set fieldNames) { + protected void collectSpanQueryFields(SpanQuery spanQuery, Set fieldNames) { if (spanQuery instanceof FieldMaskingSpanQuery) { collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames); } else if (spanQuery instanceof SpanFirstQuery) { @@ -469,7 +475,7 @@ public class WeightedSpanTermExtractor { } } - private boolean mustRewriteQuery(SpanQuery spanQuery) { + protected boolean mustRewriteQuery(SpanQuery spanQuery) { if (!expandMultiTermQuery) { return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery. } else if (spanQuery instanceof FieldMaskingSpanQuery) { @@ -504,7 +510,8 @@ public class WeightedSpanTermExtractor { * This class makes sure that if both position sensitive and insensitive * versions of the same term are added, the position insensitive one wins. */ - static private class PositionCheckingMap extends HashMap { + @SuppressWarnings("serial") + protected static class PositionCheckingMap extends HashMap { @Override public void putAll(Map m) { diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java new file mode 100644 index 00000000000..c76a8ff9e50 --- /dev/null +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java @@ -0,0 +1,186 @@ +package org.apache.lucene.search.highlight.custom; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import java.io.IOException; +import java.io.StringReader; +import java.util.Map; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleFragmenter; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.WeightedSpanTerm; +import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; +import org.apache.lucene.util.LuceneTestCase; + +/** + * Tests the extensibility of {@link WeightedSpanTermExtractor} and + * {@link QueryScorer} in a user defined package + */ +public class HighlightCustomQueryTest extends LuceneTestCase { + + private static final String FIELD_NAME = "contents"; + + public void testHighlightCustomQuery() throws IOException, + InvalidTokenOffsetsException { + String s1 = "I call our world Flatland, not because we call it so,"; + + // Verify that a query against the default field results in text being + // highlighted + // regardless of the field name. + + CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world")); + + String expected = "I call our world Flatland, not because we call it so,"; + String observed = highlightField(q, "SOME_FIELD_NAME", s1); + if (VERBOSE) + System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + + observed); + assertEquals( + "Query in the default field results in text for *ANY* field being highlighted", + expected, observed); + + // Verify that a query against a named field does not result in any + // highlighting + // when the query field name differs from the name of the field being + // highlighted, + // which in this example happens to be the default field name. + q = new CustomQuery(new Term("text", "world")); + + expected = s1; + observed = highlightField(q, FIELD_NAME, s1); + if (VERBOSE) + System.out.println("Expected: \"" + expected + "\n" + "Observed: \"" + + observed); + assertEquals( + "Query in a named field does not result in highlighting when that field isn't in the query", + s1, highlightField(q, FIELD_NAME, s1)); + + } + + /** + * This method intended for use with + * testHighlightingWithDefaultField() + * + * @throws InvalidTokenOffsetsException + */ + private static String highlightField(Query query, String fieldName, + String text) throws IOException, InvalidTokenOffsetsException { + TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, + true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName, + new StringReader(text)); + // Assuming "", "" used to highlight + SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(); + MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME); + Highlighter highlighter = new Highlighter(formatter, scorer); + highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE)); + + String rv = highlighter.getBestFragments(tokenStream, text, 1, + "(FIELD TEXT TRUNCATED)"); + return rv.length() == 0 ? text : rv; + } + + public static class MyWeightedSpanTermExtractor extends + WeightedSpanTermExtractor { + + public MyWeightedSpanTermExtractor() { + super(); + } + + public MyWeightedSpanTermExtractor(String defaultField) { + super(defaultField); + } + + @Override + protected void extractUnknownQuery(Query query, + Map terms) throws IOException { + if (query instanceof CustomQuery) { + extractWeightedTerms(terms, new TermQuery(((CustomQuery) query).term)); + } + } + + } + + public static class MyQueryScorer extends QueryScorer { + + public MyQueryScorer(Query query, String field, String defaultField) { + super(query, field, defaultField); + } + + @Override + protected WeightedSpanTermExtractor newTermExtractor(String defaultField) { + return defaultField == null ? new MyWeightedSpanTermExtractor() + : new MyWeightedSpanTermExtractor(defaultField); + } + + } + + public static class CustomQuery extends Query { + private final Term term; + + public CustomQuery(Term term) { + super(); + this.term = term; + } + + @Override + public String toString(String field) { + return new TermQuery(term).toString(field); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + return new TermQuery(term); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((term == null) ? 0 : term.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + CustomQuery other = (CustomQuery) obj; + if (term == null) { + if (other.term != null) + return false; + } else if (!term.equals(other.term)) + return false; + return true; + } + + } +}