From 81c7ba4601a9aaf16e2255fe493ee582abe72a90 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 5 Feb 2013 13:59:26 +0000 Subject: [PATCH] LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten against the highlighting IndexReader to obtain primitive queries before discarding the query entirely. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1442590 13f79535-47bb-0310-9956-ffa450edef68 --- .../maven/lucene/highlighter/pom.xml.template | 5 + lucene/CHANGES.txt | 6 + lucene/highlighter/build.xml | 1 + .../lucene/search/highlight/Highlighter.java | 8 +- .../highlight/WeightedSpanTermExtractor.java | 140 ++++++++++++------ .../search/vectorhighlight/FieldQuery.java | 37 ++++- .../search/highlight/HighlighterTest.java | 86 ++++++++++- .../FastVectorHighlighterTest.java | 51 +++++++ .../vectorhighlight/FieldQueryTest.java | 42 ++++++ .../lucene/index/memory/MemoryIndex.java | 4 +- 10 files changed, 321 insertions(+), 59 deletions(-) diff --git a/dev-tools/maven/lucene/highlighter/pom.xml.template b/dev-tools/maven/lucene/highlighter/pom.xml.template index 0d5ef3a5804..f739538cb12 100644 --- a/dev-tools/maven/lucene/highlighter/pom.xml.template +++ b/dev-tools/maven/lucene/highlighter/pom.xml.template @@ -61,6 +61,11 @@ lucene-memory ${project.version} + + ${project.groupId} + lucene-queries + ${project.version} + ${module-path}/src/java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 44a1311b2a3..ccd4a69755e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -94,6 +94,12 @@ New Features * LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer creation via the resulting factories using NewAnalyzerTask. (Steve Rowe) +* LUCENE-4728: Unknown and not explicitly mapped queries are now rewritten + against the highlighting IndexReader to obtain primitive queries before + discarding the query entirely. WeightedSpanTermExtractor now builds a + MemoryIndex only once even if multiple fields are highlighted. + (Simon Willnauer) + API Changes * LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera) diff --git a/lucene/highlighter/build.xml b/lucene/highlighter/build.xml index aff4394aa98..290cbe608f6 100644 --- a/lucene/highlighter/build.xml +++ b/lucene/highlighter/build.xml @@ -27,6 +27,7 @@ + diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java index 72f05145460..af0e54cd12a 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java @@ -187,11 +187,9 @@ public class Highlighter ArrayList docFrags = new ArrayList(); StringBuilder newText=new StringBuilder(); - CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); - OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); - tokenStream.addAttribute(PositionIncrementAttribute.class); - tokenStream.reset(); - + CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class); + tokenStream.reset(); TextFragment currentFrag = new TextFragment(newText,newText.length(), docFrags.size()); if (fragmentScorer instanceof QueryScorer) { diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index 4412738cdff..56551ae9389 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -18,7 +18,7 @@ package org.apache.lucene.search.highlight; */ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -29,11 +29,18 @@ import java.util.TreeSet; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.FilterAtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.Terms; import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; import org.apache.lucene.search.spans.SpanFirstQuery; @@ -44,6 +51,8 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; + /** * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether @@ -53,12 +62,13 @@ public class WeightedSpanTermExtractor { private String fieldName; private TokenStream tokenStream; - private Map readers = new HashMap(10); private String defaultField; private boolean expandMultiTermQuery; private boolean cachedTokenStream; private boolean wrapToCaching = true; private int maxDocCharsToAnalyze; + private AtomicReader reader = null; + public WeightedSpanTermExtractor() { } @@ -69,18 +79,6 @@ public class WeightedSpanTermExtractor { } } - private void closeReaders() { - Collection ctxSet = readers.values(); - - for (final AtomicReaderContext ctx : ctxSet) { - try { - ctx.reader().close(); - } catch (IOException e) { - // alert? - } - } - } - /** * Fills a Map with <@link WeightedSpanTerm>s using the terms from the supplied Query. * @@ -146,21 +144,14 @@ public class WeightedSpanTermExtractor { if (q != null) { extract(q, terms); } + } else if (query instanceof CommonTermsQuery) { + // specialized since rewriting would change the result query + // this query is TermContext sensitive. + extractWeightedTerms(terms, query); } else if (query instanceof DisjunctionMaxQuery) { for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) { extract(iterator.next(), terms); } - } else if (query instanceof MultiTermQuery && expandMultiTermQuery) { - MultiTermQuery mtq = ((MultiTermQuery)query); - if(mtq.getRewriteMethod() != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE) { - mtq = (MultiTermQuery) mtq.clone(); - mtq.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - query = mtq; - } - if (mtq.getField() != null) { - IndexReader ir = getLeafContextForField(mtq.getField()).reader(); - extract(query.rewrite(ir), terms); - } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final List termArrays = mpq.getTermArrays(); @@ -210,12 +201,30 @@ public class WeightedSpanTermExtractor { sp.setBoost(query.getBoost()); extractWeightedSpanTerms(terms, sp); } + } else { + Query origQuery = query; + if (query instanceof MultiTermQuery) { + if (!expandMultiTermQuery) { + return; + } + MultiTermQuery copy = (MultiTermQuery) query.clone(); + copy.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + origQuery = copy; + } + final IndexReader reader = getLeafContext().reader(); + Query rewritten = origQuery.rewrite(reader); + if (rewritten != origQuery) { + // only rewrite once and then flatten again - the rewritten query could have a speacial treatment + // if this method is overwritten in a subclass or above in the next recursion + extract(rewritten, terms); + } } extractUnknownQuery(query, terms); } protected void extractUnknownQuery(Query query, Map terms) throws IOException { + // for sub-classing to extract custom queries } @@ -249,7 +258,7 @@ public class WeightedSpanTermExtractor { final boolean mustRewriteQuery = mustRewriteQuery(spanQuery); if (mustRewriteQuery) { for (final String field : fieldNames) { - final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContextForField(field).reader()); + final SpanQuery rewrittenQuery = (SpanQuery) spanQuery.rewrite(getLeafContext().reader()); queries.put(field, rewrittenQuery); rewrittenQuery.extractTerms(nonWeightedTerms); } @@ -266,7 +275,7 @@ public class WeightedSpanTermExtractor { } else { q = spanQuery; } - AtomicReaderContext context = getLeafContextForField(field); + AtomicReaderContext context = getLeafContext(); Map termContexts = new HashMap(); TreeSet extractedTerms = new TreeSet(); q.extractTerms(extractedTerms); @@ -338,23 +347,69 @@ public class WeightedSpanTermExtractor { return rv; } - protected AtomicReaderContext getLeafContextForField(String field) throws IOException { - if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) { - tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); - cachedTokenStream = true; - } - AtomicReaderContext context = readers.get(field); - if (context == null) { - MemoryIndex indexer = new MemoryIndex(); - indexer.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); + protected AtomicReaderContext getLeafContext() throws IOException { + if (reader == null) { + if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) { + assert !cachedTokenStream; + tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze)); + cachedTokenStream = true; + } + final MemoryIndex indexer = new MemoryIndex(true); + indexer.addField(DelegatingAtomicReader.FIELD_NAME, tokenStream); tokenStream.reset(); - IndexSearcher searcher = indexer.createSearcher(); + final IndexSearcher searcher = indexer.createSearcher(); // MEM index has only atomic ctx - context = (AtomicReaderContext) searcher.getTopReaderContext(); - readers.put(field, context); + reader = new DelegatingAtomicReader(((AtomicReaderContext)searcher.getTopReaderContext()).reader()); + } + return reader.getContext(); + } + + /* + * This reader will just delegate every call to a single field in the wrapped + * AtomicReader. This way we only need to build this field once rather than + * N-Times + */ + static final class DelegatingAtomicReader extends FilterAtomicReader { + private static final String FIELD_NAME = "shadowed_field"; + + DelegatingAtomicReader(AtomicReader in) { + super(in); + } + + @Override + public FieldInfos getFieldInfos() { + throw new UnsupportedOperationException(); } - return context; + @Override + public Fields fields() throws IOException { + return new FilterFields(super.fields()) { + @Override + public Terms terms(String field) throws IOException { + return super.terms(DelegatingAtomicReader.FIELD_NAME); + } + + @Override + public Iterator iterator() { + return Collections.singletonList(DelegatingAtomicReader.FIELD_NAME).iterator(); + } + + @Override + public int size() { + return 1; + } + }; + } + + @Override + public DocValues docValues(String field) throws IOException { + return super.docValues(FIELD_NAME); + } + + @Override + public DocValues normValues(String field) throws IOException { + return super.normValues(FIELD_NAME); + } } /** @@ -401,7 +456,7 @@ public class WeightedSpanTermExtractor { try { extract(query, terms); } finally { - closeReaders(); + IOUtils.close(reader); } return terms; @@ -449,8 +504,7 @@ public class WeightedSpanTermExtractor { weightedSpanTerm.weight *= idf; } } finally { - - closeReaders(); + IOUtils.close(reader); } return terms; diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java index 50ad2477ccb..c0284794446 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldQuery.java @@ -28,9 +28,12 @@ import java.util.Set; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.DisjunctionMaxQuery; +import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; @@ -92,8 +95,7 @@ public class FieldQuery { if( !clause.isProhibited() ) flatten( clause.getQuery(), reader, flatQueries ); } - } - else if( sourceQuery instanceof DisjunctionMaxQuery ){ + } else if( sourceQuery instanceof DisjunctionMaxQuery ){ DisjunctionMaxQuery dmq = (DisjunctionMaxQuery)sourceQuery; for( Query query : dmq ){ flatten( query, reader, flatQueries ); @@ -103,12 +105,6 @@ public class FieldQuery { if( !flatQueries.contains( sourceQuery ) ) flatQueries.add( sourceQuery ); } - else if (sourceQuery instanceof MultiTermQuery && reader != null) { - MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone(); - copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS)); - BooleanQuery mtqTerms = (BooleanQuery) copy.rewrite(reader); - flatten(mtqTerms, reader, flatQueries); - } else if( sourceQuery instanceof PhraseQuery ){ if( !flatQueries.contains( sourceQuery ) ){ PhraseQuery pq = (PhraseQuery)sourceQuery; @@ -118,6 +114,31 @@ public class FieldQuery { flatQueries.add( new TermQuery( pq.getTerms()[0] ) ); } } + } else if (sourceQuery instanceof ConstantScoreQuery) { + final Query q = ((ConstantScoreQuery) sourceQuery).getQuery(); + if (q != null) { + flatten(q, reader, flatQueries); + } + } else if (sourceQuery instanceof FilteredQuery) { + final Query q = ((FilteredQuery) sourceQuery).getQuery(); + if (q != null) { + flatten(q, reader, flatQueries); + } + } else if (reader != null){ + Query query = sourceQuery; + if (sourceQuery instanceof MultiTermQuery) { + MultiTermQuery copy = (MultiTermQuery) sourceQuery.clone(); + copy.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(MAX_MTQ_TERMS)); + query = copy; + } + Query rewritten = query.rewrite(reader); + if (rewritten != query) { + // only rewrite once and then flatten again - the rewritten query could have a speacial treatment + // if this method is overwritten in a subclass. + flatten(rewritten, reader, flatQueries); + + } + // if the query is already rewritten we discard it } // else discard queries } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index d41185f8b00..1760023f292 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -46,6 +46,7 @@ import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner; @@ -114,6 +115,87 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte } } + public void testHighlightingCommonTermsQuery() throws Exception { + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3); + query.add(new Term(FIELD_NAME, "this")); + query.add(new Term(FIELD_NAME, "long")); + query.add(new Term(FIELD_NAME, "very")); + + searcher = new IndexSearcher(reader); + TopDocs hits = searcher.search(query, 10); + assertEquals(2, hits.totalHits); + QueryScorer scorer = new QueryScorer(query, FIELD_NAME); + Highlighter highlighter = new Highlighter(scorer); + + StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc); + String storedField = doc.get(FIELD_NAME); + + TokenStream stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer); + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); + highlighter.setTextFragmenter(fragmenter); + String fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", fragment); + + doc = searcher.doc(hits.scoreDocs[1].doc); + storedField = doc.get(FIELD_NAME); + + stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer); + highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); + fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very", fragment); + } + + public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException { + Query query = new Query() { + + @Override + public Query rewrite(IndexReader reader) throws IOException { + CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3); + query.add(new Term(FIELD_NAME, "this")); + query.add(new Term(FIELD_NAME, "long")); + query.add(new Term(FIELD_NAME, "very")); + return query; + } + + @Override + public String toString(String field) { + return null; + } + + }; + + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); + + searcher = new IndexSearcher(reader); + TopDocs hits = searcher.search(query, 10); + assertEquals(2, hits.totalHits); + QueryScorer scorer = new QueryScorer(query, FIELD_NAME); + Highlighter highlighter = new Highlighter(scorer); + + StoredDocument doc = searcher.doc(hits.scoreDocs[0].doc); + String storedField = doc.get(FIELD_NAME); + + TokenStream stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[0].doc, FIELD_NAME, doc, analyzer); + Fragmenter fragmenter = new SimpleSpanFragmenter(scorer); + highlighter.setTextFragmenter(fragmenter); + String fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", fragment); + + doc = searcher.doc(hits.scoreDocs[1].doc); + storedField = doc.get(FIELD_NAME); + + stream = TokenSources.getAnyTokenStream(searcher + .getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer); + highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer)); + fragment = highlighter.getBestFragment(stream, storedField); + assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very", fragment); + + } + public void testHighlightingWithDefaultField() throws Exception { String s1 = "I call our world Flatland, not because we call it so,"; @@ -150,7 +232,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte "Query in a named field does not result in highlighting when that field isn't in the query", s1, highlightField(q, FIELD_NAME, s1)); } - + /** * This method intended for use with testHighlightingWithDefaultField() */ @@ -603,7 +685,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // Not sure we can assert anything here - just running to check we dont // throw any exceptions } - + public void testSpanHighlighting() throws Exception { Query query1 = new SpanNearQuery(new SpanQuery[] { new SpanTermQuery(new Term(FIELD_NAME, "wordx")), diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java index 44e8b06a2be..de6b60e7f70 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FastVectorHighlighterTest.java @@ -18,6 +18,8 @@ package org.apache.lucene.search.vectorhighlight; import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -26,7 +28,13 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.CommonTermsQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -62,4 +70,47 @@ public class FastVectorHighlighterTest extends LuceneTestCase { writer.close(); dir.close(); } + + public void testCommonTermsQueryHighlightTest() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))); + FieldType type = new FieldType(TextField.TYPE_STORED); + type.setStoreTermVectorOffsets(true); + type.setStoreTermVectorPositions(true); + type.setStoreTermVectors(true); + type.freeze(); + String[] texts = { + "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", + "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", + "JFK has been shot", "John Kennedy has been shot", + "This text has a typo in referring to Keneddy", + "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" }; + for (int i = 0; i < texts.length; i++) { + Document doc = new Document(); + Field field = new Field("field", texts[i], type); + doc.add(field); + writer.addDocument(doc); + } + CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2); + query.add(new Term("field", "text")); + query.add(new Term("field", "long")); + query.add(new Term("field", "very")); + + FastVectorHighlighter highlighter = new FastVectorHighlighter(); + IndexReader reader = DirectoryReader.open(writer, true); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs hits = searcher.search(query, 10); + assertEquals(2, hits.totalHits); + FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader); + String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[0].doc, "field", 1000, 1); + assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", bestFragments[0]); + + fieldQuery = highlighter.getFieldQuery(query, reader); + bestFragments = highlighter.getBestFragments(fieldQuery, reader, hits.scoreDocs[1].doc, "field", 1000, 1); + assertEquals("Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]); + + reader.close(); + writer.close(); + dir.close(); + } } diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java index 14cdd58f557..f381fa7e482 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldQueryTest.java @@ -23,8 +23,13 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; @@ -35,6 +40,7 @@ import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.vectorhighlight.FieldQuery.QueryPhraseMap; import org.apache.lucene.search.vectorhighlight.FieldTermStack.TermInfo; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; public class FieldQueryTest extends AbstractTestCase { @@ -905,4 +911,40 @@ public class FieldQueryTest extends AbstractTestCase { assertNotNull (fq.searchPhrase(F, phraseCandidate)); } + public void testStopRewrite() throws Exception { + Query q = new Query() { + + @Override + public String toString(String field) { + return "DummyQuery"; + } + + }; + make1d1fIndex( "a" ); + assertNotNull(reader); + new FieldQuery(q, reader, true, true ); + } + + public void testFlattenFilteredQuery() throws Exception { + Query query = new FilteredQuery(pqF( "A" ), new Filter() { + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) + throws IOException { + return null; + } + }); + FieldQuery fq = new FieldQuery( query, true, true ); + Set flatQueries = new HashSet(); + fq.flatten( query, reader, flatQueries ); + assertCollectionQueries( flatQueries, tq( "A" ) ); + } + + public void testFlattenConstantScoreQuery() throws Exception { + Query query = new ConstantScoreQuery(pqF( "A" )); + FieldQuery fq = new FieldQuery( query, true, true ); + Set flatQueries = new HashSet(); + fq.flatten( query, reader, flatQueries ); + assertCollectionQueries( flatQueries, tq( "A" ) ); + } + } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 64a9e366185..08a5e044431 100644 --- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -465,7 +465,9 @@ public class MemoryIndex { throw new RuntimeException(e); } finally { try { - if (stream != null) stream.close(); + if (stream != null) { + stream.close(); + } } catch (IOException e2) { throw new RuntimeException(e2); }