From 5682889026771a34894ef87058e7e54d351dd2d2 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Mon, 14 Nov 2011 17:02:30 +0000 Subject: [PATCH] LUCENE-3533: nuke spanfilters git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201787 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../highlight/WeightedSpanTermExtractor.java | 21 ++- .../lucene/search/CachingSpanFilter.java | 136 ---------------- .../org/apache/lucene/search/SpanFilter.java | 39 ----- .../lucene/search/SpanFilterResult.java | 119 -------------- .../apache/lucene/search/SpanQueryFilter.java | 103 ------------ .../search/payloads/PayloadNearQuery.java | 2 +- .../search/payloads/PayloadSpanUtil.java | 12 +- .../search/payloads/PayloadTermQuery.java | 2 +- .../search/spans/FieldMaskingSpanQuery.java | 6 +- .../lucene/search/spans/NearSpansOrdered.java | 11 +- .../search/spans/NearSpansUnordered.java | 7 +- .../spans/SpanMultiTermQueryWrapper.java | 6 +- .../lucene/search/spans/SpanNearQuery.java | 12 +- .../lucene/search/spans/SpanNotQuery.java | 8 +- .../lucene/search/spans/SpanOrQuery.java | 8 +- .../search/spans/SpanPositionCheckQuery.java | 10 +- .../apache/lucene/search/spans/SpanQuery.java | 5 +- .../lucene/search/spans/SpanTermQuery.java | 55 +++++-- .../lucene/search/spans/SpanWeight.java | 16 +- .../lucene/search/JustCompileSearch.java | 13 -- .../lucene/search/TestCachingSpanFilter.java | 147 ------------------ .../lucene/search/TestSpanQueryFilter.java | 86 ---------- .../search/spans/JustCompileSearchSpans.java | 5 +- .../search/spans/MultiSpansWrapper.java | 29 +++- 25 files changed, 155 insertions(+), 706 deletions(-) delete mode 100644 lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java delete mode 100644 lucene/src/java/org/apache/lucene/search/SpanFilter.java delete mode 100644 lucene/src/java/org/apache/lucene/search/SpanFilterResult.java delete mode 100644 lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java delete mode 100644 lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java delete mode 100644 lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3e006f9a3d0..5c64fea7726 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -199,6 +199,9 @@ Changes in backwards compatibility policy as these are no longer used by the scoring system. See MIGRATE.txt for more details. (Robert Muir) +* LUCENE-3533: Removed SpanFilters, they created large lists of objects and + did not scale. (Robert Muir) + Changes in Runtime Behavior * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java index bb4278296ad..8124ed14e0c 100644 --- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java +++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java @@ -25,6 +25,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -42,6 +43,7 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; /** * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether @@ -247,16 +249,21 @@ public class WeightedSpanTermExtractor { List spanPositions = new ArrayList(); for (final String field : fieldNames) { - - AtomicReaderContext context = getLeafContextForField(field); - Bits acceptDocs = context.reader.getLiveDocs(); - final Spans spans; + final SpanQuery q; if (mustRewriteQuery) { - spans = queries.get(field).getSpans(context, acceptDocs); + q = queries.get(field); } else { - spans = spanQuery.getSpans(context, acceptDocs); + q = spanQuery; } - + AtomicReaderContext context = getLeafContextForField(field); + Map termContexts = new HashMap(); + TreeSet extractedTerms = new TreeSet(); + q.extractTerms(extractedTerms); + for (Term term : extractedTerms) { + termContexts.put(term, TermContext.build(context, term, true)); + } + Bits acceptDocs = context.reader.getLiveDocs(); + final Spans spans = q.getSpans(context, acceptDocs, termContexts); // collect span positions while (spans.next()) { diff --git a/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java b/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java deleted file mode 100644 index 1d65c4624a1..00000000000 --- a/lucene/src/java/org/apache/lucene/search/CachingSpanFilter.java +++ /dev/null @@ -1,136 +0,0 @@ -package org.apache.lucene.search; -/** - * Copyright 2005 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.FixedBitSet; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Wraps another SpanFilter's result and caches it. The purpose is to allow - * filters to simply filter, and then wrap with this class to add caching. - */ -public class CachingSpanFilter extends SpanFilter { - private SpanFilter filter; - - /** - * A transient Filter cache (package private because of test) - */ - private final CachingWrapperFilter.FilterCache cache; - - /** Wraps another SpanFilter's result and caches it. - * @param filter Filter to cache results of - */ - public CachingSpanFilter(SpanFilter filter) { - this.filter = filter; - this.cache = new CachingWrapperFilter.FilterCache(); - } - - @Override - public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { - final SpanFilterResult result = getCachedResult(context); - return BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs); - } - - @Override - public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException { - final SpanFilterResult result = getCachedResult(context); - if (acceptDocs == null) { - return result; - } else { - // TODO: filter positions more efficient - List allPositions = result.getPositions(); - List positions = new ArrayList(allPositions.size() / 2 + 1); - for (SpanFilterResult.PositionInfo p : allPositions) { - if (acceptDocs.get(p.getDoc())) { - positions.add(p); - } - } - return new SpanFilterResult(BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs), positions); - } - } - - /** Provide the DocIdSet to be cached, using the DocIdSet provided - * by the wrapped Filter. - *

This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} - * returns true, else it copies the {@link DocIdSetIterator} into - * an {@link FixedBitSet}. - */ - protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException { - if (result == null || result.getDocIdSet() == null) { - // this is better than returning null, as the nonnull result can be cached - return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT; - } else if (result.getDocIdSet().isCacheable()) { - return result; - } else { - final DocIdSetIterator it = result.getDocIdSet().iterator(); - // null is allowed to be returned by iterator(), - // in this case we wrap with the empty set, - // which is cacheable. - if (it == null) { - return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT; - } else { - final FixedBitSet bits = new FixedBitSet(reader.maxDoc()); - bits.or(it); - return new SpanFilterResult(bits, result.getPositions()); - } - } - } - - // for testing - int hitCount, missCount; - - private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException { - final IndexReader reader = context.reader; - final Object coreKey = reader.getCoreCacheKey(); - - SpanFilterResult result = cache.get(reader, coreKey); - if (result != null) { - hitCount++; - return result; - } else { - missCount++; - // cache miss: we use no acceptDocs here - // (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set) - result = spanFilterResultToCache(filter.bitSpans(context, null/**!!!*/), reader); - cache.put(coreKey, result); - } - - return result; - } - - @Override - public String toString() { - return "CachingSpanFilter("+filter+")"; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof CachingSpanFilter)) return false; - return this.filter.equals(((CachingSpanFilter)o).filter); - } - - @Override - public int hashCode() { - return filter.hashCode() ^ 0x1117BF25; - } -} diff --git a/lucene/src/java/org/apache/lucene/search/SpanFilter.java b/lucene/src/java/org/apache/lucene/search/SpanFilter.java deleted file mode 100644 index b19968af2a7..00000000000 --- a/lucene/src/java/org/apache/lucene/search/SpanFilter.java +++ /dev/null @@ -1,39 +0,0 @@ -package org.apache.lucene.search; -/** - * Copyright 2007 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.util.Bits; - -import java.io.IOException; - -/** Abstract base class providing a mechanism to restrict searches to a subset - of an index and also maintains and returns position information. - - This is useful if you want to compare the positions from a SpanQuery with the positions of items in - a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, - and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could - then compare position information for post processing. - */ -public abstract class SpanFilter extends Filter{ - /** Returns a SpanFilterResult with true for documents which should be permitted in - search results, and false for those that should not and Spans for where the true docs match. - * @param context The {@link AtomicReaderContext} to load position and DocIdSet information from - * @return A {@link SpanFilterResult} - * @throws java.io.IOException if there was an issue accessing the necessary information - * */ - public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException; -} diff --git a/lucene/src/java/org/apache/lucene/search/SpanFilterResult.java b/lucene/src/java/org/apache/lucene/search/SpanFilterResult.java deleted file mode 100644 index 3337cb6cc6a..00000000000 --- a/lucene/src/java/org/apache/lucene/search/SpanFilterResult.java +++ /dev/null @@ -1,119 +0,0 @@ -package org.apache.lucene.search; -/** - * Copyright 2005 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - - -/** - * The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery - * - * @lucene.experimental - * - **/ -public class SpanFilterResult { - private DocIdSet docIdSet; - private List positions;//Spans spans; - - public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT = - new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.emptyList()); - - /** - * - * @param docIdSet The DocIdSet for the Filter - * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects - */ - public SpanFilterResult(DocIdSet docIdSet, List positions) { - this.docIdSet = docIdSet; - this.positions = positions; - } - - /** - * The first entry in the array corresponds to the first "on" bit. - * Entries are increasing by document order - * @return A List of PositionInfo objects - */ - public List getPositions() { - return positions; - } - - /** Returns the docIdSet */ - public DocIdSet getDocIdSet() { - return docIdSet; - } - - public static class PositionInfo { - private int doc; - private List positions; - - - public PositionInfo(int doc) { - this.doc = doc; - positions = new ArrayList(); - } - - public void addPosition(int start, int end) - { - positions.add(new StartEnd(start, end)); - } - - public int getDoc() { - return doc; - } - - /** - * - * @return Positions - */ - public List getPositions() { - return positions; - } - } - - public static class StartEnd - { - private int start; - private int end; - - - public StartEnd(int start, int end) { - this.start = start; - this.end = end; - } - - /** - * - * @return The end position of this match - */ - public int getEnd() { - return end; - } - - /** - * The Start position - * @return The start position of this match - */ - public int getStart() { - return start; - } - - } -} - - - diff --git a/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java b/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java deleted file mode 100644 index de758ac2b88..00000000000 --- a/lucene/src/java/org/apache/lucene/search/SpanQueryFilter.java +++ /dev/null @@ -1,103 +0,0 @@ -package org.apache.lucene.search; -/** - * Copyright 2007 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.Spans; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.FixedBitSet; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Constrains search results to only match those which also match a provided - * query. Also provides position information about where each document matches - * at the cost of extra space compared with the QueryWrapperFilter. - * There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely, - * the position information for each matching document is stored. - *

- * This filter does not cache. See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that - * caches. - */ -public class SpanQueryFilter extends SpanFilter { - protected SpanQuery query; - - protected SpanQueryFilter() - { - - } - - /** Constructs a filter which only matches documents matching - * query. - * @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter. - */ - public SpanQueryFilter(SpanQuery query) { - this.query = query; - } - - @Override - public final DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - SpanFilterResult result = bitSpans(context, acceptDocs); - return result.getDocIdSet(); - } - - @Override - public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { - - final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc()); - Spans spans = query.getSpans(context, acceptDocs); - List tmp = new ArrayList(20); - int currentDoc = -1; - SpanFilterResult.PositionInfo currentInfo = null; - while (spans.next()) - { - int doc = spans.doc(); - bits.set(doc); - if (currentDoc != doc) - { - currentInfo = new SpanFilterResult.PositionInfo(doc); - tmp.add(currentInfo); - currentDoc = doc; - } - currentInfo.addPosition(spans.start(), spans.end()); - } - return new SpanFilterResult(bits, tmp); - } - - - public SpanQuery getQuery() { - return query; - } - - @Override - public String toString() { - return "SpanQueryFilter(" + query + ")"; - } - - @Override - public boolean equals(Object o) { - return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query); - } - - @Override - public int hashCode() { - return query.hashCode() ^ 0x923F64B9; - } -} diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java index edf1658573d..1e1c65e3f55 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadNearQuery.java @@ -150,7 +150,7 @@ public class PayloadNearQuery extends SpanNearQuery { @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs), this, + return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity, similarity.sloppyDocScorer(stats, query.getField(), context)); } diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java index 5ec92fb6e1e..61748cdde9e 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadSpanUtil.java @@ -20,8 +20,11 @@ package org.apache.lucene.search.payloads; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.TreeSet; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; @@ -41,6 +44,7 @@ import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.Spans; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.TermContext; /** * Experimental class to get set of payloads for most standard Lucene queries. @@ -174,9 +178,15 @@ public class PayloadSpanUtil { private void getPayloads(Collection payloads, SpanQuery query) throws IOException { + Map termContexts = new HashMap(); + TreeSet terms = new TreeSet(); + query.extractTerms(terms); + for (Term term : terms) { + termContexts.put(term, TermContext.build(context, term, true)); + } final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); for (AtomicReaderContext atomicReaderContext : leaves) { - final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs()); + final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs(), termContexts); while (spans.next() == true) { if (spans.isPayloadAvailable()) { Collection payload = spans.getPayload(); diff --git a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java index d7ae18cd72a..0606181c4de 100644 --- a/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/payloads/PayloadTermQuery.java @@ -81,7 +81,7 @@ public class PayloadTermQuery extends SpanTermQuery { @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs), + return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context)); } diff --git a/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java b/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java index 7d06b38bc08..502dea4e727 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; +import java.util.Map; import java.util.Set; import org.apache.lucene.index.IndexReader; @@ -27,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Weight; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; /** @@ -93,8 +95,8 @@ public class FieldMaskingSpanQuery extends SpanQuery { // ...this is done to be more consistent with things like SpanFirstQuery @Override - public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { - return maskedQuery.getSpans(context, acceptDocs); + public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + return maskedQuery.getSpans(context, acceptDocs, termContexts); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java index 9d642708a37..ad07032a3a0 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java +++ b/lucene/src/java/org/apache/lucene/search/spans/NearSpansOrdered.java @@ -17,9 +17,11 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; import java.io.IOException; import java.util.ArrayList; @@ -28,6 +30,7 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Collection; +import java.util.Map; import java.util.Set; /** A Spans that is formed from the ordered subspans of a SpanNearQuery @@ -78,11 +81,11 @@ public class NearSpansOrdered extends Spans { private SpanNearQuery query; private boolean collectPayloads = true; - public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs) throws IOException { - this(spanNearQuery, context, acceptDocs, true); + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + this(spanNearQuery, context, acceptDocs, termContexts, true); } - public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, boolean collectPayloads) + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map termContexts, boolean collectPayloads) throws IOException { if (spanNearQuery.getClauses().length < 2) { throw new IllegalArgumentException("Less than 2 clauses: " @@ -95,7 +98,7 @@ public class NearSpansOrdered extends Spans { matchPayload = new LinkedList(); subSpansByDoc = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { - subSpans[i] = clauses[i].getSpans(context, acceptDocs); + subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts); subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } query = spanNearQuery; // kept for toString() only. diff --git a/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java b/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java index 0532a43fde5..31f21559712 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java +++ b/lucene/src/java/org/apache/lucene/search/spans/NearSpansUnordered.java @@ -17,14 +17,17 @@ package org.apache.lucene.search.spans; * limitations under the License. */ +import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.TermContext; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.HashSet; @@ -132,7 +135,7 @@ public class NearSpansUnordered extends Spans { } - public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs) + public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { this.query = query; this.slop = query.getSlop(); @@ -142,7 +145,7 @@ public class NearSpansUnordered extends Spans { subSpans = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { SpansCell cell = - new SpansCell(clauses[i].getSpans(context, acceptDocs), i); + new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i); ordered.add(cell); subSpans[i] = cell.spans; } diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java b/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java index bcce4e91839..fcb95029182 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java @@ -18,6 +18,7 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; +import java.util.Map; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; @@ -90,7 +91,7 @@ public class SpanMultiTermQueryWrapper extends SpanQue } @Override - public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { throw new UnsupportedOperationException("Query should have been rewritten"); } @@ -157,6 +158,9 @@ public class SpanMultiTermQueryWrapper extends SpanQue @Override protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) { + // TODO: would be nice to not lose term-state here. + // we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span + // (if you put this thing in another span query, it would extractTerms/double-seek anyway) final SpanTermQuery q = new SpanTermQuery(term); q.setBoost(boost); topLevel.addClause(q); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java index 05b0330df9f..36eeadfc3da 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanNearQuery.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.util.List; import java.util.ArrayList; import java.util.Iterator; +import java.util.Map; import java.util.Set; @@ -31,6 +32,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; /** Matches spans which are near one another. One can specify slop, the @@ -118,16 +120,16 @@ public class SpanNearQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { if (clauses.size() == 0) // optimize 0-clause case - return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs); + return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts); if (clauses.size() == 1) // optimize 1-clause case - return clauses.get(0).getSpans(context, acceptDocs); + return clauses.get(0).getSpans(context, acceptDocs, termContexts); return inOrder - ? (Spans) new NearSpansOrdered(this, context, acceptDocs, collectPayloads) - : (Spans) new NearSpansUnordered(this, context, acceptDocs); + ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads) + : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java index 537c54b0094..2defb333f00 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanNotQuery.java @@ -22,11 +22,13 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Map; import java.util.Set; /** Removes matches which overlap with another SpanQuery. */ @@ -76,12 +78,12 @@ public class SpanNotQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { + public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { return new Spans() { - private Spans includeSpans = include.getSpans(context, acceptDocs); + private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts); private boolean moreInclude = true; - private Spans excludeSpans = exclude.getSpans(context, acceptDocs); + private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts); private boolean moreExclude = excludeSpans.next(); @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java index 841d4a265cc..2f654fbc584 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanOrQuery.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Collection; import java.util.ArrayList; import java.util.Iterator; +import java.util.Map; import java.util.Set; import org.apache.lucene.index.IndexReader; @@ -30,6 +31,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.search.Query; @@ -164,9 +166,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { } @Override - public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { + public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { if (clauses.size() == 1) // optimize 1-clause case - return (clauses.get(0)).getSpans(context, acceptDocs); + return (clauses.get(0)).getSpans(context, acceptDocs, termContexts); return new Spans() { private SpanQueue queue = null; @@ -175,7 +177,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable { queue = new SpanQueue(clauses.size()); Iterator i = clauses.iterator(); while (i.hasNext()) { - Spans spans = i.next().getSpans(context, acceptDocs); + Spans spans = i.next().getSpans(context, acceptDocs, termContexts); if ( ((target == -1) && spans.next()) || ((target != -1) && spans.skipTo(target))) { queue.add(spans); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java index 5f4c225d441..8a5bbed0fce 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanPositionCheckQuery.java @@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Map; import java.util.Set; @@ -82,8 +84,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; @Override - public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { - return new PositionCheckSpan(context, acceptDocs); + public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + return new PositionCheckSpan(context, acceptDocs, termContexts); } @@ -107,8 +109,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea protected class PositionCheckSpan extends Spans { private Spans spans; - public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs) throws IOException { - spans = match.getSpans(context, acceptDocs); + public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + spans = match.getSpans(context, acceptDocs, termContexts); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java index de6e720dc4c..a197cdf359e 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanQuery.java @@ -18,18 +18,21 @@ package org.apache.lucene.search.spans; */ import java.io.IOException; +import java.util.Map; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { /** Expert: Returns the matches for this query in an index. Used internally * to search for spans. */ - public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException; + public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException; /** Returns the name of the field matched by this query.*/ public abstract String getField(); diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index a780d2ffa7b..32744b2dc4c 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -19,12 +19,19 @@ package org.apache.lucene.search.spans; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Map; import java.util.Set; /** Matches spans containing a term. */ @@ -82,22 +89,46 @@ public class SpanTermQuery extends SpanQuery { } @Override - public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { - final IndexReader reader = context.reader; - final DocsAndPositionsEnum postings = reader.termPositionsEnum(acceptDocs, - term.field(), - term.bytes()); + public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + TermContext termContext = termContexts.get(term); + final TermState state; + if (termContext == null) { + // this happens with span-not query, as it doesn't include the NOT side in extractTerms() + // so we seek to the term now in this segment..., this sucks because its ugly mostly! + final Fields fields = context.reader.fields(); + if (fields != null) { + final Terms terms = fields.terms(term.field()); + if (terms != null) { + final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! + if (termsEnum.seekExact(term.bytes(), true)) { + state = termsEnum.termState(); + } else { + state = null; + } + } else { + state = null; + } + } else { + state = null; + } + } else { + state = termContext.get(context.ord); + } + + if (state == null) { // term is not present in that reader + return TermSpans.EMPTY_TERM_SPANS; + } + + final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); + termsEnum.seekExact(term.bytes(), state); + + final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null); if (postings != null) { return new TermSpans(postings, term); } else { - if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) { - // term does exist, but has no positions - throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); - } else { - // term does not exist - return TermSpans.EMPTY_TERM_SPANS; - } + // term does exist, but has no positions + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); } } } diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java index 5205807f2ba..207a9f660e3 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanWeight.java @@ -27,7 +27,8 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.TermContext; import java.io.IOException; -import java.util.Set; +import java.util.HashMap; +import java.util.Map; import java.util.TreeSet; /** @@ -35,7 +36,7 @@ import java.util.TreeSet; */ public class SpanWeight extends Weight { protected Similarity similarity; - protected Set terms; + protected Map termContexts; protected SpanQuery query; protected Similarity.Stats stats; @@ -44,15 +45,16 @@ public class SpanWeight extends Weight { this.similarity = searcher.getSimilarityProvider().get(query.getField()); this.query = query; - terms=new TreeSet(); + termContexts = new HashMap(); + TreeSet terms = new TreeSet(); query.extractTerms(terms); final ReaderContext context = searcher.getTopReaderContext(); - final TermContext states[] = new TermContext[terms.size()]; final TermStatistics termStats[] = new TermStatistics[terms.size()]; int i = 0; for (Term term : terms) { - states[i] = TermContext.build(context, term, true); - termStats[i] = searcher.termStatistics(term, states[i]); + TermContext state = TermContext.build(context, term, true); + termStats[i] = searcher.termStatistics(term, state); + termContexts.put(term, state); i++; } stats = similarity.computeStats( @@ -77,7 +79,7 @@ public class SpanWeight extends Weight { @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - return new SpanScorer(query.getSpans(context, acceptDocs), this, similarity.sloppyDocScorer(stats, query.getField(), context)); + return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context)); } @Override diff --git a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java index 24f3282b28b..34f69f89662 100644 --- a/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -281,19 +281,6 @@ final class JustCompileSearch { } } - static final class JustCompileSpanFilter extends SpanFilter { - - @Override - public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - return null; - } - } - static final class JustCompileTopDocsCollector extends TopDocsCollector { protected JustCompileTopDocsCollector(PriorityQueue pq) { diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java deleted file mode 100644 index 3f8c9858fc0..00000000000 --- a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java +++ /dev/null @@ -1,147 +0,0 @@ -package org.apache.lucene.search; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.SerialMergeScheduler; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; - -public class TestCachingSpanFilter extends LuceneTestCase { - - public void testEnforceDeletions() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter( - random, - dir, - newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)). - setMergeScheduler(new SerialMergeScheduler()). - // asserts below requires no unexpected merges: - setMergePolicy(newLogMergePolicy(10)) - ); - - // NOTE: cannot use writer.getReader because RIW (on - // flipping a coin) may give us a newly opened reader, - // but we use .reopen on this reader below and expect to - // (must) get an NRT reader: - IndexReader reader = IndexReader.open(writer.w, true); - // same reason we don't wrap? - IndexSearcher searcher = newSearcher(reader, false); - - // add a doc, refresh the reader, and check that its there - Document doc = new Document(); - FieldType customType = new FieldType(TextField.TYPE_STORED); - customType.setTokenized(false); - doc.add(newField("id", "1", customType)); - writer.addDocument(doc); - - reader = refreshReader(reader); - searcher.close(); - searcher = newSearcher(reader, false); - - TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1); - assertEquals("Should find a hit...", 1, docs.totalHits); - - final SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1"))); - - CachingSpanFilter filter = new CachingSpanFilter(startFilter); - - docs = searcher.search(new MatchAllDocsQuery(), filter, 1); - assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits); - int missCount = filter.missCount; - assertTrue(missCount > 0); - Query constantScore = new ConstantScoreQuery(filter); - docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 1, docs.totalHits); - assertEquals(missCount, filter.missCount); - - // NOTE: important to hold ref here so GC doesn't clear - // the cache entry! Else the assert below may sometimes - // fail: - IndexReader oldReader = reader; - - writer.addDocument(doc); - reader = refreshReader(reader); - searcher.close(); - searcher = newSearcher(reader, false); - - docs = searcher.search(new MatchAllDocsQuery(), filter, 1); - assertEquals("[query + filter] Should find 2 hits...", 2, docs.totalHits); - assertTrue(filter.missCount > missCount); - missCount = filter.missCount; - - constantScore = new ConstantScoreQuery(filter); - docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should find a hit...", 2, docs.totalHits); - assertEquals(missCount, filter.missCount); - - // NOTE: important to hold ref here so GC doesn't clear - // the cache entry! Else the assert below may sometimes - // fail: - IndexReader oldReader2 = reader; - - // now delete the doc, refresh the reader, and see that it's not there - writer.deleteDocuments(new Term("id", "1")); - - reader = refreshReader(reader); - searcher.close(); - searcher = newSearcher(reader, false); - - docs = searcher.search(new MatchAllDocsQuery(), filter, 1); - assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits); - assertEquals(missCount, filter.missCount); - - docs = searcher.search(constantScore, 1); - assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits); - assertEquals(missCount, filter.missCount); - - // NOTE: silliness to make sure JRE does not optimize - // away our holding onto oldReader to prevent - // CachingWrapperFilter's WeakHashMap from dropping the - // entry: - assertTrue(oldReader != null); - assertTrue(oldReader2 != null); - - searcher.close(); - writer.close(); - reader.close(); - dir.close(); - } - - private static IndexReader refreshReader(IndexReader reader) throws IOException { - IndexReader oldReader = reader; - reader = IndexReader.openIfChanged(reader); - if (reader != null) { - oldReader.close(); - return reader; - } else { - return oldReader; - } - } - -} diff --git a/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java b/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java deleted file mode 100644 index 130cb711f01..00000000000 --- a/lucene/src/test/org/apache/lucene/search/TestSpanQueryFilter.java +++ /dev/null @@ -1,86 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.util.List; - -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.RandomIndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.English; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.ReaderUtil; - -public class TestSpanQueryFilter extends LuceneTestCase { - - public void testFilterWorks() throws Exception { - Directory dir = newDirectory(); - RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); - for (int i = 0; i < 500; i++) { - Document document = new Document(); - document.add(newField("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i), - TextField.TYPE_UNSTORED)); - writer.addDocument(document); - } - final int number = 10; - IndexReader reader = writer.getReader(); - writer.close(); - AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext()); - int subIndex = ReaderUtil.subIndex(number, leaves); // find the reader with this document in it - SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(number).trim())); - SpanQueryFilter filter = new SpanQueryFilter(query); - SpanFilterResult result = filter.bitSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs()); - DocIdSet docIdSet = result.getDocIdSet(); - assertTrue("docIdSet is null and it shouldn't be", docIdSet != null); - assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, number - leaves[subIndex].docBase); - List spans = result.getPositions(); - assertTrue("spans is null and it shouldn't be", spans != null); - int size = getDocIdSetSize(docIdSet); - assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size); - for (final SpanFilterResult.PositionInfo info: spans) { - assertTrue("info is null and it shouldn't be", info != null); - //The doc should indicate the bit is on - assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc()); - //There should be two positions in each - assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2); - } - - reader.close(); - dir.close(); - } - - int getDocIdSetSize(DocIdSet docIdSet) throws Exception { - int size = 0; - DocIdSetIterator it = docIdSet.iterator(); - while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - size++; - } - return size; - } - - public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception { - DocIdSetIterator it = docIdSet.iterator(); - assertTrue(msg, it.advance(docId) != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(msg, it.docID() == docId); - } -} diff --git a/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java b/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java index 86a837081a0..63c741d76ab 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/JustCompileSearchSpans.java @@ -19,11 +19,14 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.Collection; +import java.util.Map; +import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Weight; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.TermContext; /** * Holds all implementations of classes in the o.a.l.s.spans package as a @@ -83,7 +86,7 @@ final class JustCompileSearchSpans { } @Override - public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { + public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { throw new UnsupportedOperationException(UNSUPPORTED_MSG); } diff --git a/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java b/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java index a14e09f5c5d..f29ffbda39b 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java +++ b/lucene/src/test/org/apache/lucene/search/spans/MultiSpansWrapper.java @@ -20,11 +20,16 @@ package org.apache.lucene.search.spans; import java.io.IOException; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeSet; import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.TermContext; /** * @@ -39,19 +44,27 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t private AtomicReaderContext[] leaves; private int leafOrd = 0; private Spans current; + private Map termContexts; - private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) { + private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query, Map termContexts) { this.query = query; this.leaves = leaves; + this.termContexts = termContexts; } public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException { + Map termContexts = new HashMap(); + TreeSet terms = new TreeSet(); + query.extractTerms(terms); + for (Term term : terms) { + termContexts.put(term, TermContext.build(topLevelReaderContext, term, true)); + } AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext); if(leaves.length == 1) { - return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs()); + return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs(), termContexts); } - return new MultiSpansWrapper(leaves, query); + return new MultiSpansWrapper(leaves, query, termContexts); } @Override @@ -60,14 +73,14 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t return false; } if (current == null) { - current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); + current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts); } while(true) { if (current.next()) { return true; } if (++leafOrd < leaves.length) { - current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); + current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts); } else { current = null; break; @@ -85,17 +98,17 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t int subIndex = ReaderUtil.subIndex(target, leaves); assert subIndex >= leafOrd; if (subIndex != leafOrd) { - current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs()); + current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs(), termContexts); leafOrd = subIndex; } else if (current == null) { - current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); + current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts); } while (true) { if (current.skipTo(target - leaves[leafOrd].docBase)) { return true; } if (++leafOrd < leaves.length) { - current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); + current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts); } else { current = null; break;