LUCENE-3533: nuke spanfilters

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201787 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-11-14 17:02:30 +00:00
parent f389654f9c
commit 5682889026
25 changed files with 155 additions and 706 deletions

View File

@ -199,6 +199,9 @@ Changes in backwards compatibility policy
as these are no longer used by the scoring system. See MIGRATE.txt for more as these are no longer used by the scoring system. See MIGRATE.txt for more
details. (Robert Muir) details. (Robert Muir)
* LUCENE-3533: Removed SpanFilters, they created large lists of objects and
did not scale. (Robert Muir)
Changes in Runtime Behavior Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you

View File

@ -25,6 +25,7 @@ import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
@ -42,6 +43,7 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans; import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
/** /**
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@ -247,16 +249,21 @@ public class WeightedSpanTermExtractor {
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>(); List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
for (final String field : fieldNames) { for (final String field : fieldNames) {
final SpanQuery q;
AtomicReaderContext context = getLeafContextForField(field);
Bits acceptDocs = context.reader.getLiveDocs();
final Spans spans;
if (mustRewriteQuery) { if (mustRewriteQuery) {
spans = queries.get(field).getSpans(context, acceptDocs); q = queries.get(field);
} else { } else {
spans = spanQuery.getSpans(context, acceptDocs); q = spanQuery;
} }
AtomicReaderContext context = getLeafContextForField(field);
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> extractedTerms = new TreeSet<Term>();
q.extractTerms(extractedTerms);
for (Term term : extractedTerms) {
termContexts.put(term, TermContext.build(context, term, true));
}
Bits acceptDocs = context.reader.getLiveDocs();
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
// collect span positions // collect span positions
while (spans.next()) { while (spans.next()) {

View File

@ -1,136 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Wraps another SpanFilter's result and caches it. The purpose is to allow
* filters to simply filter, and then wrap with this class to add caching.
*/
public class CachingSpanFilter extends SpanFilter {
  // The wrapped filter whose bitSpans() results are cached per reader core.
  private SpanFilter filter;

  /**
   * A transient cache mapping a reader's core cache key to its computed
   * {@link SpanFilterResult}. NOTE(review): an older comment called this
   * "package private because of test", but the field is declared private final
   * here — only the hit/miss counters below are exposed for tests.
   */
  private final CachingWrapperFilter.FilterCache<SpanFilterResult> cache;

  /** Wraps another SpanFilter's result and caches it.
   * @param filter Filter to cache results of
   */
  public CachingSpanFilter(SpanFilter filter) {
    this.filter = filter;
    this.cache = new CachingWrapperFilter.FilterCache<SpanFilterResult>();
  }

  /**
   * Returns the cached doc id set for this segment, with {@code acceptDocs}
   * applied on top of the cached set (live docs are never baked into the cache).
   */
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    final SpanFilterResult result = getCachedResult(context);
    return BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs);
  }

  /**
   * Returns the cached {@link SpanFilterResult} for this segment. When
   * {@code acceptDocs} is non-null, both the doc id set and the position list
   * are filtered down to accepted documents before returning.
   */
  @Override
  public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
    final SpanFilterResult result = getCachedResult(context);
    if (acceptDocs == null) {
      return result;
    } else {
      // TODO: filter positions more efficiently than a full linear scan
      List<SpanFilterResult.PositionInfo> allPositions = result.getPositions();
      // sized at half the cached list as a rough guess of how many docs survive
      List<SpanFilterResult.PositionInfo> positions = new ArrayList<SpanFilterResult.PositionInfo>(allPositions.size() / 2 + 1);
      for (SpanFilterResult.PositionInfo p : allPositions) {
        if (acceptDocs.get(p.getDoc())) {
          positions.add(p);
        }
      }
      return new SpanFilterResult(BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs), positions);
    }
  }

  /** Provide the SpanFilterResult to be cached, using the result provided
   * by the wrapped Filter.
   * <p>This implementation returns the given result if its {@link DocIdSet#isCacheable}
   * returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
   * an {@link FixedBitSet}. A null or empty result is replaced by the shared
   * empty instance so that it can still be cached.
   */
  protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException {
    if (result == null || result.getDocIdSet() == null) {
      // this is better than returning null, as the nonnull result can be cached
      return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
    } else if (result.getDocIdSet().isCacheable()) {
      return result;
    } else {
      final DocIdSetIterator it = result.getDocIdSet().iterator();
      // null is allowed to be returned by iterator(),
      // in this case we wrap with the empty set,
      // which is cacheable.
      if (it == null) {
        return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
      } else {
        final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
        bits.or(it);
        return new SpanFilterResult(bits, result.getPositions());
      }
    }
  }

  // for testing: counters observed by package-level tests
  int hitCount, missCount;

  // Looks up (or computes and stores) the SpanFilterResult for this segment,
  // keyed by the reader's core cache key so reopened readers share entries.
  private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException {
    final IndexReader reader = context.reader;
    final Object coreKey = reader.getCoreCacheKey();
    SpanFilterResult result = cache.get(reader, coreKey);
    if (result != null) {
      hitCount++;
      return result;
    } else {
      missCount++;
      // cache miss: we use no acceptDocs here
      // (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set)
      result = spanFilterResultToCache(filter.bitSpans(context, null /* acceptDocs applied later, on the cached set */), reader);
      cache.put(coreKey, result);
    }
    return result;
  }

  @Override
  public String toString() {
    return "CachingSpanFilter("+filter+")";
  }

  // Equality is delegated to the wrapped filter so equal filters share cache semantics.
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof CachingSpanFilter)) return false;
    return this.filter.equals(((CachingSpanFilter)o).filter);
  }

  @Override
  public int hashCode() {
    // XOR with a class-specific constant so a CachingSpanFilter never collides
    // with the bare wrapped filter's hash
    return filter.hashCode() ^ 0x1117BF25;
  }
}

View File

@ -1,39 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.Bits;
import java.io.IOException;
/** Abstract base class providing a mechanism to restrict searches to a subset
of an index and also maintains and returns position information.
This is useful if you want to compare the positions from a SpanQuery with the positions of items in
a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
then compare position information for post processing.
*/
public abstract class SpanFilter extends Filter{
  /** Returns a {@link SpanFilterResult} with true for documents which should be permitted in
  search results, and false for those that should not, together with Spans describing
  where the accepted documents match.
  * @param context The {@link AtomicReaderContext} to load position and DocIdSet information from
  * @param acceptDocs Bits marking documents that may be returned; may be null to accept all
  * @return A {@link SpanFilterResult} holding both the matching doc id set and per-doc positions
  * @throws java.io.IOException if there was an issue accessing the necessary information
  * */
  public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
}

View File

@ -1,119 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery
*
* @lucene.experimental
*
**/
public class SpanFilterResult {

  /** A shared result holding an empty doc id set and no position information. */
  public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT =
      new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.<PositionInfo>emptyList());

  private DocIdSet docIdSet;            // documents accepted by the filter
  private List<PositionInfo> positions; // one entry per matching document, in doc order

  /**
   * Creates a result pairing a filter's matching documents with their span positions.
   *
   * @param docIdSet The DocIdSet for the Filter
   * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
   */
  public SpanFilterResult(DocIdSet docIdSet, List<PositionInfo> positions) {
    this.docIdSet = docIdSet;
    this.positions = positions;
  }

  /**
   * Returns the position information. The first entry corresponds to the first
   * "on" bit; entries increase in document order.
   *
   * @return A List of PositionInfo objects
   */
  public List<PositionInfo> getPositions() {
    return positions;
  }

  /** Returns the docIdSet */
  public DocIdSet getDocIdSet() {
    return docIdSet;
  }

  /** The span positions recorded for one document. */
  public static class PositionInfo {
    private int doc;                    // the document these positions belong to
    private List<StartEnd> positions;   // every recorded (start, end) match

    /** Creates an empty position list for the given document. */
    public PositionInfo(int doc) {
      this.doc = doc;
      this.positions = new ArrayList<StartEnd>();
    }

    /** Records one span match with the given start and end positions. */
    public void addPosition(int start, int end) {
      positions.add(new StartEnd(start, end));
    }

    /** Returns the document id. */
    public int getDoc() {
      return doc;
    }

    /** Returns every recorded position for this document. */
    public List<StartEnd> getPositions() {
      return positions;
    }
  }

  /** A single span match: a start position and an end position. */
  public static class StartEnd {
    private int start;
    private int end;

    public StartEnd(int start, int end) {
      this.start = start;
      this.end = end;
    }

    /** @return The end position of this match */
    public int getEnd() {
      return end;
    }

    /** @return The start position of this match */
    public int getStart() {
      return start;
    }
  }
}

View File

@ -1,103 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2007 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Constrains search results to only match those which also match a provided
* query. Also provides position information about where each document matches
* at the cost of extra space compared with the QueryWrapperFilter.
* There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely,
* the position information for each matching document is stored.
* <p/>
* This filter does not cache. See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that
* caches.
*/
public class SpanQueryFilter extends SpanFilter {

  // The span query whose matches define both the doc id set and the positions.
  protected SpanQuery query;

  protected SpanQueryFilter() {
  }

  /** Constructs a filter which only matches documents matching
   * <code>query</code>.
   * @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter.
   */
  public SpanQueryFilter(SpanQuery query) {
    this.query = query;
  }

  /** Returns only the doc id set; delegates the full span walk to {@link #bitSpans}. */
  @Override
  public final DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    return bitSpans(context, acceptDocs).getDocIdSet();
  }

  /**
   * Walks every span match of the query over this segment, setting a bit for each
   * matching document and recording each match's (start, end) positions grouped
   * per document.
   */
  @Override
  public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    final FixedBitSet matchingDocs = new FixedBitSet(context.reader.maxDoc());
    final Spans spans = query.getSpans(context, acceptDocs);
    final List<SpanFilterResult.PositionInfo> positionInfos =
        new ArrayList<SpanFilterResult.PositionInfo>(20);

    int lastDoc = -1;
    SpanFilterResult.PositionInfo docInfo = null;
    while (spans.next()) {
      final int docId = spans.doc();
      matchingDocs.set(docId);
      if (docId != lastDoc) {
        // spans arrive in doc order, so a new doc id means a new group
        docInfo = new SpanFilterResult.PositionInfo(docId);
        positionInfos.add(docInfo);
        lastDoc = docId;
      }
      docInfo.addPosition(spans.start(), spans.end());
    }
    return new SpanFilterResult(matchingDocs, positionInfos);
  }

  /** Returns the underlying SpanQuery. */
  public SpanQuery getQuery() {
    return query;
  }

  @Override
  public String toString() {
    return "SpanQueryFilter(" + query + ")";
  }

  @Override
  public boolean equals(Object o) {
    return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query);
  }

  @Override
  public int hashCode() {
    // class-specific constant keeps the hash distinct from the bare query's
    return query.hashCode() ^ 0x923F64B9;
  }
}

View File

@ -150,7 +150,7 @@ public class PayloadNearQuery extends SpanNearQuery {
@Override @Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs), this, return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
similarity, similarity.sloppyDocScorer(stats, query.getField(), context)); similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
} }

View File

@ -20,8 +20,11 @@ package org.apache.lucene.search.payloads;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@ -41,6 +44,7 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans; import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
/** /**
* Experimental class to get set of payloads for most standard Lucene queries. * Experimental class to get set of payloads for most standard Lucene queries.
@ -174,9 +178,15 @@ public class PayloadSpanUtil {
private void getPayloads(Collection<byte []> payloads, SpanQuery query) private void getPayloads(Collection<byte []> payloads, SpanQuery query)
throws IOException { throws IOException {
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> terms = new TreeSet<Term>();
query.extractTerms(terms);
for (Term term : terms) {
termContexts.put(term, TermContext.build(context, term, true));
}
final AtomicReaderContext[] leaves = ReaderUtil.leaves(context); final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
for (AtomicReaderContext atomicReaderContext : leaves) { for (AtomicReaderContext atomicReaderContext : leaves) {
final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs()); final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs(), termContexts);
while (spans.next() == true) { while (spans.next() == true) {
if (spans.isPayloadAvailable()) { if (spans.isPayloadAvailable()) {
Collection<byte[]> payload = spans.getPayload(); Collection<byte[]> payload = spans.getPayload();

View File

@ -81,7 +81,7 @@ public class PayloadTermQuery extends SpanTermQuery {
@Override @Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs), return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
this, similarity.sloppyDocScorer(stats, query.getField(), context)); this, similarity.sloppyDocScorer(stats, query.getField(), context));
} }

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
@ -27,6 +28,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
/** /**
@ -93,8 +95,8 @@ public class FieldMaskingSpanQuery extends SpanQuery {
// ...this is done to be more consistent with things like SpanFirstQuery // ...this is done to be more consistent with things like SpanFirstQuery
@Override @Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return maskedQuery.getSpans(context, acceptDocs); return maskedQuery.getSpans(context, acceptDocs, termContexts);
} }
@Override @Override

View File

@ -17,9 +17,11 @@ package org.apache.lucene.search.spans;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -28,6 +30,7 @@ import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Collection; import java.util.Collection;
import java.util.Map;
import java.util.Set; import java.util.Set;
/** A Spans that is formed from the ordered subspans of a SpanNearQuery /** A Spans that is formed from the ordered subspans of a SpanNearQuery
@ -78,11 +81,11 @@ public class NearSpansOrdered extends Spans {
private SpanNearQuery query; private SpanNearQuery query;
private boolean collectPayloads = true; private boolean collectPayloads = true;
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs) throws IOException { public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
this(spanNearQuery, context, acceptDocs, true); this(spanNearQuery, context, acceptDocs, termContexts, true);
} }
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, boolean collectPayloads) public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
throws IOException { throws IOException {
if (spanNearQuery.getClauses().length < 2) { if (spanNearQuery.getClauses().length < 2) {
throw new IllegalArgumentException("Less than 2 clauses: " throw new IllegalArgumentException("Less than 2 clauses: "
@ -95,7 +98,7 @@ public class NearSpansOrdered extends Spans {
matchPayload = new LinkedList<byte[]>(); matchPayload = new LinkedList<byte[]>();
subSpansByDoc = new Spans[clauses.length]; subSpansByDoc = new Spans[clauses.length];
for (int i = 0; i < clauses.length; i++) { for (int i = 0; i < clauses.length; i++) {
subSpans[i] = clauses[i].getSpans(context, acceptDocs); subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
} }
query = spanNearQuery; // kept for toString() only. query = spanNearQuery; // kept for toString() only.

View File

@ -17,14 +17,17 @@ package org.apache.lucene.search.spans;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TermContext;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.HashSet; import java.util.HashSet;
@ -132,7 +135,7 @@ public class NearSpansUnordered extends Spans {
} }
public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs) public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
throws IOException { throws IOException {
this.query = query; this.query = query;
this.slop = query.getSlop(); this.slop = query.getSlop();
@ -142,7 +145,7 @@ public class NearSpansUnordered extends Spans {
subSpans = new Spans[clauses.length]; subSpans = new Spans[clauses.length];
for (int i = 0; i < clauses.length; i++) { for (int i = 0; i < clauses.length; i++) {
SpansCell cell = SpansCell cell =
new SpansCell(clauses[i].getSpans(context, acceptDocs), i); new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
ordered.add(cell); ordered.add(cell);
subSpans[i] = cell.spans; subSpans[i] = cell.spans;
} }

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@ -90,7 +91,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
} }
@Override @Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
throw new UnsupportedOperationException("Query should have been rewritten"); throw new UnsupportedOperationException("Query should have been rewritten");
} }
@ -157,6 +158,9 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
@Override @Override
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) { protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
// TODO: would be nice to not lose term-state here.
// we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span
// (if you put this thing in another span query, it would extractTerms/double-seek anyway)
final SpanTermQuery q = new SpanTermQuery(term); final SpanTermQuery q = new SpanTermQuery(term);
q.setBoost(boost); q.setBoost(boost);
topLevel.addClause(q); topLevel.addClause(q);

View File

@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map;
import java.util.Set; import java.util.Set;
@ -31,6 +32,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
/** Matches spans which are near one another. One can specify <i>slop</i>, the /** Matches spans which are near one another. One can specify <i>slop</i>, the
@ -118,16 +120,16 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
} }
@Override @Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 0) // optimize 0-clause case if (clauses.size() == 0) // optimize 0-clause case
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs); return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
if (clauses.size() == 1) // optimize 1-clause case if (clauses.size() == 1) // optimize 1-clause case
return clauses.get(0).getSpans(context, acceptDocs); return clauses.get(0).getSpans(context, acceptDocs, termContexts);
return inOrder return inOrder
? (Spans) new NearSpansOrdered(this, context, acceptDocs, collectPayloads) ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
: (Spans) new NearSpansUnordered(this, context, acceptDocs); : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
} }
@Override @Override

View File

@ -22,11 +22,13 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Map;
import java.util.Set; import java.util.Set;
/** Removes matches which overlap with another SpanQuery. */ /** Removes matches which overlap with another SpanQuery. */
@ -76,12 +78,12 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
} }
@Override @Override
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
return new Spans() { return new Spans() {
private Spans includeSpans = include.getSpans(context, acceptDocs); private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
private boolean moreInclude = true; private boolean moreInclude = true;
private Spans excludeSpans = exclude.getSpans(context, acceptDocs); private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
private boolean moreExclude = excludeSpans.next(); private boolean moreExclude = excludeSpans.next();
@Override @Override

View File

@ -23,6 +23,7 @@ import java.util.List;
import java.util.Collection; import java.util.Collection;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
@ -30,6 +31,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
@ -164,9 +166,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
} }
@Override @Override
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
if (clauses.size() == 1) // optimize 1-clause case if (clauses.size() == 1) // optimize 1-clause case
return (clauses.get(0)).getSpans(context, acceptDocs); return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
return new Spans() { return new Spans() {
private SpanQueue queue = null; private SpanQueue queue = null;
@ -175,7 +177,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
queue = new SpanQueue(clauses.size()); queue = new SpanQueue(clauses.size());
Iterator<SpanQuery> i = clauses.iterator(); Iterator<SpanQuery> i = clauses.iterator();
while (i.hasNext()) { while (i.hasNext()) {
Spans spans = i.next().getSpans(context, acceptDocs); Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
if ( ((target == -1) && spans.next()) if ( ((target == -1) && spans.next())
|| ((target != -1) && spans.skipTo(target))) { || ((target != -1) && spans.skipTo(target))) {
queue.add(spans); queue.add(spans);

View File

@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Map;
import java.util.Set; import java.util.Set;
@ -82,8 +84,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException; protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
@Override @Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
return new PositionCheckSpan(context, acceptDocs); return new PositionCheckSpan(context, acceptDocs, termContexts);
} }
@ -107,8 +109,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
protected class PositionCheckSpan extends Spans { protected class PositionCheckSpan extends Spans {
private Spans spans; private Spans spans;
public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs) throws IOException { public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
spans = match.getSpans(context, acceptDocs); spans = match.getSpans(context, acceptDocs, termContexts);
} }
@Override @Override

View File

@ -18,18 +18,21 @@ package org.apache.lucene.search.spans;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
/** Base class for span-based queries. */ /** Base class for span-based queries. */
public abstract class SpanQuery extends Query { public abstract class SpanQuery extends Query {
/** Expert: Returns the matches for this query in an index. Used internally /** Expert: Returns the matches for this query in an index. Used internally
* to search for spans. */ * to search for spans. */
public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException; public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
/** Returns the name of the field matched by this query.*/ /** Returns the name of the field matched by this query.*/
public abstract String getField(); public abstract String getField();

View File

@ -19,12 +19,19 @@ package org.apache.lucene.search.spans;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import java.io.IOException; import java.io.IOException;
import java.util.Map;
import java.util.Set; import java.util.Set;
/** Matches spans containing a term. */ /** Matches spans containing a term. */
@ -82,22 +89,46 @@ public class SpanTermQuery extends SpanQuery {
} }
@Override @Override
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
final IndexReader reader = context.reader; TermContext termContext = termContexts.get(term);
final DocsAndPositionsEnum postings = reader.termPositionsEnum(acceptDocs, final TermState state;
term.field(), if (termContext == null) {
term.bytes()); // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
// so we seek to the term now in this segment..., this sucks because its ugly mostly!
final Fields fields = context.reader.fields();
if (fields != null) {
final Terms terms = fields.terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
if (termsEnum.seekExact(term.bytes(), true)) {
state = termsEnum.termState();
} else {
state = null;
}
} else {
state = null;
}
} else {
state = null;
}
} else {
state = termContext.get(context.ord);
}
if (state == null) { // term is not present in that reader
return TermSpans.EMPTY_TERM_SPANS;
}
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
termsEnum.seekExact(term.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
if (postings != null) { if (postings != null) {
return new TermSpans(postings, term); return new TermSpans(postings, term);
} else { } else {
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
} else {
// term does not exist
return TermSpans.EMPTY_TERM_SPANS;
}
} }
} }
} }

View File

@ -27,7 +27,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext; import org.apache.lucene.util.TermContext;
import java.io.IOException; import java.io.IOException;
import java.util.Set; import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet; import java.util.TreeSet;
/** /**
@ -35,7 +36,7 @@ import java.util.TreeSet;
*/ */
public class SpanWeight extends Weight { public class SpanWeight extends Weight {
protected Similarity similarity; protected Similarity similarity;
protected Set<Term> terms; protected Map<Term,TermContext> termContexts;
protected SpanQuery query; protected SpanQuery query;
protected Similarity.Stats stats; protected Similarity.Stats stats;
@ -44,15 +45,16 @@ public class SpanWeight extends Weight {
this.similarity = searcher.getSimilarityProvider().get(query.getField()); this.similarity = searcher.getSimilarityProvider().get(query.getField());
this.query = query; this.query = query;
terms=new TreeSet<Term>(); termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> terms = new TreeSet<Term>();
query.extractTerms(terms); query.extractTerms(terms);
final ReaderContext context = searcher.getTopReaderContext(); final ReaderContext context = searcher.getTopReaderContext();
final TermContext states[] = new TermContext[terms.size()];
final TermStatistics termStats[] = new TermStatistics[terms.size()]; final TermStatistics termStats[] = new TermStatistics[terms.size()];
int i = 0; int i = 0;
for (Term term : terms) { for (Term term : terms) {
states[i] = TermContext.build(context, term, true); TermContext state = TermContext.build(context, term, true);
termStats[i] = searcher.termStatistics(term, states[i]); termStats[i] = searcher.termStatistics(term, state);
termContexts.put(term, state);
i++; i++;
} }
stats = similarity.computeStats( stats = similarity.computeStats(
@ -77,7 +79,7 @@ public class SpanWeight extends Weight {
@Override @Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException { boolean topScorer, Bits acceptDocs) throws IOException {
return new SpanScorer(query.getSpans(context, acceptDocs), this, similarity.sloppyDocScorer(stats, query.getField(), context)); return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
} }
@Override @Override

View File

@ -281,19 +281,6 @@ final class JustCompileSearch {
} }
} }
// Compile-only stub: verifies the SpanFilter extension points stay overridable.
// Never meant to be executed.
static final class JustCompileSpanFilter extends SpanFilter {
  @Override
  public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    // Intentionally unimplemented; this class only has to compile.
    throw new UnsupportedOperationException(UNSUPPORTED_MSG);
  }
  @Override
  public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
    // Returning null is acceptable for a compile-check stub (null means "no docs").
    return null;
  }
}
static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> { static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> {
protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) { protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {

View File

@ -1,147 +0,0 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
 * Verifies that {@link CachingSpanFilter} respects deletions: cached results must be
 * refreshed (cache misses) when the underlying reader changes, and deleted documents
 * must stop matching.
 */
public class TestCachingSpanFilter extends LuceneTestCase {

  /**
   * Adds a doc, caches a span filter over it, then adds another doc and finally deletes
   * them, checking hit counts and the filter's miss counter after each reader refresh.
   */
  public void testEnforceDeletions() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        random,
        dir,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).
            setMergeScheduler(new SerialMergeScheduler()).
            // asserts below requires no unexpected merges:
            setMergePolicy(newLogMergePolicy(10))
    );

    // NOTE: cannot use writer.getReader because RIW (on
    // flipping a coin) may give us a newly opened reader,
    // but we use .reopen on this reader below and expect to
    // (must) get an NRT reader:
    IndexReader reader = IndexReader.open(writer.w, true);
    // same reason we don't wrap?
    IndexSearcher searcher = newSearcher(reader, false);

    // add a doc, refresh the reader, and check that its there
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    // keep "id" as a single untokenized term so it is matchable by an exact TermQuery/SpanTermQuery
    customType.setTokenized(false);
    doc.add(newField("id", "1", customType));
    writer.addDocument(doc);

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1);
    assertEquals("Should find a hit...", 1, docs.totalHits);

    final SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1")));

    CachingSpanFilter filter = new CachingSpanFilter(startFilter);

    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
    // first use of the filter on this reader must populate the cache (at least one miss)
    int missCount = filter.missCount;
    assertTrue(missCount > 0);
    Query constantScore = new ConstantScoreQuery(filter);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
    // same reader: the cached entry must be reused, so no new misses
    assertEquals(missCount, filter.missCount);

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    IndexReader oldReader = reader;

    writer.addDocument(doc);
    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should find 2 hits...", 2, docs.totalHits);
    // new reader means the filter must recompute (miss count grows)
    assertTrue(filter.missCount > missCount);
    missCount = filter.missCount;

    constantScore = new ConstantScoreQuery(filter);
    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should find a hit...", 2, docs.totalHits);
    assertEquals(missCount, filter.missCount);

    // NOTE: important to hold ref here so GC doesn't clear
    // the cache entry!  Else the assert below may sometimes
    // fail:
    IndexReader oldReader2 = reader;

    // now delete the doc, refresh the reader, and see that it's not there
    writer.deleteDocuments(new Term("id", "1"));

    reader = refreshReader(reader);
    searcher.close();
    searcher = newSearcher(reader, false);

    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
    // NOTE(review): no new miss is expected here — presumably the refreshed NRT reader
    // shares the cache key while live-docs filtering removes the hit; confirm against
    // CachingSpanFilter's caching policy.
    assertEquals(missCount, filter.missCount);

    docs = searcher.search(constantScore, 1);
    assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);
    assertEquals(missCount, filter.missCount);

    // NOTE: silliness to make sure JRE does not optimize
    // away our holding onto oldReader to prevent
    // CachingWrapperFilter's WeakHashMap from dropping the
    // entry:
    assertTrue(oldReader != null);
    assertTrue(oldReader2 != null);

    searcher.close();
    writer.close();
    reader.close();
    dir.close();
  }

  /**
   * Reopens the reader if the index changed, closing the old one; otherwise
   * returns the original reader unchanged.
   */
  private static IndexReader refreshReader(IndexReader reader) throws IOException {
    IndexReader oldReader = reader;
    reader = IndexReader.openIfChanged(reader);
    if (reader != null) {
      // a new reader was returned: the old one is no longer needed
      oldReader.close();
      return reader;
    } else {
      // index unchanged: keep using the original reader
      return oldReader;
    }
  }
}

View File

@ -1,86 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
/**
 * Checks that {@link SpanQueryFilter} produces a correct doc id set and per-document
 * span positions for a simple single-term span query.
 */
public class TestSpanQueryFilter extends LuceneTestCase {

  public void testFilterWorks() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
    // Each document repeats its own number, so the matched term occurs at exactly two positions.
    for (int i = 0; i < 500; i++) {
      Document d = new Document();
      d.add(newField("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
          TextField.TYPE_UNSTORED));
      writer.addDocument(d);
    }
    final int number = 10;
    IndexReader reader = writer.getReader();
    writer.close();

    // Locate the leaf reader that contains document `number`.
    AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
    int subIndex = ReaderUtil.subIndex(number, leaves); // find the reader with this document in it

    SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(number).trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.bitSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());

    DocIdSet docIdSet = result.getDocIdSet();
    assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
    assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, number - leaves[subIndex].docBase);

    List<SpanFilterResult.PositionInfo> spans = result.getPositions();
    assertTrue("spans is null and it shouldn't be", spans != null);

    // There must be exactly one PositionInfo per matching document.
    int size = getDocIdSetSize(docIdSet);
    assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);

    for (final SpanFilterResult.PositionInfo info : spans) {
      assertTrue("info is null and it shouldn't be", info != null);
      // The doc carrying the positions must also be present in the doc id set...
      assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
      // ...and the term occurs twice per document, so two positions are expected.
      assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2,
          info.getPositions().size() == 2);
    }
    reader.close();
    dir.close();
  }

  /** Returns how many documents the given set's iterator walks over. */
  int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
    DocIdSetIterator iterator = docIdSet.iterator();
    int count = 0;
    while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      count++;
    }
    return count;
  }

  /** Asserts that advancing the set's iterator lands exactly on {@code docId}. */
  public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
    DocIdSetIterator iterator = docIdSet.iterator();
    assertTrue(msg, iterator.advance(docId) != DocIdSetIterator.NO_MORE_DOCS);
    assertTrue(msg, iterator.docID() == docId);
  }
}

View File

@ -19,11 +19,14 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Map;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Weight; import org.apache.lucene.search.Weight;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;
/** /**
* Holds all implementations of classes in the o.a.l.s.spans package as a * Holds all implementations of classes in the o.a.l.s.spans package as a
@ -83,7 +86,7 @@ final class JustCompileSearchSpans {
} }
@Override @Override
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException { public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG); throw new UnsupportedOperationException(UNSUPPORTED_MSG);
} }

View File

@ -20,11 +20,16 @@ package org.apache.lucene.search.spans;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
/** /**
* *
@ -39,19 +44,27 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
private AtomicReaderContext[] leaves; private AtomicReaderContext[] leaves;
private int leafOrd = 0; private int leafOrd = 0;
private Spans current; private Spans current;
private Map<Term,TermContext> termContexts;
private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) { private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
this.query = query; this.query = query;
this.leaves = leaves; this.leaves = leaves;
this.termContexts = termContexts;
} }
public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException { public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
TreeSet<Term> terms = new TreeSet<Term>();
query.extractTerms(terms);
for (Term term : terms) {
termContexts.put(term, TermContext.build(topLevelReaderContext, term, true));
}
AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext); AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
if(leaves.length == 1) { if(leaves.length == 1) {
return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs()); return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs(), termContexts);
} }
return new MultiSpansWrapper(leaves, query); return new MultiSpansWrapper(leaves, query, termContexts);
} }
@Override @Override
@ -60,14 +73,14 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
return false; return false;
} }
if (current == null) { if (current == null) {
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} }
while(true) { while(true) {
if (current.next()) { if (current.next()) {
return true; return true;
} }
if (++leafOrd < leaves.length) { if (++leafOrd < leaves.length) {
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} else { } else {
current = null; current = null;
break; break;
@ -85,17 +98,17 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
int subIndex = ReaderUtil.subIndex(target, leaves); int subIndex = ReaderUtil.subIndex(target, leaves);
assert subIndex >= leafOrd; assert subIndex >= leafOrd;
if (subIndex != leafOrd) { if (subIndex != leafOrd) {
current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs()); current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs(), termContexts);
leafOrd = subIndex; leafOrd = subIndex;
} else if (current == null) { } else if (current == null) {
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} }
while (true) { while (true) {
if (current.skipTo(target - leaves[leafOrd].docBase)) { if (current.skipTo(target - leaves[leafOrd].docBase)) {
return true; return true;
} }
if (++leafOrd < leaves.length) { if (++leafOrd < leaves.length) {
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs()); current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
} else { } else {
current = null; current = null;
break; break;