mirror of https://github.com/apache/lucene.git

commit 5682889026 (parent f389654f9c)

LUCENE-3533: nuke spanfilters

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201787 13f79535-47bb-0310-9956-ffa450edef68
@@ -199,6 +199,9 @@ Changes in backwards compatibility policy
   as these are no longer used by the scoring system. See MIGRATE.txt for more
   details. (Robert Muir)
 
+* LUCENE-3533: Removed SpanFilters, they created large lists of objects and
+  did not scale. (Robert Muir)
+
 Changes in Runtime Behavior
 
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
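The SpanFilter classes are removed below with no drop-in replacement. Code that only needed their document-level filtering (not the per-document position lists that did not scale) can wrap the same SpanQuery in the existing QueryWrapperFilter. A minimal migration sketch, not part of this commit; the class and method names here are illustrative:

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.spans.SpanTermQuery;

// Sketch: replace a removed SpanQueryFilter with QueryWrapperFilter.
// The same documents match; only the buffered position lists (the part
// that "created large lists of objects") go away.
class SpanFilterMigration {
  static Filter idFilter(String field, String value) {
    // before this commit: new SpanQueryFilter(new SpanTermQuery(term))
    Term term = new Term(field, value);
    return new QueryWrapperFilter(new SpanTermQuery(term));
  }
}
```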
@@ -25,6 +25,7 @@ import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -42,6 +43,7 @@ import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 
 /**
  * Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
@@ -247,16 +249,21 @@ public class WeightedSpanTermExtractor {
     List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
 
     for (final String field : fieldNames) {
-      AtomicReaderContext context = getLeafContextForField(field);
-      Bits acceptDocs = context.reader.getLiveDocs();
-      final Spans spans;
       final SpanQuery q;
       if (mustRewriteQuery) {
-        spans = queries.get(field).getSpans(context, acceptDocs);
         q = queries.get(field);
       } else {
-        spans = spanQuery.getSpans(context, acceptDocs);
         q = spanQuery;
       }
+
+      AtomicReaderContext context = getLeafContextForField(field);
+      Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+      TreeSet<Term> extractedTerms = new TreeSet<Term>();
+      q.extractTerms(extractedTerms);
+      for (Term term : extractedTerms) {
+        termContexts.put(term, TermContext.build(context, term, true));
+      }
+      Bits acceptDocs = context.reader.getLiveDocs();
+      final Spans spans = q.getSpans(context, acceptDocs, termContexts);
 
       // collect span positions
       while (spans.next()) {
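The highlighter hunk above shows the new calling convention for SpanQuery.getSpans: the caller first collects the query's terms and builds a TermContext per term, then hands the map to getSpans so the query itself does not re-seek each term. A condensed, self-contained sketch of that pattern (the wrapper class is mine, the body is taken from the hunk above):

```java
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;

import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;

// Caller-side pattern for the new getSpans signature: resolve every term
// of the span query to a TermContext once, then open the Spans.
class GetSpansPattern {
  static Spans open(SpanQuery q, AtomicReaderContext context, Bits acceptDocs) throws IOException {
    Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
    TreeSet<Term> terms = new TreeSet<Term>();
    q.extractTerms(terms);                 // collect all terms in the span query
    for (Term term : terms) {
      termContexts.put(term, TermContext.build(context, term, true)); // one seek per term, up front
    }
    return q.getSpans(context, acceptDocs, termContexts);
  }
}
```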
@@ -1,136 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Wraps another SpanFilter's result and caches it. The purpose is to allow
- * filters to simply filter, and then wrap with this class to add caching.
- */
-public class CachingSpanFilter extends SpanFilter {
-  private SpanFilter filter;
-
-  /**
-   * A transient Filter cache (package private because of test)
-   */
-  private final CachingWrapperFilter.FilterCache<SpanFilterResult> cache;
-
-  /** Wraps another SpanFilter's result and caches it.
-   * @param filter Filter to cache results of
-   */
-  public CachingSpanFilter(SpanFilter filter) {
-    this.filter = filter;
-    this.cache = new CachingWrapperFilter.FilterCache<SpanFilterResult>();
-  }
-
-  @Override
-  public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
-    final SpanFilterResult result = getCachedResult(context);
-    return BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs);
-  }
-
-  @Override
-  public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
-    final SpanFilterResult result = getCachedResult(context);
-    if (acceptDocs == null) {
-      return result;
-    } else {
-      // TODO: filter positions more efficient
-      List<SpanFilterResult.PositionInfo> allPositions = result.getPositions();
-      List<SpanFilterResult.PositionInfo> positions = new ArrayList<SpanFilterResult.PositionInfo>(allPositions.size() / 2 + 1);
-      for (SpanFilterResult.PositionInfo p : allPositions) {
-        if (acceptDocs.get(p.getDoc())) {
-          positions.add(p);
-        }
-      }
-      return new SpanFilterResult(BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs), positions);
-    }
-  }
-
-  /** Provide the DocIdSet to be cached, using the DocIdSet provided
-   * by the wrapped Filter.
-   * <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
-   * returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
-   * an {@link FixedBitSet}.
-   */
-  protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException {
-    if (result == null || result.getDocIdSet() == null) {
-      // this is better than returning null, as the nonnull result can be cached
-      return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
-    } else if (result.getDocIdSet().isCacheable()) {
-      return result;
-    } else {
-      final DocIdSetIterator it = result.getDocIdSet().iterator();
-      // null is allowed to be returned by iterator(),
-      // in this case we wrap with the empty set,
-      // which is cacheable.
-      if (it == null) {
-        return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
-      } else {
-        final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
-        bits.or(it);
-        return new SpanFilterResult(bits, result.getPositions());
-      }
-    }
-  }
-
-  // for testing
-  int hitCount, missCount;
-
-  private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException {
-    final IndexReader reader = context.reader;
-    final Object coreKey = reader.getCoreCacheKey();
-
-    SpanFilterResult result = cache.get(reader, coreKey);
-    if (result != null) {
-      hitCount++;
-      return result;
-    } else {
-      missCount++;
-      // cache miss: we use no acceptDocs here
-      // (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set)
-      result = spanFilterResultToCache(filter.bitSpans(context, null/**!!!*/), reader);
-      cache.put(coreKey, result);
-    }
-
-    return result;
-  }
-
-  @Override
-  public String toString() {
-    return "CachingSpanFilter("+filter+")";
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof CachingSpanFilter)) return false;
-    return this.filter.equals(((CachingSpanFilter)o).filter);
-  }
-
-  @Override
-  public int hashCode() {
-    return filter.hashCode() ^ 0x1117BF25;
-  }
-}
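CachingSpanFilter is gone, but the per-reader caching it provided survives through composition of the classes that remain. A hedged sketch, assuming QueryWrapperFilter as the replacement source of matching docs (the wrapper class below is illustrative, not part of this commit):

```java
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.spans.SpanTermQuery;

// Sketch: CachingWrapperFilter still caches the DocIdSet per reader core,
// exactly what CachingSpanFilter did minus the cached position lists.
class CachedSpanDocFilter {
  static Filter build(Term term) {
    Filter uncached = new QueryWrapperFilter(new SpanTermQuery(term));
    return new CachingWrapperFilter(uncached);
  }
}
```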
@@ -1,39 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2007 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.util.Bits;
-
-import java.io.IOException;
-
-/** Abstract base class providing a mechanism to restrict searches to a subset
- of an index and also maintains and returns position information.
-
- This is useful if you want to compare the positions from a SpanQuery with the positions of items in
- a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
- and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
- then compare position information for post processing.
- */
-public abstract class SpanFilter extends Filter{
-  /** Returns a SpanFilterResult with true for documents which should be permitted in
-    search results, and false for those that should not and Spans for where the true docs match.
-   * @param context The {@link AtomicReaderContext} to load position and DocIdSet information from
-   * @return A {@link SpanFilterResult}
-   * @throws java.io.IOException if there was an issue accessing the necessary information
-   * */
-  public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
-}
@@ -1,119 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2005 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-
-/**
- * The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery
- *
- * @lucene.experimental
- *
- **/
-public class SpanFilterResult {
-  private DocIdSet docIdSet;
-  private List<PositionInfo> positions;//Spans spans;
-
-  public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT =
-    new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.<PositionInfo>emptyList());
-
-  /**
-   *
-   * @param docIdSet The DocIdSet for the Filter
-   * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
-   */
-  public SpanFilterResult(DocIdSet docIdSet, List<PositionInfo> positions) {
-    this.docIdSet = docIdSet;
-    this.positions = positions;
-  }
-
-  /**
-   * The first entry in the array corresponds to the first "on" bit.
-   * Entries are increasing by document order
-   * @return A List of PositionInfo objects
-   */
-  public List<PositionInfo> getPositions() {
-    return positions;
-  }
-
-  /** Returns the docIdSet */
-  public DocIdSet getDocIdSet() {
-    return docIdSet;
-  }
-
-  public static class PositionInfo {
-    private int doc;
-    private List<StartEnd> positions;
-
-
-    public PositionInfo(int doc) {
-      this.doc = doc;
-      positions = new ArrayList<StartEnd>();
-    }
-
-    public void addPosition(int start, int end)
-    {
-      positions.add(new StartEnd(start, end));
-    }
-
-    public int getDoc() {
-      return doc;
-    }
-
-    /**
-     *
-     * @return Positions
-     */
-    public List<StartEnd> getPositions() {
-      return positions;
-    }
-  }
-
-  public static class StartEnd
-  {
-    private int start;
-    private int end;
-
-
-    public StartEnd(int start, int end) {
-      this.start = start;
-      this.end = end;
-    }
-
-    /**
-     *
-     * @return The end position of this match
-     */
-    public int getEnd() {
-      return end;
-    }
-
-    /**
-     * The Start position
-     * @return The start position of this match
-     */
-    public int getStart() {
-      return start;
-    }
-
-  }
-}
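With SpanFilterResult's materialized PositionInfo/StartEnd lists gone, position data is consumed by iterating a Spans directly, which is the scaling fix this commit is after: positions stream one at a time instead of being buffered per document. A minimal sketch of the replacement access pattern, assuming the caller already opened a Spans:

```java
import java.io.IOException;

import org.apache.lucene.search.spans.Spans;

// Sketch: stream matches straight from a Spans enumeration instead of
// materializing SpanFilterResult.PositionInfo lists for every document.
class SpanPositionConsumer {
  static void consume(Spans spans) throws IOException {
    while (spans.next()) {
      int doc = spans.doc();
      int start = spans.start();   // was StartEnd.getStart()
      int end = spans.end();       // was StartEnd.getEnd()
      // process (doc, start, end) here without buffering it
    }
  }
}
```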
@@ -1,103 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2007 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.FixedBitSet;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Constrains search results to only match those which also match a provided
- * query. Also provides position information about where each document matches
- * at the cost of extra space compared with the QueryWrapperFilter.
- * There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely,
- * the position information for each matching document is stored.
- * <p/>
- * This filter does not cache. See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that
- * caches.
- */
-public class SpanQueryFilter extends SpanFilter {
-  protected SpanQuery query;
-
-  protected SpanQueryFilter()
-  {
-
-  }
-
-  /** Constructs a filter which only matches documents matching
-   * <code>query</code>.
-   * @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter.
-   */
-  public SpanQueryFilter(SpanQuery query) {
-    this.query = query;
-  }
-
-  @Override
-  public final DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-    SpanFilterResult result = bitSpans(context, acceptDocs);
-    return result.getDocIdSet();
-  }
-
-  @Override
-  public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-
-    final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
-    Spans spans = query.getSpans(context, acceptDocs);
-    List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
-    int currentDoc = -1;
-    SpanFilterResult.PositionInfo currentInfo = null;
-    while (spans.next())
-    {
-      int doc = spans.doc();
-      bits.set(doc);
-      if (currentDoc != doc)
-      {
-        currentInfo = new SpanFilterResult.PositionInfo(doc);
-        tmp.add(currentInfo);
-        currentDoc = doc;
-      }
-      currentInfo.addPosition(spans.start(), spans.end());
-    }
-    return new SpanFilterResult(bits, tmp);
-  }
-
-
-  public SpanQuery getQuery() {
-    return query;
-  }
-
-  @Override
-  public String toString() {
-    return "SpanQueryFilter(" + query + ")";
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query);
-  }
-
-  @Override
-  public int hashCode() {
-    return query.hashCode() ^ 0x923F64B9;
-  }
-}
@@ -150,7 +150,7 @@ public class PayloadNearQuery extends SpanNearQuery {
   @Override
   public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
       boolean topScorer, Bits acceptDocs) throws IOException {
-    return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs), this,
+    return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
         similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
   }
 
@@ -20,8 +20,11 @@ package org.apache.lucene.search.payloads;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -41,6 +44,7 @@ import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
 
 /**
  * Experimental class to get set of payloads for most standard Lucene queries.
@@ -174,9 +178,15 @@ public class PayloadSpanUtil {
 
   private void getPayloads(Collection<byte []> payloads, SpanQuery query)
       throws IOException {
+    Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+    TreeSet<Term> terms = new TreeSet<Term>();
+    query.extractTerms(terms);
+    for (Term term : terms) {
+      termContexts.put(term, TermContext.build(context, term, true));
+    }
     final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
     for (AtomicReaderContext atomicReaderContext : leaves) {
-      final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs());
+      final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs(), termContexts);
       while (spans.next() == true) {
         if (spans.isPayloadAvailable()) {
           Collection<byte[]> payload = spans.getPayload();
@@ -81,7 +81,7 @@ public class PayloadTermQuery extends SpanTermQuery {
   @Override
   public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
       boolean topScorer, Bits acceptDocs) throws IOException {
-    return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs),
+    return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
         this, similarity.sloppyDocScorer(stats, query.getField(), context));
   }
 
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
@@ -27,6 +28,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 
 /**
@@ -93,8 +95,8 @@ public class FieldMaskingSpanQuery extends SpanQuery {
   // ...this is done to be more consistent with things like SpanFirstQuery
 
   @Override
-  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-    return maskedQuery.getSpans(context, acceptDocs);
+  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+    return maskedQuery.getSpans(context, acceptDocs, termContexts);
   }
 
   @Override
@@ -17,9 +17,11 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -28,6 +30,7 @@ import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Collection;
+import java.util.Map;
 import java.util.Set;
 
 /** A Spans that is formed from the ordered subspans of a SpanNearQuery
@@ -78,11 +81,11 @@ public class NearSpansOrdered extends Spans {
   private SpanNearQuery query;
   private boolean collectPayloads = true;
 
-  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs) throws IOException {
-    this(spanNearQuery, context, acceptDocs, true);
+  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+    this(spanNearQuery, context, acceptDocs, termContexts, true);
   }
 
-  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, boolean collectPayloads)
+  public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
   throws IOException {
     if (spanNearQuery.getClauses().length < 2) {
       throw new IllegalArgumentException("Less than 2 clauses: "
@@ -95,7 +98,7 @@ public class NearSpansOrdered extends Spans {
     matchPayload = new LinkedList<byte[]>();
     subSpansByDoc = new Spans[clauses.length];
     for (int i = 0; i < clauses.length; i++) {
-      subSpans[i] = clauses[i].getSpans(context, acceptDocs);
+      subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
       subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
     }
     query = spanNearQuery; // kept for toString() only.
@@ -17,14 +17,17 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
 */
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.TermContext;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.HashSet;
 
@@ -132,7 +135,7 @@ public class NearSpansUnordered extends Spans {
   }
 
 
-  public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs)
+  public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
   throws IOException {
     this.query = query;
     this.slop = query.getSlop();
@@ -142,7 +145,7 @@ public class NearSpansUnordered extends Spans {
     subSpans = new Spans[clauses.length];
     for (int i = 0; i < clauses.length; i++) {
       SpansCell cell =
-        new SpansCell(clauses[i].getSpans(context, acceptDocs), i);
+        new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
       ordered.add(cell);
      subSpans[i] = cell.spans;
     }
@@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
+import java.util.Map;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
@@ -90,7 +91,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
   }
 
   @Override
-  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
     throw new UnsupportedOperationException("Query should have been rewritten");
   }
 
@@ -157,6 +158,9 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
 
     @Override
     protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
+      // TODO: would be nice to not lose term-state here.
+      // we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span
+      // (if you put this thing in another span query, it would extractTerms/double-seek anyway)
       final SpanTermQuery q = new SpanTermQuery(term);
       q.setBoost(boost);
       topLevel.addClause(q);
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;
 
 
@@ -31,6 +32,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 
 /** Matches spans which are near one another. One can specify <i>slop</i>, the
@@ -118,16 +120,16 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
     if (clauses.size() == 0)                      // optimize 0-clause case
-      return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs);
+      return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
 
     if (clauses.size() == 1)                      // optimize 1-clause case
-      return clauses.get(0).getSpans(context, acceptDocs);
+      return clauses.get(0).getSpans(context, acceptDocs, termContexts);
 
     return inOrder
-            ? (Spans) new NearSpansOrdered(this, context, acceptDocs, collectPayloads)
-            : (Spans) new NearSpansUnordered(this, context, acceptDocs);
+            ? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
+            : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
   }
 
   @Override
@@ -22,11 +22,13 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Map;
 import java.util.Set;
 
 /** Removes matches which overlap with another SpanQuery. */
@@ -76,12 +78,12 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
     return new Spans() {
-      private Spans includeSpans = include.getSpans(context, acceptDocs);
+      private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
       private boolean moreInclude = true;
 
-      private Spans excludeSpans = exclude.getSpans(context, acceptDocs);
+      private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
       private boolean moreExclude = excludeSpans.next();
 
       @Override
@@ -23,6 +23,7 @@ import java.util.List;
 import java.util.Collection;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
@@ -30,6 +31,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.search.Query;
 
@@ -164,9 +166,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
+  public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
     if (clauses.size() == 1)                      // optimize 1-clause case
-      return (clauses.get(0)).getSpans(context, acceptDocs);
+      return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
 
     return new Spans() {
       private SpanQueue queue = null;
@@ -175,7 +177,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
         queue = new SpanQueue(clauses.size());
         Iterator<SpanQuery> i = clauses.iterator();
         while (i.hasNext()) {
-          Spans spans = i.next().getSpans(context, acceptDocs);
+          Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
           if ( ((target == -1) && spans.next())
               || ((target != -1) && spans.skipTo(target))) {
             queue.add(spans);
@@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Map;
 import java.util.Set;
 
 
@@ -82,8 +84,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
   protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
-    return new PositionCheckSpan(context, acceptDocs);
+  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+    return new PositionCheckSpan(context, acceptDocs, termContexts);
   }
 
 
@@ -107,8 +109,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
   protected class PositionCheckSpan extends Spans {
     private Spans spans;
 
-    public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-      spans = match.getSpans(context, acceptDocs);
+    public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+      spans = match.getSpans(context, acceptDocs, termContexts);
     }
 
     @Override
@@ -18,18 +18,21 @@ package org.apache.lucene.search.spans;
  */
 
 import java.io.IOException;
+import java.util.Map;
 
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 
 /** Base class for span-based queries. */
 public abstract class SpanQuery extends Query {
   /** Expert: Returns the matches for this query in an index. Used internally
    * to search for spans. */
-  public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
+  public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
 
   /** Returns the name of the field matched by this query.*/
   public abstract String getField();
@@ -19,12 +19,19 @@ package org.apache.lucene.search.spans;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.io.IOException;
+import java.util.Map;
 import java.util.Set;
 
 /** Matches spans containing a term. */
@@ -82,22 +89,46 @@ public class SpanTermQuery extends SpanQuery {
   }
 
   @Override
-  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
-    final IndexReader reader = context.reader;
-    final DocsAndPositionsEnum postings = reader.termPositionsEnum(acceptDocs,
-                                                                   term.field(),
-                                                                   term.bytes());
+  public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
+    TermContext termContext = termContexts.get(term);
+    final TermState state;
+    if (termContext == null) {
+      // this happens with span-not query, as it doesn't include the NOT side in extractTerms()
+      // so we seek to the term now in this segment..., this sucks because its ugly mostly!
+      final Fields fields = context.reader.fields();
+      if (fields != null) {
+        final Terms terms = fields.terms(term.field());
+        if (terms != null) {
+          final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
+          if (termsEnum.seekExact(term.bytes(), true)) {
+            state = termsEnum.termState();
+          } else {
+            state = null;
+          }
+        } else {
+          state = null;
+        }
+      } else {
+        state = null;
+      }
+    } else {
+      state = termContext.get(context.ord);
+    }
+
+    if (state == null) { // term is not present in that reader
+      return TermSpans.EMPTY_TERM_SPANS;
+    }
+
+    final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+    termsEnum.seekExact(term.bytes(), state);
+
+    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
+
     if (postings != null) {
       return new TermSpans(postings, term);
     } else {
-      if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
-        // term does exist, but has no positions
-        throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
-      } else {
-        // term does not exist
-        return TermSpans.EMPTY_TERM_SPANS;
-      }
+      // term does exist, but has no positions
+      throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
     }
   }
 }
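SpanTermQuery now resolves its term through the shared map: a TermContext built once at the top level holds one TermState per segment ordinal, so each per-segment getSpans is a cheap state-based seek instead of a fresh term lookup (the fallback branch above only fires when a term, e.g. the NOT side of SpanNotQuery, was never extracted). A sketch of that two-step resolution, with the wrapper class being mine, not the patch's:

```java
import java.io.IOException;

import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.util.TermContext;

// Sketch of the lookup the new SpanTermQuery.getSpans performs: build once
// against the top-level context, then fetch per-segment state by leaf ordinal.
class TermStateLookup {
  static TermState forLeaf(ReaderContext topLevel, Term term, int leafOrd) throws IOException {
    TermContext termContext = TermContext.build(topLevel, term, true); // one pass over all segments
    return termContext.get(leafOrd); // null if the term is absent in that segment
  }
}
```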
@@ -27,7 +27,8 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.TermContext;
 
 import java.io.IOException;
-import java.util.Set;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.TreeSet;
 
 /**
@@ -35,7 +36,7 @@ import java.util.TreeSet;
  */
 public class SpanWeight extends Weight {
   protected Similarity similarity;
-  protected Set<Term> terms;
+  protected Map<Term,TermContext> termContexts;
   protected SpanQuery query;
   protected Similarity.Stats stats;
 
@@ -44,15 +45,16 @@ public class SpanWeight extends Weight {
     this.similarity = searcher.getSimilarityProvider().get(query.getField());
     this.query = query;
 
-    terms=new TreeSet<Term>();
+    termContexts = new HashMap<Term,TermContext>();
+    TreeSet<Term> terms = new TreeSet<Term>();
     query.extractTerms(terms);
     final ReaderContext context = searcher.getTopReaderContext();
-    final TermContext states[] = new TermContext[terms.size()];
     final TermStatistics termStats[] = new TermStatistics[terms.size()];
     int i = 0;
     for (Term term : terms) {
-      states[i] = TermContext.build(context, term, true);
-      termStats[i] = searcher.termStatistics(term, states[i]);
+      TermContext state = TermContext.build(context, term, true);
+      termStats[i] = searcher.termStatistics(term, state);
+      termContexts.put(term, state);
       i++;
     }
     stats = similarity.computeStats(
@@ -77,7 +79,7 @@ public class SpanWeight extends Weight {
   @Override
   public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
       boolean topScorer, Bits acceptDocs) throws IOException {
-    return new SpanScorer(query.getSpans(context, acceptDocs), this, similarity.sloppyDocScorer(stats, query.getField(), context));
+    return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
   }
 
   @Override
@@ -281,19 +281,6 @@ final class JustCompileSearch {
     }
   }
 
-  static final class JustCompileSpanFilter extends SpanFilter {
-
-    @Override
-    public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-      throw new UnsupportedOperationException(UNSUPPORTED_MSG);
-    }
-
-    @Override
-    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
-      return null;
-    }
-  }
-
   static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> {
 
     protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {
@@ -1,147 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.SerialMergeScheduler;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
-
-public class TestCachingSpanFilter extends LuceneTestCase {
-
-  public void testEnforceDeletions() throws Exception {
-    Directory dir = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(
-        random,
-        dir,
-        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).
-            setMergeScheduler(new SerialMergeScheduler()).
-            // asserts below requires no unexpected merges:
-            setMergePolicy(newLogMergePolicy(10))
-    );
-
-    // NOTE: cannot use writer.getReader because RIW (on
-    // flipping a coin) may give us a newly opened reader,
-    // but we use .reopen on this reader below and expect to
-    // (must) get an NRT reader:
-    IndexReader reader = IndexReader.open(writer.w, true);
-    // same reason we don't wrap?
-    IndexSearcher searcher = newSearcher(reader, false);
-
-    // add a doc, refresh the reader, and check that its there
-    Document doc = new Document();
-    FieldType customType = new FieldType(TextField.TYPE_STORED);
-    customType.setTokenized(false);
-    doc.add(newField("id", "1", customType));
-    writer.addDocument(doc);
-
-    reader = refreshReader(reader);
-    searcher.close();
-    searcher = newSearcher(reader, false);
-
-    TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1);
-    assertEquals("Should find a hit...", 1, docs.totalHits);
-
-    final SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1")));
-
-    CachingSpanFilter filter = new CachingSpanFilter(startFilter);
-
-    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
-    assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
-    int missCount = filter.missCount;
-    assertTrue(missCount > 0);
-    Query constantScore = new ConstantScoreQuery(filter);
-    docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
-    assertEquals(missCount, filter.missCount);
-
-    // NOTE: important to hold ref here so GC doesn't clear
-    // the cache entry!  Else the assert below may sometimes
-    // fail:
-    IndexReader oldReader = reader;
-
-    writer.addDocument(doc);
-    reader = refreshReader(reader);
-    searcher.close();
-    searcher = newSearcher(reader, false);
-
-    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
-    assertEquals("[query + filter] Should find 2 hits...", 2, docs.totalHits);
-    assertTrue(filter.missCount > missCount);
-    missCount = filter.missCount;
-
-    constantScore = new ConstantScoreQuery(filter);
-    docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should find a hit...", 2, docs.totalHits);
-    assertEquals(missCount, filter.missCount);
-
-    // NOTE: important to hold ref here so GC doesn't clear
-    // the cache entry!  Else the assert below may sometimes
-    // fail:
-    IndexReader oldReader2 = reader;
-
-    // now delete the doc, refresh the reader, and see that it's not there
-    writer.deleteDocuments(new Term("id", "1"));
-
-    reader = refreshReader(reader);
-    searcher.close();
-    searcher = newSearcher(reader, false);
-
-    docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
-    assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
-    assertEquals(missCount, filter.missCount);
-
-    docs = searcher.search(constantScore, 1);
-    assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);
-    assertEquals(missCount, filter.missCount);
-
-    // NOTE: silliness to make sure JRE does not optimize
-    // away our holding onto oldReader to prevent
-    // CachingWrapperFilter's WeakHashMap from dropping the
-    // entry:
-    assertTrue(oldReader != null);
-    assertTrue(oldReader2 != null);
-
-    searcher.close();
-    writer.close();
-    reader.close();
-    dir.close();
-  }
-
-  private static IndexReader refreshReader(IndexReader reader) throws IOException {
-    IndexReader oldReader = reader;
-    reader = IndexReader.openIfChanged(reader);
-    if (reader != null) {
-      oldReader.close();
-      return reader;
-    } else {
-      return oldReader;
-    }
-  }
-
-}
@@ -1,86 +0,0 @@
-package org.apache.lucene.search;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.List;
-
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.spans.SpanTermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.English;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.ReaderUtil;
-
-public class TestSpanQueryFilter extends LuceneTestCase {
-
-  public void testFilterWorks() throws Exception {
-    Directory dir = newDirectory();
-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
-    for (int i = 0; i < 500; i++) {
-      Document document = new Document();
-      document.add(newField("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
-              TextField.TYPE_UNSTORED));
-      writer.addDocument(document);
-    }
-    final int number = 10;
-    IndexReader reader = writer.getReader();
-    writer.close();
-    AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
-    int subIndex = ReaderUtil.subIndex(number, leaves); // find the reader with this document in it
-    SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(number).trim()));
-    SpanQueryFilter filter = new SpanQueryFilter(query);
-    SpanFilterResult result = filter.bitSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());
-    DocIdSet docIdSet = result.getDocIdSet();
-    assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
-    assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, number - leaves[subIndex].docBase);
-    List<SpanFilterResult.PositionInfo> spans = result.getPositions();
-    assertTrue("spans is null and it shouldn't be", spans != null);
-    int size = getDocIdSetSize(docIdSet);
-    assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);
-    for (final SpanFilterResult.PositionInfo info: spans) {
-      assertTrue("info is null and it shouldn't be", info != null);
-      //The doc should indicate the bit is on
-      assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
-      //There should be two positions in each
-      assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
-    }
-
-    reader.close();
-    dir.close();
-  }
-
-  int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
-    int size = 0;
-    DocIdSetIterator it = docIdSet.iterator();
-    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-      size++;
-    }
-    return size;
-  }
-
-  public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
-    DocIdSetIterator it = docIdSet.iterator();
-    assertTrue(msg, it.advance(docId) != DocIdSetIterator.NO_MORE_DOCS);
-    assertTrue(msg, it.docID() == docId);
-  }
-}
@@ -19,11 +19,14 @@ package org.apache.lucene.search.spans;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Map;
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.TermContext;
 
 /**
  * Holds all implementations of classes in the o.a.l.s.spans package as a
@@ -83,7 +86,7 @@ final class JustCompileSearchSpans {
   }
 
   @Override
-  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
+  public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
     throw new UnsupportedOperationException(UNSUPPORTED_MSG);
   }
 
@@ -20,11 +20,16 @@ package org.apache.lucene.search.spans;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeSet;
 
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.TermContext;
 
 /**
 *
@@ -39,19 +44,27 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
   private AtomicReaderContext[] leaves;
   private int leafOrd = 0;
   private Spans current;
+  private Map<Term,TermContext> termContexts;
 
-  private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) {
+  private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
     this.query = query;
     this.leaves = leaves;
+    this.termContexts = termContexts;
 
   }
 
   public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
+    Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
+    TreeSet<Term> terms = new TreeSet<Term>();
+    query.extractTerms(terms);
+    for (Term term : terms) {
+      termContexts.put(term, TermContext.build(topLevelReaderContext, term, true));
+    }
     AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
     if(leaves.length == 1) {
-      return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs());
+      return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs(), termContexts);
     }
-    return new MultiSpansWrapper(leaves, query);
+    return new MultiSpansWrapper(leaves, query, termContexts);
   }
 
   @Override
@@ -60,14 +73,14 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
       return false;
     }
     if (current == null) {
-      current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+      current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
    }
    while(true) {
      if (current.next()) {
        return true;
      }
      if (++leafOrd < leaves.length) {
-        current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+        current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
      } else {
        current = null;
        break;
@@ -85,17 +98,17 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
     int subIndex = ReaderUtil.subIndex(target, leaves);
     assert subIndex >= leafOrd;
     if (subIndex != leafOrd) {
-      current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());
+      current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs(), termContexts);
       leafOrd = subIndex;
     } else if (current == null) {
-      current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+      current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
     }
     while (true) {
       if (current.skipTo(target - leaves[leafOrd].docBase)) {
         return true;
       }
       if (++leafOrd < leaves.length) {
-        current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
+        current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
       } else {
         current = null;
         break;
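MultiSpansWrapper hides this bookkeeping from tests: wrap() builds the Map<Term,TermContext> once against the top-level context and reopens per-leaf Spans on demand, so test code keeps its old one-liner feel. Assumed test-side usage (the helper is a test class, so this sketch only applies inside the test tree):

```java
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.MultiSpansWrapper;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;

// Sketch: the wrapper owns the TermContext map, so callers never touch it.
class MultiSpansWrapperUsage {
  static void dumpMatches(IndexReader reader, String field, String text) throws Exception {
    Spans spans = MultiSpansWrapper.wrap(reader.getTopReaderContext(),
        new SpanTermQuery(new Term(field, text)));
    while (spans.next()) {
      System.out.println(spans.doc() + ": [" + spans.start() + ", " + spans.end() + ")");
    }
  }
}
```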