mirror of https://github.com/apache/lucene.git
LUCENE-3533: nuke spanfilters
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1201787 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f389654f9c
commit
5682889026
|
@ -199,6 +199,9 @@ Changes in backwards compatibility policy
|
||||||
as these are no longer used by the scoring system. See MIGRATE.txt for more
|
as these are no longer used by the scoring system. See MIGRATE.txt for more
|
||||||
details. (Robert Muir)
|
details. (Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-3533: Removed SpanFilters, as they created large lists of objects and
|
||||||
|
did not scale. (Robert Muir)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
|
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -42,6 +43,7 @@ import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.Spans;
|
import org.apache.lucene.search.spans.Spans;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
|
* Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether
|
||||||
|
@ -247,16 +249,21 @@ public class WeightedSpanTermExtractor {
|
||||||
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
|
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
|
||||||
|
|
||||||
for (final String field : fieldNames) {
|
for (final String field : fieldNames) {
|
||||||
|
final SpanQuery q;
|
||||||
AtomicReaderContext context = getLeafContextForField(field);
|
|
||||||
Bits acceptDocs = context.reader.getLiveDocs();
|
|
||||||
final Spans spans;
|
|
||||||
if (mustRewriteQuery) {
|
if (mustRewriteQuery) {
|
||||||
spans = queries.get(field).getSpans(context, acceptDocs);
|
q = queries.get(field);
|
||||||
} else {
|
} else {
|
||||||
spans = spanQuery.getSpans(context, acceptDocs);
|
q = spanQuery;
|
||||||
}
|
}
|
||||||
|
AtomicReaderContext context = getLeafContextForField(field);
|
||||||
|
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||||
|
TreeSet<Term> extractedTerms = new TreeSet<Term>();
|
||||||
|
q.extractTerms(extractedTerms);
|
||||||
|
for (Term term : extractedTerms) {
|
||||||
|
termContexts.put(term, TermContext.build(context, term, true));
|
||||||
|
}
|
||||||
|
Bits acceptDocs = context.reader.getLiveDocs();
|
||||||
|
final Spans spans = q.getSpans(context, acceptDocs, termContexts);
|
||||||
|
|
||||||
// collect span positions
|
// collect span positions
|
||||||
while (spans.next()) {
|
while (spans.next()) {
|
||||||
|
|
|
@ -1,136 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
/**
|
|
||||||
* Copyright 2005 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Wraps another SpanFilter's result and caches it. The purpose is to allow
|
|
||||||
* filters to simply filter, and then wrap with this class to add caching.
|
|
||||||
*/
|
|
||||||
public class CachingSpanFilter extends SpanFilter {
|
|
||||||
private SpanFilter filter;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A transient Filter cache (package private because of test)
|
|
||||||
*/
|
|
||||||
private final CachingWrapperFilter.FilterCache<SpanFilterResult> cache;
|
|
||||||
|
|
||||||
/** Wraps another SpanFilter's result and caches it.
|
|
||||||
* @param filter Filter to cache results of
|
|
||||||
*/
|
|
||||||
public CachingSpanFilter(SpanFilter filter) {
|
|
||||||
this.filter = filter;
|
|
||||||
this.cache = new CachingWrapperFilter.FilterCache<SpanFilterResult>();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
|
||||||
final SpanFilterResult result = getCachedResult(context);
|
|
||||||
return BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SpanFilterResult bitSpans(AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
|
||||||
final SpanFilterResult result = getCachedResult(context);
|
|
||||||
if (acceptDocs == null) {
|
|
||||||
return result;
|
|
||||||
} else {
|
|
||||||
// TODO: filter positions more efficiently
|
|
||||||
List<SpanFilterResult.PositionInfo> allPositions = result.getPositions();
|
|
||||||
List<SpanFilterResult.PositionInfo> positions = new ArrayList<SpanFilterResult.PositionInfo>(allPositions.size() / 2 + 1);
|
|
||||||
for (SpanFilterResult.PositionInfo p : allPositions) {
|
|
||||||
if (acceptDocs.get(p.getDoc())) {
|
|
||||||
positions.add(p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new SpanFilterResult(BitsFilteredDocIdSet.wrap(result.getDocIdSet(), acceptDocs), positions);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Provide the DocIdSet to be cached, using the DocIdSet provided
|
|
||||||
* by the wrapped Filter.
|
|
||||||
* <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable}
|
|
||||||
* returns <code>true</code>, else it copies the {@link DocIdSetIterator} into
|
|
||||||
* a {@link FixedBitSet}.
|
|
||||||
*/
|
|
||||||
protected SpanFilterResult spanFilterResultToCache(SpanFilterResult result, IndexReader reader) throws IOException {
|
|
||||||
if (result == null || result.getDocIdSet() == null) {
|
|
||||||
// this is better than returning null, as the nonnull result can be cached
|
|
||||||
return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
|
|
||||||
} else if (result.getDocIdSet().isCacheable()) {
|
|
||||||
return result;
|
|
||||||
} else {
|
|
||||||
final DocIdSetIterator it = result.getDocIdSet().iterator();
|
|
||||||
// null is allowed to be returned by iterator(),
|
|
||||||
// in this case we wrap with the empty set,
|
|
||||||
// which is cacheable.
|
|
||||||
if (it == null) {
|
|
||||||
return SpanFilterResult.EMPTY_SPAN_FILTER_RESULT;
|
|
||||||
} else {
|
|
||||||
final FixedBitSet bits = new FixedBitSet(reader.maxDoc());
|
|
||||||
bits.or(it);
|
|
||||||
return new SpanFilterResult(bits, result.getPositions());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// for testing
|
|
||||||
int hitCount, missCount;
|
|
||||||
|
|
||||||
private SpanFilterResult getCachedResult(AtomicReaderContext context) throws IOException {
|
|
||||||
final IndexReader reader = context.reader;
|
|
||||||
final Object coreKey = reader.getCoreCacheKey();
|
|
||||||
|
|
||||||
SpanFilterResult result = cache.get(reader, coreKey);
|
|
||||||
if (result != null) {
|
|
||||||
hitCount++;
|
|
||||||
return result;
|
|
||||||
} else {
|
|
||||||
missCount++;
|
|
||||||
// cache miss: we use no acceptDocs here
|
|
||||||
// (this saves time on building SpanFilterResult, the acceptDocs will be applied on the cached set)
|
|
||||||
result = spanFilterResultToCache(filter.bitSpans(context, null/**!!!*/), reader);
|
|
||||||
cache.put(coreKey, result);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "CachingSpanFilter("+filter+")";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (!(o instanceof CachingSpanFilter)) return false;
|
|
||||||
return this.filter.equals(((CachingSpanFilter)o).filter);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return filter.hashCode() ^ 0x1117BF25;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,39 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
/**
|
|
||||||
* Copyright 2007 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/** Abstract base class providing a mechanism to restrict searches to a subset
|
|
||||||
of an index and also maintains and returns position information.
|
|
||||||
|
|
||||||
This is useful if you want to compare the positions from a SpanQuery with the positions of items in
|
|
||||||
a filter. For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents,
|
|
||||||
and then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could
|
|
||||||
then compare position information for post processing.
|
|
||||||
*/
|
|
||||||
public abstract class SpanFilter extends Filter{
|
|
||||||
/** Returns a SpanFilterResult with true for documents which should be permitted in
|
|
||||||
search results, and false for those that should not and Spans for where the true docs match.
|
|
||||||
* @param context The {@link AtomicReaderContext} to load position and DocIdSet information from
|
|
||||||
* @return A {@link SpanFilterResult}
|
|
||||||
* @throws java.io.IOException if there was an issue accessing the necessary information
|
|
||||||
* */
|
|
||||||
public abstract SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
|
|
||||||
}
|
|
|
@ -1,119 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
/**
|
|
||||||
* Copyright 2005 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The results of a SpanQueryFilter. Wraps the BitSet and the position information from the SpanQuery
|
|
||||||
*
|
|
||||||
* @lucene.experimental
|
|
||||||
*
|
|
||||||
**/
|
|
||||||
public class SpanFilterResult {
|
|
||||||
private DocIdSet docIdSet;
|
|
||||||
private List<PositionInfo> positions;//Spans spans;
|
|
||||||
|
|
||||||
public static final SpanFilterResult EMPTY_SPAN_FILTER_RESULT =
|
|
||||||
new SpanFilterResult(DocIdSet.EMPTY_DOCIDSET, Collections.<PositionInfo>emptyList());
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param docIdSet The DocIdSet for the Filter
|
|
||||||
* @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
|
|
||||||
*/
|
|
||||||
public SpanFilterResult(DocIdSet docIdSet, List<PositionInfo> positions) {
|
|
||||||
this.docIdSet = docIdSet;
|
|
||||||
this.positions = positions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The first entry in the list corresponds to the first "on" bit.
|
|
||||||
* Entries are increasing by document order
|
|
||||||
* @return A List of PositionInfo objects
|
|
||||||
*/
|
|
||||||
public List<PositionInfo> getPositions() {
|
|
||||||
return positions;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the docIdSet */
|
|
||||||
public DocIdSet getDocIdSet() {
|
|
||||||
return docIdSet;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class PositionInfo {
|
|
||||||
private int doc;
|
|
||||||
private List<StartEnd> positions;
|
|
||||||
|
|
||||||
|
|
||||||
public PositionInfo(int doc) {
|
|
||||||
this.doc = doc;
|
|
||||||
positions = new ArrayList<StartEnd>();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void addPosition(int start, int end)
|
|
||||||
{
|
|
||||||
positions.add(new StartEnd(start, end));
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getDoc() {
|
|
||||||
return doc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @return Positions
|
|
||||||
*/
|
|
||||||
public List<StartEnd> getPositions() {
|
|
||||||
return positions;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class StartEnd
|
|
||||||
{
|
|
||||||
private int start;
|
|
||||||
private int end;
|
|
||||||
|
|
||||||
|
|
||||||
public StartEnd(int start, int end) {
|
|
||||||
this.start = start;
|
|
||||||
this.end = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @return The end position of this match
|
|
||||||
*/
|
|
||||||
public int getEnd() {
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The Start position
|
|
||||||
* @return The start position of this match
|
|
||||||
*/
|
|
||||||
public int getStart() {
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,103 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
/**
|
|
||||||
* Copyright 2007 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
|
||||||
import org.apache.lucene.search.spans.Spans;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.FixedBitSet;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constrains search results to only match those which also match a provided
|
|
||||||
* query. Also provides position information about where each document matches
|
|
||||||
* at the cost of extra space compared with the QueryWrapperFilter.
|
|
||||||
* There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. Namely,
|
|
||||||
* the position information for each matching document is stored.
|
|
||||||
* <p/>
|
|
||||||
* This filter does not cache. See the {@link org.apache.lucene.search.CachingSpanFilter} for a wrapper that
|
|
||||||
* caches.
|
|
||||||
*/
|
|
||||||
public class SpanQueryFilter extends SpanFilter {
|
|
||||||
protected SpanQuery query;
|
|
||||||
|
|
||||||
protected SpanQueryFilter()
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Constructs a filter which only matches documents matching
|
|
||||||
* <code>query</code>.
|
|
||||||
* @param query The {@link org.apache.lucene.search.spans.SpanQuery} to use as the basis for the Filter.
|
|
||||||
*/
|
|
||||||
public SpanQueryFilter(SpanQuery query) {
|
|
||||||
this.query = query;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public final DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
SpanFilterResult result = bitSpans(context, acceptDocs);
|
|
||||||
return result.getDocIdSet();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
|
|
||||||
final FixedBitSet bits = new FixedBitSet(context.reader.maxDoc());
|
|
||||||
Spans spans = query.getSpans(context, acceptDocs);
|
|
||||||
List<SpanFilterResult.PositionInfo> tmp = new ArrayList<SpanFilterResult.PositionInfo>(20);
|
|
||||||
int currentDoc = -1;
|
|
||||||
SpanFilterResult.PositionInfo currentInfo = null;
|
|
||||||
while (spans.next())
|
|
||||||
{
|
|
||||||
int doc = spans.doc();
|
|
||||||
bits.set(doc);
|
|
||||||
if (currentDoc != doc)
|
|
||||||
{
|
|
||||||
currentInfo = new SpanFilterResult.PositionInfo(doc);
|
|
||||||
tmp.add(currentInfo);
|
|
||||||
currentDoc = doc;
|
|
||||||
}
|
|
||||||
currentInfo.addPosition(spans.start(), spans.end());
|
|
||||||
}
|
|
||||||
return new SpanFilterResult(bits, tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public SpanQuery getQuery() {
|
|
||||||
return query;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return "SpanQueryFilter(" + query + ")";
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
return o instanceof SpanQueryFilter && this.query.equals(((SpanQueryFilter) o).query);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
return query.hashCode() ^ 0x923F64B9;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -150,7 +150,7 @@ public class PayloadNearQuery extends SpanNearQuery {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs), this,
|
return new PayloadNearSpanScorer(query.getSpans(context, acceptDocs, termContexts), this,
|
||||||
similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
|
similarity, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,8 +20,11 @@ package org.apache.lucene.search.payloads;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
@ -41,6 +44,7 @@ import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.search.spans.Spans;
|
import org.apache.lucene.search.spans.Spans;
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
import org.apache.lucene.util.ReaderUtil;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Experimental class to get set of payloads for most standard Lucene queries.
|
* Experimental class to get set of payloads for most standard Lucene queries.
|
||||||
|
@ -174,9 +178,15 @@ public class PayloadSpanUtil {
|
||||||
|
|
||||||
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
|
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||||
|
TreeSet<Term> terms = new TreeSet<Term>();
|
||||||
|
query.extractTerms(terms);
|
||||||
|
for (Term term : terms) {
|
||||||
|
termContexts.put(term, TermContext.build(context, term, true));
|
||||||
|
}
|
||||||
final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
|
final AtomicReaderContext[] leaves = ReaderUtil.leaves(context);
|
||||||
for (AtomicReaderContext atomicReaderContext : leaves) {
|
for (AtomicReaderContext atomicReaderContext : leaves) {
|
||||||
final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs());
|
final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader.getLiveDocs(), termContexts);
|
||||||
while (spans.next() == true) {
|
while (spans.next() == true) {
|
||||||
if (spans.isPayloadAvailable()) {
|
if (spans.isPayloadAvailable()) {
|
||||||
Collection<byte[]> payload = spans.getPayload();
|
Collection<byte[]> payload = spans.getPayload();
|
||||||
|
|
|
@ -81,7 +81,7 @@ public class PayloadTermQuery extends SpanTermQuery {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs),
|
return new PayloadTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts),
|
||||||
this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -27,6 +28,7 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -93,8 +95,8 @@ public class FieldMaskingSpanQuery extends SpanQuery {
|
||||||
// ...this is done to be more consistent with things like SpanFirstQuery
|
// ...this is done to be more consistent with things like SpanFirstQuery
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
return maskedQuery.getSpans(context, acceptDocs);
|
return maskedQuery.getSpans(context, acceptDocs, termContexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,9 +17,11 @@ package org.apache.lucene.search.spans;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -28,6 +30,7 @@ import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
/** A Spans that is formed from the ordered subspans of a SpanNearQuery
|
||||||
|
@ -78,11 +81,11 @@ public class NearSpansOrdered extends Spans {
|
||||||
private SpanNearQuery query;
|
private SpanNearQuery query;
|
||||||
private boolean collectPayloads = true;
|
private boolean collectPayloads = true;
|
||||||
|
|
||||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
this(spanNearQuery, context, acceptDocs, true);
|
this(spanNearQuery, context, acceptDocs, termContexts, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, boolean collectPayloads)
|
public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts, boolean collectPayloads)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (spanNearQuery.getClauses().length < 2) {
|
if (spanNearQuery.getClauses().length < 2) {
|
||||||
throw new IllegalArgumentException("Less than 2 clauses: "
|
throw new IllegalArgumentException("Less than 2 clauses: "
|
||||||
|
@ -95,7 +98,7 @@ public class NearSpansOrdered extends Spans {
|
||||||
matchPayload = new LinkedList<byte[]>();
|
matchPayload = new LinkedList<byte[]>();
|
||||||
subSpansByDoc = new Spans[clauses.length];
|
subSpansByDoc = new Spans[clauses.length];
|
||||||
for (int i = 0; i < clauses.length; i++) {
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
subSpans[i] = clauses[i].getSpans(context, acceptDocs);
|
subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts);
|
||||||
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
|
subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
|
||||||
}
|
}
|
||||||
query = spanNearQuery; // kept for toString() only.
|
query = spanNearQuery; // kept for toString() only.
|
||||||
|
|
|
@ -17,14 +17,17 @@ package org.apache.lucene.search.spans;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
||||||
|
@ -132,7 +135,7 @@ public class NearSpansUnordered extends Spans {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs)
|
public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.slop = query.getSlop();
|
this.slop = query.getSlop();
|
||||||
|
@ -142,7 +145,7 @@ public class NearSpansUnordered extends Spans {
|
||||||
subSpans = new Spans[clauses.length];
|
subSpans = new Spans[clauses.length];
|
||||||
for (int i = 0; i < clauses.length; i++) {
|
for (int i = 0; i < clauses.length; i++) {
|
||||||
SpansCell cell =
|
SpansCell cell =
|
||||||
new SpansCell(clauses[i].getSpans(context, acceptDocs), i);
|
new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i);
|
||||||
ordered.add(cell);
|
ordered.add(cell);
|
||||||
subSpans[i] = cell.spans;
|
subSpans[i] = cell.spans;
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search.spans;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
@ -90,7 +91,7 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
throw new UnsupportedOperationException("Query should have been rewritten");
|
throw new UnsupportedOperationException("Query should have been rewritten");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,6 +158,9 @@ public class SpanMultiTermQueryWrapper<Q extends MultiTermQuery> extends SpanQue
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
|
protected void addClause(SpanOrQuery topLevel, Term term, int docCount, float boost, TermContext states) {
|
||||||
|
// TODO: would be nice to not lose term-state here.
|
||||||
|
// we could add a hack option to SpanOrQuery, but the hack would only work if this is the top-level Span
|
||||||
|
// (if you put this thing in another span query, it would extractTerms/double-seek anyway)
|
||||||
final SpanTermQuery q = new SpanTermQuery(term);
|
final SpanTermQuery q = new SpanTermQuery(term);
|
||||||
q.setBoost(boost);
|
q.setBoost(boost);
|
||||||
topLevel.addClause(q);
|
topLevel.addClause(q);
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
|
||||||
|
@ -31,6 +32,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
/** Matches spans which are near one another. One can specify <i>slop</i>, the
|
||||||
|
@ -118,16 +120,16 @@ public class SpanNearQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
if (clauses.size() == 0) // optimize 0-clause case
|
if (clauses.size() == 0) // optimize 0-clause case
|
||||||
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs);
|
return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts);
|
||||||
|
|
||||||
if (clauses.size() == 1) // optimize 1-clause case
|
if (clauses.size() == 1) // optimize 1-clause case
|
||||||
return clauses.get(0).getSpans(context, acceptDocs);
|
return clauses.get(0).getSpans(context, acceptDocs, termContexts);
|
||||||
|
|
||||||
return inOrder
|
return inOrder
|
||||||
? (Spans) new NearSpansOrdered(this, context, acceptDocs, collectPayloads)
|
? (Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads)
|
||||||
: (Spans) new NearSpansUnordered(this, context, acceptDocs);
|
: (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -22,11 +22,13 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/** Removes matches which overlap with another SpanQuery. */
|
/** Removes matches which overlap with another SpanQuery. */
|
||||||
|
@ -76,12 +78,12 @@ public class SpanNotQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||||
return new Spans() {
|
return new Spans() {
|
||||||
private Spans includeSpans = include.getSpans(context, acceptDocs);
|
private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts);
|
||||||
private boolean moreInclude = true;
|
private boolean moreInclude = true;
|
||||||
|
|
||||||
private Spans excludeSpans = exclude.getSpans(context, acceptDocs);
|
private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts);
|
||||||
private boolean moreExclude = excludeSpans.next();
|
private boolean moreExclude = excludeSpans.next();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.List;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
@ -30,6 +31,7 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
@ -164,9 +166,9 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs) throws IOException {
|
public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map<Term,TermContext> termContexts) throws IOException {
|
||||||
if (clauses.size() == 1) // optimize 1-clause case
|
if (clauses.size() == 1) // optimize 1-clause case
|
||||||
return (clauses.get(0)).getSpans(context, acceptDocs);
|
return (clauses.get(0)).getSpans(context, acceptDocs, termContexts);
|
||||||
|
|
||||||
return new Spans() {
|
return new Spans() {
|
||||||
private SpanQueue queue = null;
|
private SpanQueue queue = null;
|
||||||
|
@ -175,7 +177,7 @@ public class SpanOrQuery extends SpanQuery implements Cloneable {
|
||||||
queue = new SpanQueue(clauses.size());
|
queue = new SpanQueue(clauses.size());
|
||||||
Iterator<SpanQuery> i = clauses.iterator();
|
Iterator<SpanQuery> i = clauses.iterator();
|
||||||
while (i.hasNext()) {
|
while (i.hasNext()) {
|
||||||
Spans spans = i.next().getSpans(context, acceptDocs);
|
Spans spans = i.next().getSpans(context, acceptDocs, termContexts);
|
||||||
if ( ((target == -1) && spans.next())
|
if ( ((target == -1) && spans.next())
|
||||||
|| ((target != -1) && spans.skipTo(target))) {
|
|| ((target != -1) && spans.skipTo(target))) {
|
||||||
queue.add(spans);
|
queue.add(spans);
|
||||||
|
|
|
@ -22,10 +22,12 @@ import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
|
|
||||||
|
@ -82,8 +84,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
||||||
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
|
protected abstract AcceptStatus acceptPosition(Spans spans) throws IOException;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
return new PositionCheckSpan(context, acceptDocs);
|
return new PositionCheckSpan(context, acceptDocs, termContexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -107,8 +109,8 @@ public abstract class SpanPositionCheckQuery extends SpanQuery implements Clonea
|
||||||
protected class PositionCheckSpan extends Spans {
|
protected class PositionCheckSpan extends Spans {
|
||||||
private Spans spans;
|
private Spans spans;
|
||||||
|
|
||||||
public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public PositionCheckSpan(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
spans = match.getSpans(context, acceptDocs);
|
spans = match.getSpans(context, acceptDocs, termContexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -18,18 +18,21 @@ package org.apache.lucene.search.spans;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
/** Base class for span-based queries. */
|
/** Base class for span-based queries. */
|
||||||
public abstract class SpanQuery extends Query {
|
public abstract class SpanQuery extends Query {
|
||||||
/** Expert: Returns the matches for this query in an index. Used internally
|
/** Expert: Returns the matches for this query in an index. Used internally
|
||||||
* to search for spans. */
|
* to search for spans. */
|
||||||
public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException;
|
public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException;
|
||||||
|
|
||||||
/** Returns the name of the field matched by this query.*/
|
/** Returns the name of the field matched by this query.*/
|
||||||
public abstract String getField();
|
public abstract String getField();
|
||||||
|
|
|
@ -19,12 +19,19 @@ package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.TermState;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.ReaderUtil;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/** Matches spans containing a term. */
|
/** Matches spans containing a term. */
|
||||||
|
@ -82,22 +89,46 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
final IndexReader reader = context.reader;
|
TermContext termContext = termContexts.get(term);
|
||||||
final DocsAndPositionsEnum postings = reader.termPositionsEnum(acceptDocs,
|
final TermState state;
|
||||||
term.field(),
|
if (termContext == null) {
|
||||||
term.bytes());
|
// this happens with span-not query, as it doesn't include the NOT side in extractTerms()
|
||||||
|
// so we seek to the term now in this segment..., this sucks because its ugly mostly!
|
||||||
|
final Fields fields = context.reader.fields();
|
||||||
|
if (fields != null) {
|
||||||
|
final Terms terms = fields.terms(term.field());
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
|
||||||
|
if (termsEnum.seekExact(term.bytes(), true)) {
|
||||||
|
state = termsEnum.termState();
|
||||||
|
} else {
|
||||||
|
state = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
state = termContext.get(context.ord);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (state == null) { // term is not present in that reader
|
||||||
|
return TermSpans.EMPTY_TERM_SPANS;
|
||||||
|
}
|
||||||
|
|
||||||
|
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
|
||||||
|
termsEnum.seekExact(term.bytes(), state);
|
||||||
|
|
||||||
|
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
|
||||||
|
|
||||||
if (postings != null) {
|
if (postings != null) {
|
||||||
return new TermSpans(postings, term);
|
return new TermSpans(postings, term);
|
||||||
} else {
|
} else {
|
||||||
if (reader.termDocsEnum(reader.getLiveDocs(), term.field(), term.bytes()) != null) {
|
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
|
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")");
|
||||||
} else {
|
|
||||||
// term does not exist
|
|
||||||
return TermSpans.EMPTY_TERM_SPANS;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,8 @@ import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.TermContext;
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -35,7 +36,7 @@ import java.util.TreeSet;
|
||||||
*/
|
*/
|
||||||
public class SpanWeight extends Weight {
|
public class SpanWeight extends Weight {
|
||||||
protected Similarity similarity;
|
protected Similarity similarity;
|
||||||
protected Set<Term> terms;
|
protected Map<Term,TermContext> termContexts;
|
||||||
protected SpanQuery query;
|
protected SpanQuery query;
|
||||||
protected Similarity.Stats stats;
|
protected Similarity.Stats stats;
|
||||||
|
|
||||||
|
@ -44,15 +45,16 @@ public class SpanWeight extends Weight {
|
||||||
this.similarity = searcher.getSimilarityProvider().get(query.getField());
|
this.similarity = searcher.getSimilarityProvider().get(query.getField());
|
||||||
this.query = query;
|
this.query = query;
|
||||||
|
|
||||||
terms=new TreeSet<Term>();
|
termContexts = new HashMap<Term,TermContext>();
|
||||||
|
TreeSet<Term> terms = new TreeSet<Term>();
|
||||||
query.extractTerms(terms);
|
query.extractTerms(terms);
|
||||||
final ReaderContext context = searcher.getTopReaderContext();
|
final ReaderContext context = searcher.getTopReaderContext();
|
||||||
final TermContext states[] = new TermContext[terms.size()];
|
|
||||||
final TermStatistics termStats[] = new TermStatistics[terms.size()];
|
final TermStatistics termStats[] = new TermStatistics[terms.size()];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (Term term : terms) {
|
for (Term term : terms) {
|
||||||
states[i] = TermContext.build(context, term, true);
|
TermContext state = TermContext.build(context, term, true);
|
||||||
termStats[i] = searcher.termStatistics(term, states[i]);
|
termStats[i] = searcher.termStatistics(term, state);
|
||||||
|
termContexts.put(term, state);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
stats = similarity.computeStats(
|
stats = similarity.computeStats(
|
||||||
|
@ -77,7 +79,7 @@ public class SpanWeight extends Weight {
|
||||||
@Override
|
@Override
|
||||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
|
||||||
boolean topScorer, Bits acceptDocs) throws IOException {
|
boolean topScorer, Bits acceptDocs) throws IOException {
|
||||||
return new SpanScorer(query.getSpans(context, acceptDocs), this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.sloppyDocScorer(stats, query.getField(), context));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -281,19 +281,6 @@ final class JustCompileSearch {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static final class JustCompileSpanFilter extends SpanFilter {
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SpanFilterResult bitSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> {
|
static final class JustCompileTopDocsCollector extends TopDocsCollector<ScoreDoc> {
|
||||||
|
|
||||||
protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {
|
protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {
|
||||||
|
|
|
@ -1,147 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.FieldType;
|
|
||||||
import org.apache.lucene.document.TextField;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
|
||||||
import org.apache.lucene.index.SerialMergeScheduler;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util._TestUtil;
|
|
||||||
|
|
||||||
public class TestCachingSpanFilter extends LuceneTestCase {
|
|
||||||
|
|
||||||
public void testEnforceDeletions() throws Exception {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(
|
|
||||||
random,
|
|
||||||
dir,
|
|
||||||
newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).
|
|
||||||
setMergeScheduler(new SerialMergeScheduler()).
|
|
||||||
// asserts below requires no unexpected merges:
|
|
||||||
setMergePolicy(newLogMergePolicy(10))
|
|
||||||
);
|
|
||||||
|
|
||||||
// NOTE: cannot use writer.getReader because RIW (on
|
|
||||||
// flipping a coin) may give us a newly opened reader,
|
|
||||||
// but we use .reopen on this reader below and expect to
|
|
||||||
// (must) get an NRT reader:
|
|
||||||
IndexReader reader = IndexReader.open(writer.w, true);
|
|
||||||
// same reason we don't wrap?
|
|
||||||
IndexSearcher searcher = newSearcher(reader, false);
|
|
||||||
|
|
||||||
// add a doc, refresh the reader, and check that its there
|
|
||||||
Document doc = new Document();
|
|
||||||
FieldType customType = new FieldType(TextField.TYPE_STORED);
|
|
||||||
customType.setTokenized(false);
|
|
||||||
doc.add(newField("id", "1", customType));
|
|
||||||
writer.addDocument(doc);
|
|
||||||
|
|
||||||
reader = refreshReader(reader);
|
|
||||||
searcher.close();
|
|
||||||
searcher = newSearcher(reader, false);
|
|
||||||
|
|
||||||
TopDocs docs = searcher.search(new MatchAllDocsQuery(), 1);
|
|
||||||
assertEquals("Should find a hit...", 1, docs.totalHits);
|
|
||||||
|
|
||||||
final SpanFilter startFilter = new SpanQueryFilter(new SpanTermQuery(new Term("id", "1")));
|
|
||||||
|
|
||||||
CachingSpanFilter filter = new CachingSpanFilter(startFilter);
|
|
||||||
|
|
||||||
docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
|
|
||||||
assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
|
|
||||||
int missCount = filter.missCount;
|
|
||||||
assertTrue(missCount > 0);
|
|
||||||
Query constantScore = new ConstantScoreQuery(filter);
|
|
||||||
docs = searcher.search(constantScore, 1);
|
|
||||||
assertEquals("[just filter] Should find a hit...", 1, docs.totalHits);
|
|
||||||
assertEquals(missCount, filter.missCount);
|
|
||||||
|
|
||||||
// NOTE: important to hold ref here so GC doesn't clear
|
|
||||||
// the cache entry! Else the assert below may sometimes
|
|
||||||
// fail:
|
|
||||||
IndexReader oldReader = reader;
|
|
||||||
|
|
||||||
writer.addDocument(doc);
|
|
||||||
reader = refreshReader(reader);
|
|
||||||
searcher.close();
|
|
||||||
searcher = newSearcher(reader, false);
|
|
||||||
|
|
||||||
docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
|
|
||||||
assertEquals("[query + filter] Should find 2 hits...", 2, docs.totalHits);
|
|
||||||
assertTrue(filter.missCount > missCount);
|
|
||||||
missCount = filter.missCount;
|
|
||||||
|
|
||||||
constantScore = new ConstantScoreQuery(filter);
|
|
||||||
docs = searcher.search(constantScore, 1);
|
|
||||||
assertEquals("[just filter] Should find a hit...", 2, docs.totalHits);
|
|
||||||
assertEquals(missCount, filter.missCount);
|
|
||||||
|
|
||||||
// NOTE: important to hold ref here so GC doesn't clear
|
|
||||||
// the cache entry! Else the assert below may sometimes
|
|
||||||
// fail:
|
|
||||||
IndexReader oldReader2 = reader;
|
|
||||||
|
|
||||||
// now delete the doc, refresh the reader, and see that it's not there
|
|
||||||
writer.deleteDocuments(new Term("id", "1"));
|
|
||||||
|
|
||||||
reader = refreshReader(reader);
|
|
||||||
searcher.close();
|
|
||||||
searcher = newSearcher(reader, false);
|
|
||||||
|
|
||||||
docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
|
|
||||||
assertEquals("[query + filter] Should *not* find a hit...", 0, docs.totalHits);
|
|
||||||
assertEquals(missCount, filter.missCount);
|
|
||||||
|
|
||||||
docs = searcher.search(constantScore, 1);
|
|
||||||
assertEquals("[just filter] Should *not* find a hit...", 0, docs.totalHits);
|
|
||||||
assertEquals(missCount, filter.missCount);
|
|
||||||
|
|
||||||
// NOTE: silliness to make sure JRE does not optimize
|
|
||||||
// away our holding onto oldReader to prevent
|
|
||||||
// CachingWrapperFilter's WeakHashMap from dropping the
|
|
||||||
// entry:
|
|
||||||
assertTrue(oldReader != null);
|
|
||||||
assertTrue(oldReader2 != null);
|
|
||||||
|
|
||||||
searcher.close();
|
|
||||||
writer.close();
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private static IndexReader refreshReader(IndexReader reader) throws IOException {
|
|
||||||
IndexReader oldReader = reader;
|
|
||||||
reader = IndexReader.openIfChanged(reader);
|
|
||||||
if (reader != null) {
|
|
||||||
oldReader.close();
|
|
||||||
return reader;
|
|
||||||
} else {
|
|
||||||
return oldReader;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,86 +0,0 @@
|
||||||
package org.apache.lucene.search;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copyright 2004 The Apache Software Foundation
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.TextField;
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.util.English;
|
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
|
||||||
|
|
||||||
public class TestSpanQueryFilter extends LuceneTestCase {
|
|
||||||
|
|
||||||
public void testFilterWorks() throws Exception {
|
|
||||||
Directory dir = newDirectory();
|
|
||||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
|
|
||||||
for (int i = 0; i < 500; i++) {
|
|
||||||
Document document = new Document();
|
|
||||||
document.add(newField("field", English.intToEnglish(i) + " equals " + English.intToEnglish(i),
|
|
||||||
TextField.TYPE_UNSTORED));
|
|
||||||
writer.addDocument(document);
|
|
||||||
}
|
|
||||||
final int number = 10;
|
|
||||||
IndexReader reader = writer.getReader();
|
|
||||||
writer.close();
|
|
||||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(reader.getTopReaderContext());
|
|
||||||
int subIndex = ReaderUtil.subIndex(number, leaves); // find the reader with this document in it
|
|
||||||
SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(number).trim()));
|
|
||||||
SpanQueryFilter filter = new SpanQueryFilter(query);
|
|
||||||
SpanFilterResult result = filter.bitSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());
|
|
||||||
DocIdSet docIdSet = result.getDocIdSet();
|
|
||||||
assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
|
|
||||||
assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, number - leaves[subIndex].docBase);
|
|
||||||
List<SpanFilterResult.PositionInfo> spans = result.getPositions();
|
|
||||||
assertTrue("spans is null and it shouldn't be", spans != null);
|
|
||||||
int size = getDocIdSetSize(docIdSet);
|
|
||||||
assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);
|
|
||||||
for (final SpanFilterResult.PositionInfo info: spans) {
|
|
||||||
assertTrue("info is null and it shouldn't be", info != null);
|
|
||||||
//The doc should indicate the bit is on
|
|
||||||
assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
|
|
||||||
//There should be two positions in each
|
|
||||||
assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
reader.close();
|
|
||||||
dir.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
|
|
||||||
int size = 0;
|
|
||||||
DocIdSetIterator it = docIdSet.iterator();
|
|
||||||
while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
|
||||||
size++;
|
|
||||||
}
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
|
|
||||||
DocIdSetIterator it = docIdSet.iterator();
|
|
||||||
assertTrue(msg, it.advance(docId) != DocIdSetIterator.NO_MORE_DOCS);
|
|
||||||
assertTrue(msg, it.docID() == docId);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -19,11 +19,14 @@ package org.apache.lucene.search.spans;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.search.Weight;
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Holds all implementations of classes in the o.a.l.s.spans package as a
|
* Holds all implementations of classes in the o.a.l.s.spans package as a
|
||||||
|
@ -83,7 +86,7 @@ final class JustCompileSearchSpans {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
public Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map<Term,TermContext> termContexts) throws IOException {
|
||||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,11 +20,16 @@ package org.apache.lucene.search.spans;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
import org.apache.lucene.util.ReaderUtil;
|
||||||
|
import org.apache.lucene.util.TermContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -39,19 +44,27 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
|
||||||
private AtomicReaderContext[] leaves;
|
private AtomicReaderContext[] leaves;
|
||||||
private int leafOrd = 0;
|
private int leafOrd = 0;
|
||||||
private Spans current;
|
private Spans current;
|
||||||
|
private Map<Term,TermContext> termContexts;
|
||||||
|
|
||||||
private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query) {
|
private MultiSpansWrapper(AtomicReaderContext[] leaves, SpanQuery query, Map<Term,TermContext> termContexts) {
|
||||||
this.query = query;
|
this.query = query;
|
||||||
this.leaves = leaves;
|
this.leaves = leaves;
|
||||||
|
this.termContexts = termContexts;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
|
public static Spans wrap(ReaderContext topLevelReaderContext, SpanQuery query) throws IOException {
|
||||||
|
Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||||
|
TreeSet<Term> terms = new TreeSet<Term>();
|
||||||
|
query.extractTerms(terms);
|
||||||
|
for (Term term : terms) {
|
||||||
|
termContexts.put(term, TermContext.build(topLevelReaderContext, term, true));
|
||||||
|
}
|
||||||
AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
|
AtomicReaderContext[] leaves = ReaderUtil.leaves(topLevelReaderContext);
|
||||||
if(leaves.length == 1) {
|
if(leaves.length == 1) {
|
||||||
return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs());
|
return query.getSpans(leaves[0], leaves[0].reader.getLiveDocs(), termContexts);
|
||||||
}
|
}
|
||||||
return new MultiSpansWrapper(leaves, query);
|
return new MultiSpansWrapper(leaves, query, termContexts);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -60,14 +73,14 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (current == null) {
|
if (current == null) {
|
||||||
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
|
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
|
||||||
}
|
}
|
||||||
while(true) {
|
while(true) {
|
||||||
if (current.next()) {
|
if (current.next()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (++leafOrd < leaves.length) {
|
if (++leafOrd < leaves.length) {
|
||||||
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
|
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
|
||||||
} else {
|
} else {
|
||||||
current = null;
|
current = null;
|
||||||
break;
|
break;
|
||||||
|
@ -85,17 +98,17 @@ public class MultiSpansWrapper extends Spans { // can't be package private due t
|
||||||
int subIndex = ReaderUtil.subIndex(target, leaves);
|
int subIndex = ReaderUtil.subIndex(target, leaves);
|
||||||
assert subIndex >= leafOrd;
|
assert subIndex >= leafOrd;
|
||||||
if (subIndex != leafOrd) {
|
if (subIndex != leafOrd) {
|
||||||
current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs());
|
current = query.getSpans(leaves[subIndex], leaves[subIndex].reader.getLiveDocs(), termContexts);
|
||||||
leafOrd = subIndex;
|
leafOrd = subIndex;
|
||||||
} else if (current == null) {
|
} else if (current == null) {
|
||||||
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
|
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
|
||||||
}
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
if (current.skipTo(target - leaves[leafOrd].docBase)) {
|
if (current.skipTo(target - leaves[leafOrd].docBase)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (++leafOrd < leaves.length) {
|
if (++leafOrd < leaves.length) {
|
||||||
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs());
|
current = query.getSpans(leaves[leafOrd], leaves[leafOrd].reader.getLiveDocs(), termContexts);
|
||||||
} else {
|
} else {
|
||||||
current = null;
|
current = null;
|
||||||
break;
|
break;
|
||||||
|
|
Loading…
Reference in New Issue