lucene 4: lucene package cleanups
parent 595acd695e
commit 22c14c7354
@@ -1,116 +0,0 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.search;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;

import java.io.IOException;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;

/**
 *
 */
// LUCENE MONITOR: Against TermsFilter
public class PublicTermsFilter extends Filter {

    Set<Term> terms = new TreeSet<Term>();

    /**
     * Adds a term to the list of acceptable terms
     *
     * @param term
     */
    public void addTerm(Term term) {
        terms.add(term);
    }

    public Set<Term> getTerms() {
        return terms;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if ((obj == null) || (obj.getClass() != this.getClass()))
            return false;
        PublicTermsFilter test = (PublicTermsFilter) obj;
        return (terms == test.terms ||
                (terms != null && terms.equals(test.terms)));
    }

    @Override
    public int hashCode() {
        int hash = 9;
        for (Iterator<Term> iter = terms.iterator(); iter.hasNext(); ) {
            Term term = iter.next();
            hash = 31 * hash + term.hashCode();
        }
        return hash;
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        FixedBitSet result = null;
        TermDocs td = reader.termDocs();
        try {
            // batch read, in Lucene 4.0 its no longer needed
            int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
            int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
            for (Term term : terms) {
                td.seek(term);
                int number = td.read(docs, freqs);
                if (number > 0) {
                    if (result == null) {
                        result = new FixedBitSet(reader.maxDoc());
                    }
                    while (number > 0) {
                        for (int i = 0; i < number; i++) {
                            result.set(docs[i]);
                        }
                        number = td.read(docs, freqs);
                    }
                }
            }
        } finally {
            td.close();
        }
        return result;
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (Term term : terms) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(term);
        }
        return builder.toString();
    }

}
@@ -48,7 +48,7 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
     * @param size The number of hits to retain. Must be greater than zero.
     */
    public ShardFieldDocSortedHitQueue(SortField[] fields, int size) {
        initialize(size);
        super(size);
        setFields(fields);
    }

@@ -83,26 +83,6 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
        return fields;
    }

    /**
     * Returns an array of collators, possibly <code>null</code>. The collators
     * correspond to any SortFields which were given a specific locale.
     *
     * @param fields Array of sort fields.
     * @return Array, possibly <code>null</code>.
     */
    private Collator[] hasCollators(final SortField[] fields) {
        if (fields == null) return null;
        Collator[] ret = new Collator[fields.length];
        for (int i = 0; i < fields.length; ++i) {
            Locale locale = fields[i].getLocale();
            if (locale != null)
                ret[i] = Collator.getInstance(locale);
        }
        return ret;
    }

    /**
     * Returns whether <code>a</code> is less relevant than <code>b</code>.
     *

@@ -116,8 +96,8 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
        final int n = fields.length;
        int c = 0;
        for (int i = 0; i < n && c == 0; ++i) {
            final int type = fields[i].getType();
            if (type == SortField.STRING) {
            final SortField.Type type = fields[i].getType();
            if (type == SortField.Type.STRING) {
                final String s1 = (String) docA.fields[i];
                final String s2 = (String) docB.fields[i];
                // null values need to be sorted first, because of how FieldCache.getStringIndex()
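The last hunk above reflects the Lucene 4 move from integer sort-type constants (SortField.STRING) to the SortField.Type enum. A minimal sketch of that comparison in isolation, assuming a hypothetical helper class that is not part of this commit:

import org.apache.lucene.search.SortField;

public class SortTypeCheck {

    // Nulls compare first, mirroring the FieldCache behaviour noted in the hunk above.
    public static int compareStringValues(SortField field, String s1, String s2) {
        final SortField.Type type = field.getType(); // enum in Lucene 4, int constant in 3.x
        if (type != SortField.Type.STRING) {
            throw new IllegalArgumentException("not a STRING sort field: " + field);
        }
        if (s1 == null) {
            return s2 == null ? 0 : -1;
        }
        return s2 == null ? 1 : s1.compareTo(s2);
    }
}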
@@ -19,91 +19,170 @@
package org.apache.lucene.search;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;

/**
 * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
 * when cached, and also uses bulk read
 */
// LUCENE MONITOR: Against TermsFilter
// LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released
public class XTermsFilter extends Filter {

    private final Term[] terms;

    public XTermsFilter(Term term) {
        this.terms = new Term[]{term};
    private final Term[] filterTerms;
    private final boolean[] resetTermsEnum;// true if the enum must be reset when building the bitset
    private final int length;

    /**
     * Creates a new {@link XTermsFilter} from the given collection. The collection
     * can contain duplicate terms and multiple fields.
     */
    public XTermsFilter(Collection<Term> terms) {
        this(terms.toArray(new Term[terms.size()]));
    }

    /**
     * Creates a new {@link XTermsFilter} from the given array. The array can
     * contain duplicate terms and multiple fields.
     */
    public XTermsFilter(Term... terms) {
        if (terms == null || terms.length == 0) {
            throw new IllegalArgumentException("TermsFilter requires at least one term");
        }
        Arrays.sort(terms);
        this.filterTerms = new Term[terms.length];
        this.resetTermsEnum = new boolean[terms.length];
        int index = 0;
        for (int i = 0; i < terms.length; i++) {
            Term currentTerm = terms[i];
            boolean fieldChanged = true;
            if (index > 0) {
                // deduplicate
                if (filterTerms[index-1].field().equals(currentTerm.field())) {
                    fieldChanged = false;
                    if (filterTerms[index-1].bytes().bytesEquals(currentTerm.bytes())){
                        continue;
                    }
                }
            }
            this.filterTerms[index] = currentTerm;
            this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration

            index++;
        }
        length = index;
    }

    public XTermsFilter(Term[] terms) {
        Arrays.sort(terms);
        this.terms = terms;

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        AtomicReader reader = context.reader();
        FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
        Fields fields = reader.fields();
        if (fields == null) {
            return result;
        }
        final BytesRef br = new BytesRef();
        Terms terms = null;
        TermsEnum termsEnum = null;
        DocsEnum docs = null;
        assert resetTermsEnum[0];
        for (int i = 0; i < length; i++) {
            Term term = this.filterTerms[i];
            if (resetTermsEnum[i]) {
                terms = fields.terms(term.field());
                if (terms == null) {
                    i = skipToNextField(i+1, length); // skip to the next field since this field is not indexed
                    continue;
                }
            }
            if ((termsEnum = terms.iterator(termsEnum)) != null) {
                br.copyBytes(term.bytes());
                assert termsEnum != null;
                if (termsEnum.seekExact(br, true)) {
                    docs = termsEnum.docs(acceptDocs, docs, 0);
                    if (result == null) {
                        if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                            result = new FixedBitSet(reader.maxDoc());
                            // lazy init but don't do it in the hot loop since we could read many docs
                            result.set(docs.docID());
                        }
                    }
                    while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                        result.set(docs.docID());
                    }
                }
            }
        }
        return result;
    }

    public Term[] getTerms() {
        return terms;
    private final int skipToNextField(int index, int length) {
        for (int i = index; i < length; i++) {
            if (resetTermsEnum[i]) {
                return i-1;
            }
        }
        return length;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if ((obj == null) || (obj.getClass() != this.getClass()))
            return false;
        XTermsFilter test = (XTermsFilter) obj;
        return Arrays.equals(terms, test.terms);
        if (this == obj) {
            return true;
        }
        if ((obj == null) || (obj.getClass() != this.getClass())) {
            return false;
        }
        XTermsFilter test = (XTermsFilter) obj;
        if (filterTerms != test.filterTerms) {
            if (length == test.length) {
                for (int i = 0; i < length; i++) {
                    // can not be null!
                    if (!filterTerms[i].equals(test.filterTerms[i])) {
                        return false;
                    }
                }
            } else {
                return false;
            }
        }
        return true;

    }

    @Override
    public int hashCode() {
        return Arrays.hashCode(terms);
        int hash = 9;
        for (int i = 0; i < length; i++) {
            hash = 31 * hash + filterTerms[i].hashCode();
        }
        return hash;
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        FixedBitSet result = null;
        TermDocs td = reader.termDocs();
        try {
            // batch read, in Lucene 4.0 its no longer needed
            int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
            int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
            for (Term term : terms) {
                td.seek(term);
                int number = td.read(docs, freqs);
                if (number > 0) {
                    if (result == null) {
                        result = new FixedBitSet(reader.maxDoc());
                    }
                    while (number > 0) {
                        for (int i = 0; i < number; i++) {
                            result.set(docs[i]);
                        }
                        number = td.read(docs, freqs);
                    }
                }
            }
        } finally {
            td.close();
        }
        return result;
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (Term term : terms) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            builder.append(term);
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < length; i++) {
            if (builder.length() > 0) {
                builder.append(' ');
            }
            return builder.toString();
            builder.append(filterTerms[i]);
        }
        return builder.toString();
    }

}
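The rewritten XTermsFilter accepts duplicate terms across multiple fields, sorts and de-duplicates them, and only re-seeks the TermsEnum when the field changes (the resetTermsEnum bookkeeping above). A usage sketch, assuming an existing index in a Directory; the field names and values are invented for illustration and are not part of this commit:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.XTermsFilter;
import org.apache.lucene.store.Directory;

import java.io.IOException;

public class XTermsFilterExample {

    public static TopDocs findDocs(Directory dir) throws IOException {
        IndexReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Duplicates and mixed fields are allowed; the constructor sorts and de-duplicates.
            XTermsFilter filter = new XTermsFilter(
                    new Term("user", "kimchy"),
                    new Term("user", "kimchy"),
                    new Term("tag", "lucene4"));
            // Constant-score wrapper: return everything the filter accepts, without scoring.
            return searcher.search(new ConstantScoreQuery(filter), 10);
        } finally {
            reader.close();
        }
    }
}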
@@ -1,90 +0,0 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.elasticsearch.common.lucene.uid.UidField;

/**
 *
 */
public class DocumentBuilder {

    public static final Document EMPTY = new Document();

    public static DocumentBuilder doc() {
        return new DocumentBuilder();
    }

    public static Fieldable uidField(String value) {
        return uidField(value, 0);
    }

    public static Fieldable uidField(String value, long version) {
        return new UidField("_uid", value, version);
    }

    public static FieldBuilder field(String name, String value) {
        return field(name, value, Field.Store.YES, Field.Index.ANALYZED);
    }

    public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index) {
        return new FieldBuilder(name, value, store, index);
    }

    public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) {
        return new FieldBuilder(name, value, store, index, termVector);
    }

    public static FieldBuilder field(String name, byte[] value, Field.Store store) {
        return new FieldBuilder(name, value, store);
    }

    public static FieldBuilder field(String name, byte[] value, int offset, int length, Field.Store store) {
        return new FieldBuilder(name, value, offset, length, store);
    }

    private final Document document;

    private DocumentBuilder() {
        this.document = new Document();
    }

    public DocumentBuilder boost(float boost) {
        document.setBoost(boost);
        return this;
    }

    public DocumentBuilder add(Fieldable field) {
        document.add(field);
        return this;
    }

    public DocumentBuilder add(FieldBuilder fieldBuilder) {
        document.add(fieldBuilder.build());
        return this;
    }

    public Document build() {
        return document;
    }
}
@@ -1,65 +0,0 @@
/*
 * Licensed to ElasticSearch and Shay Banon under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. ElasticSearch licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene;

import org.apache.lucene.document.Field;

/**
 *
 */
public class FieldBuilder {

    private final Field field;

    FieldBuilder(String name, String value, Field.Store store, Field.Index index) {
        field = new Field(name, value, store, index);
    }

    FieldBuilder(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) {
        field = new Field(name, value, store, index, termVector);
    }

    FieldBuilder(String name, byte[] value, Field.Store store) {
        field = new Field(name, value, store);
    }

    FieldBuilder(String name, byte[] value, int offset, int length, Field.Store store) {
        field = new Field(name, value, offset, length, store);
    }

    public FieldBuilder boost(float boost) {
        field.setBoost(boost);
        return this;
    }

    public FieldBuilder omitNorms(boolean omitNorms) {
        field.setOmitNorms(omitNorms);
        return this;
    }

    public FieldBuilder omitTermFreqAndPositions(boolean omitTermFreqAndPositions) {
        field.setOmitTermFreqAndPositions(omitTermFreqAndPositions);
        return this;
    }

    public Field build() {
        return field;
    }
}
@@ -86,21 +86,11 @@ public abstract class IndexCommitDelegate extends IndexCommit {
        return delegate.hashCode();
    }

    @Override
    public long getVersion() {
        return delegate.getVersion();
    }

    @Override
    public long getGeneration() {
        return delegate.getGeneration();
    }

    @Override
    public long getTimestamp() throws IOException {
        return delegate.getTimestamp();
    }

    @Override
    public Map<String, String> getUserData() throws IOException {
        return delegate.getUserData();
@@ -92,18 +92,6 @@ public class Lucene {
        return countCollector.getTotalHits();
    }

    public static int docId(IndexReader reader, Term term) throws IOException {
        TermDocs termDocs = reader.termDocs(term);
        try {
            if (termDocs.next()) {
                return termDocs.doc();
            }
            return NO_DOC;
        } finally {
            termDocs.close();
        }
    }

    /**
     * Closes the index writer, returning <tt>false</tt> if it failed to close.
     */

@@ -352,7 +340,7 @@ public class Lucene {
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) throws IOException {
        public void setNextReader(AtomicReaderContext context) throws IOException {
        }

        @Override
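The removed Lucene.docId(IndexReader, Term) helper was built on the Lucene 3 TermDocs API. A rough Lucene 4 equivalent would use the same TermsEnum/DocsEnum calls seen in the XTermsFilter hunk above; the following is a sketch under that assumption, not code from this commit:

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;

import java.io.IOException;

public class FirstDocForTerm {

    public static final int NO_DOC = -1;

    // Returns the first live document containing the term, or NO_DOC if it is absent.
    public static int docId(IndexReader reader, Term term) throws IOException {
        Terms terms = MultiFields.getTerms(reader, term.field());
        if (terms == null) {
            return NO_DOC; // field is not indexed
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(term.bytes(), false)) {
            return NO_DOC; // term does not exist
        }
        DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null, 0);
        int doc = docs.nextDoc();
        return doc == DocIdSetIterator.NO_MORE_DOCS ? NO_DOC : doc;
    }
}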
@@ -19,6 +19,7 @@
package org.elasticsearch.common.lucene;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ScoreCachingWrappingScorer;

@@ -59,8 +60,8 @@ public class MinimumScoreCollector extends Collector {
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        collector.setNextReader(reader, docBase);
    public void setNextReader(AtomicReaderContext context) throws IOException {
        collector.setNextReader(context);
    }

    @Override
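In Lucene 4 a Collector is advanced per segment with an AtomicReaderContext instead of an (IndexReader, docBase) pair; the docBase now comes from the context. A bare-bones collector in the new style, as an illustrative sketch rather than code from this commit:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class DocIdListCollector extends Collector {

    private final List<Integer> docs = new ArrayList<Integer>();
    private int docBase;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // scores are not used by this collector
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        this.docBase = context.docBase; // previously passed in as a separate int argument
    }

    @Override
    public void collect(int doc) throws IOException {
        docs.add(docBase + doc); // per-segment doc id -> top-level doc id
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    public List<Integer> docs() {
        return docs;
    }
}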
@@ -19,7 +19,7 @@
package org.elasticsearch.common.lucene;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.apache.lucene.search.Scorer;

@@ -61,10 +61,10 @@ public class MultiCollector extends Collector {
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        collector.setNextReader(reader, docBase);
    public void setNextReader(AtomicReaderContext context) throws IOException {
        collector.setNextReader(context);
        for (Collector collector : collectors) {
            collector.setNextReader(reader, docBase);
            collector.setNextReader(context);
        }
    }

@@ -19,14 +19,21 @@
package org.elasticsearch.common.lucene.all;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

@@ -51,32 +58,35 @@ public class AllTermQuery extends SpanTermQuery {
    }

    @Override
    public Weight createWeight(Searcher searcher) throws IOException {
    public Weight createWeight(IndexSearcher searcher) throws IOException {
        return new AllTermWeight(this, searcher);
    }

    protected class AllTermWeight extends SpanWeight {

        public AllTermWeight(AllTermQuery query, Searcher searcher) throws IOException {
        public AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOException {
            super(query, searcher);
        }

        @Override
        public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder,
                             boolean topScorer) throws IOException {
            return new AllTermSpanScorer((TermSpans) query.getSpans(reader), this, similarity, reader.norms(query.getField()));
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
                             boolean topScorer, Bits acceptDocs) throws IOException {
            if (this.stats == null) {
                return null;
            }
            AtomicReader reader = context.reader();
            SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context);
            return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer);
        }

        protected class AllTermSpanScorer extends SpanScorer {
            // TODO: is this the best way to allocate this?
            protected byte[] payload = new byte[4];
            protected TermPositions positions;
            protected DocsAndPositionsEnum positions;
            protected float payloadScore;
            protected int payloadsSeen;

            public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException {
                super(spans, weight, similarity, norms);
                positions = spans.getPositions();
            public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException {
                super(spans, weight, docScorer);
                positions = spans.getPostings();
            }

            @Override

@@ -88,12 +98,11 @@ public class AllTermQuery extends SpanTermQuery {
                freq = 0.0f;
                payloadScore = 0;
                payloadsSeen = 0;
                Similarity similarity1 = getSimilarity();
                while (more && doc == spans.doc()) {
                    int matchLength = spans.end() - spans.start();

                    freq += similarity1.sloppyFreq(matchLength);
                    processPayload(similarity1);
                    freq += docScorer.computeSlopFactor(matchLength);
                    processPayload();

                    more = spans.next();// this moves positions to the next match in this
                    // document

@@ -101,10 +110,10 @@ public class AllTermQuery extends SpanTermQuery {
                return more || (freq != 0);
            }

            protected void processPayload(Similarity similarity) throws IOException {
                if (positions.isPayloadAvailable()) {
                    payload = positions.getPayload(payload, 0);
                    payloadScore += decodeFloat(payload);
            protected void processPayload() throws IOException {
                final BytesRef payload;
                if ((payload = positions.getPayload()) != null) {
                    payloadScore += decodeFloat(payload.bytes, payload.offset);
                    payloadsSeen++;

                } else {

@@ -141,27 +150,40 @@ public class AllTermQuery extends SpanTermQuery {
                return payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1;
            }

            @Override
            protected Explanation explain(final int doc) throws IOException {
            }

        @Override
        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
            AllTermSpanScorer scorer = (AllTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs());
            if (scorer != null) {
                int newDoc = scorer.advance(doc);
                if (newDoc == doc) {
                    float freq = scorer.freq();
                    SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
                    ComplexExplanation inner = new ComplexExplanation();
                    inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
                    Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq));
                    inner.addDetail(scoreExplanation);
                    inner.setValue(scoreExplanation.getValue());
                    inner.setMatch(true);
                ComplexExplanation result = new ComplexExplanation();
                Explanation nonPayloadExpl = super.explain(doc);
                result.addDetail(nonPayloadExpl);
                // QUESTION: Is there a way to avoid this skipTo call? We need to know
                // whether to load the payload or not
                    result.addDetail(inner);
                Explanation payloadBoost = new Explanation();
                result.addDetail(payloadBoost);

                float payloadScore = getPayloadScore();
                    final float payloadScore = scorer.getPayloadScore();
                payloadBoost.setValue(payloadScore);
                // GSI: I suppose we could toString the payload, but I don't think that
                // would be a good idea
                payloadBoost.setDescription("allPayload(...)");
                result.setValue(nonPayloadExpl.getValue() * payloadScore);
                    result.setValue(inner.getValue() * payloadScore);
                result.setDescription("btq, product of:");
                result.setMatch(nonPayloadExpl.getValue() == 0 ? Boolean.FALSE : Boolean.TRUE); // LUCENE-1303
                return result;
                }
            }

            return new ComplexExplanation(false, 0.0f, "no matching term");

        }
    }

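As the explain() code above shows, AllTermSpanScorer averages the payload boosts it has seen for a document and multiplies that average into the span score, falling back to a neutral 1.0 when the field carried no payloads. The combination in isolation, as a sketch rather than the commit's code:

public final class AllPayloadScore {

    // payloadScore is the running sum of decoded payload boosts, payloadsSeen the count.
    public static float combine(float spanScore, float payloadScore, int payloadsSeen) {
        float payloadBoost = payloadsSeen > 0 ? payloadScore / payloadsSeen : 1.0f;
        return spanScore * payloadBoost;
    }
}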
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

@@ -35,8 +35,10 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.encodeFloat;
public final class AllTokenStream extends TokenFilter {

    public static TokenStream allTokenStream(String allFieldName, AllEntries allEntries, Analyzer analyzer) throws IOException {
        return new AllTokenStream(analyzer.reusableTokenStream(allFieldName, allEntries), allEntries);
        return new AllTokenStream(analyzer.tokenStream(allFieldName, allEntries), allEntries);
    }

    private final BytesRef payloadSpare = new BytesRef(new byte[4]);

    private final AllEntries allEntries;

@@ -60,7 +62,8 @@ public final class AllTokenStream extends TokenFilter {
        if (allEntries.current() != null) {
            float boost = allEntries.current().boost();
            if (boost != 1.0f) {
                payloadAttribute.setPayload(new Payload(encodeFloat(boost)));
                encodeFloat(boost, payloadSpare.bytes, payloadSpare.offset);
                payloadAttribute.setPayload(payloadSpare);
            } else {
                payloadAttribute.setPayload(null);
            }
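The token stream now writes the boost into a reused 4-byte BytesRef instead of allocating a new Payload per token, and AllTermQuery reads it back with decodeFloat. A round-trip sketch using the same PayloadHelper calls; illustrative only, not part of the commit:

import org.apache.lucene.util.BytesRef;

import static org.apache.lucene.analysis.payloads.PayloadHelper.decodeFloat;
import static org.apache.lucene.analysis.payloads.PayloadHelper.encodeFloat;

public class BoostPayloadRoundTrip {

    public static void main(String[] args) {
        // Reusable 4-byte buffer, like AllTokenStream's payloadSpare.
        BytesRef payload = new BytesRef(new byte[4]);
        encodeFloat(2.5f, payload.bytes, payload.offset);          // index side: write the boost
        float boost = decodeFloat(payload.bytes, payload.offset);  // query side: read it back
        System.out.println(boost); // prints 2.5
    }
}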
@@ -19,6 +19,7 @@
package org.elasticsearch.common.lucene.docset;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

@@ -58,9 +59,9 @@ public class DocIdSetCollector extends Collector {
    }

    @Override
    public void setNextReader(IndexReader reader, int docBase) throws IOException {
        base = docBase;
        collector.setNextReader(reader, docBase);
    public void setNextReader(AtomicReaderContext ctx) throws IOException {
        base = ctx.docBase;
        collector.setNextReader(ctx);
    }

    @Override