lucene 4: Upgraded AndFilter, NotDeletedFilter, NotFilter, OrFilter, TermFilter, XBooleanFilter. Live docs and accepted docs are left unhandled (null is passed) for now; I added a note at all such places.

This commit is contained in:
Martijn van Groningen 2012-10-31 15:38:30 +01:00 committed by Shay Banon
parent 6b4e483f55
commit 5a553a1924
6 changed files with 77 additions and 62 deletions

View File

@ -50,12 +50,14 @@ public class AndFilter extends Filter {
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (filters.size() == 1) {
return filters.get(0).getDocIdSet(context, acceptDocs);
// LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs...
return filters.get(0).getDocIdSet(context, null);
}
List sets = Lists.newArrayListWithExpectedSize(filters.size());
boolean allAreDocSet = true;
for (Filter filter : filters) {
DocIdSet set = filter.getDocIdSet(context, acceptDocs);
// LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) { // none matching for this filter, we AND, so return EMPTY
return DocSet.EMPTY_DOC_SET;
}

View File

@ -19,11 +19,13 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredDocIdSetIterator;
import org.apache.lucene.util.Bits;
import java.io.IOException;
@ -39,15 +41,15 @@ public class NotDeletedFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
DocIdSet docIdSet = filter.getDocIdSet(reader);
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
DocIdSet docIdSet = filter.getDocIdSet(context, acceptDocs);
if (docIdSet == null) {
return null;
}
if (!reader.hasDeletions()) {
if (!context.reader().hasDeletions()) {
return docIdSet;
}
return new NotDeletedDocIdSet(docIdSet, reader);
return new NotDeletedDocIdSet(docIdSet, context.reader().getLiveDocs());
}
public Filter filter() {
@ -63,11 +65,11 @@ public class NotDeletedFilter extends Filter {
private final DocIdSet innerSet;
private final IndexReader reader;
private final Bits liveDocs;
NotDeletedDocIdSet(DocIdSet innerSet, IndexReader reader) {
NotDeletedDocIdSet(DocIdSet innerSet, Bits liveDocs) {
this.innerSet = innerSet;
this.reader = reader;
this.liveDocs = liveDocs;
}
@Override
@ -76,22 +78,22 @@ public class NotDeletedFilter extends Filter {
if (iterator == null) {
return null;
}
return new NotDeletedDocIdSetIterator(iterator, reader);
return new NotDeletedDocIdSetIterator(iterator, liveDocs);
}
}
static class NotDeletedDocIdSetIterator extends FilteredDocIdSetIterator {
private final IndexReader reader;
private final Bits liveDocs;
NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, IndexReader reader) {
NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, Bits liveDocs) {
super(innerIter);
this.reader = reader;
this.liveDocs = liveDocs;
}
@Override
protected boolean match(int doc) throws IOException {
return !reader.isDeleted(doc);
protected boolean match(int doc) {
return liveDocs.get(doc);
}
}
}

View File

@ -19,9 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.AllDocSet;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.NotDocIdSet;
@ -45,15 +47,16 @@ public class NotFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
DocIdSet set = filter.getDocIdSet(reader);
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) {
return new AllDocSet(reader.maxDoc());
return new AllDocSet(context.reader().maxDoc());
}
if (set instanceof DocSet) {
return new NotDocSet((DocSet) set, reader.maxDoc());
return new NotDocSet((DocSet) set, context.reader().maxDoc());
}
return new NotDocIdSet(set, reader.maxDoc());
return new NotDocIdSet(set, context.reader().maxDoc());
}
@Override

View File

@ -20,9 +20,11 @@
package org.elasticsearch.common.lucene.search;
import com.google.common.collect.Lists;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.OrDocIdSet;
import org.elasticsearch.common.lucene.docset.OrDocSet;
@ -46,14 +48,16 @@ public class OrFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
if (filters.size() == 1) {
return filters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return filters.get(0).getDocIdSet(context, null);
}
List sets = Lists.newArrayListWithExpectedSize(filters.size());
boolean allAreDocSet = true;
for (Filter filter : filters) {
DocIdSet set = filter.getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
DocIdSet set = filter.getDocIdSet(context, null);
if (set == null) { // none matching for this filter, continue
continue;
}

View File

@ -19,13 +19,11 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.Lucene;
import java.io.IOException;
@ -45,26 +43,26 @@ public class TermFilter extends Filter {
}
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
FixedBitSet result = null;
TermDocs td = reader.termDocs();
try {
td.seek(term);
// batch read, in Lucene 4.0 its no longer needed
int[] docs = new int[Lucene.BATCH_ENUM_DOCS];
int[] freqs = new int[Lucene.BATCH_ENUM_DOCS];
int number = td.read(docs, freqs);
if (number > 0) {
result = new FixedBitSet(reader.maxDoc());
while (number > 0) {
for (int i = 0; i < number; i++) {
result.set(docs[i]);
}
number = td.read(docs, freqs);
}
}
} finally {
td.close();
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
Terms terms = context.reader().terms(term.field());
if (terms == null) {
return null;
}
TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(term.bytes(), false)) {
return null;
}
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
DocsEnum docsEnum = termsEnum.docs(null, null);
int docId = docsEnum.nextDoc();
if (docId == DocsEnum.NO_MORE_DOCS) {
return null;
}
final FixedBitSet result = new FixedBitSet(context.reader().maxDoc());
for (; docId < DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
result.set(docId);
}
return result;
}

View File

@ -19,11 +19,12 @@
package org.elasticsearch.common.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.FilterClause;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.elasticsearch.common.lucene.docset.DocSet;
import org.elasticsearch.common.lucene.docset.DocSets;
@ -41,9 +42,9 @@ public class XBooleanFilter extends Filter {
ArrayList<Filter> notFilters = null;
ArrayList<Filter> mustFilters = null;
private DocIdSet getDISI(ArrayList<Filter> filters, int index, IndexReader reader)
private DocIdSet getDISI(ArrayList<Filter> filters, int index, AtomicReaderContext context, Bits acceptedDocs)
throws IOException {
DocIdSet docIdSet = filters.get(index).getDocIdSet(reader);
DocIdSet docIdSet = filters.get(index).getDocIdSet(context, acceptedDocs);
if (docIdSet == DocIdSet.EMPTY_DOCIDSET || docIdSet == DocSet.EMPTY_DOC_SET) {
return null;
}
@ -67,23 +68,26 @@ public class XBooleanFilter extends Filter {
* of the filters that have been added.
*/
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException {
FixedBitSet res = null;
if (mustFilters == null && notFilters == null && shouldFilters != null && shouldFilters.size() == 1) {
return shouldFilters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return shouldFilters.get(0).getDocIdSet(context, null);
}
if (shouldFilters == null && notFilters == null && mustFilters != null && mustFilters.size() == 1) {
return mustFilters.get(0).getDocIdSet(reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
return mustFilters.get(0).getDocIdSet(context, null);
}
if (shouldFilters != null) {
for (int i = 0; i < shouldFilters.size(); i++) {
final DocIdSet disi = getDISI(shouldFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(shouldFilters, i, context, null);
if (disi == null) continue;
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res = new FixedBitSet(context.reader().maxDoc());
}
DocSets.or(res, disi);
}
@ -98,10 +102,11 @@ public class XBooleanFilter extends Filter {
if (notFilters != null) {
for (int i = 0; i < notFilters.size(); i++) {
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res.set(0, reader.maxDoc()); // NOTE: may set bits on deleted docs
res = new FixedBitSet(context.reader().maxDoc());
res.set(0, context.reader().maxDoc()); // NOTE: may set bits on deleted docs
}
final DocIdSet disi = getDISI(notFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(notFilters, i, context, null);
if (disi != null) {
DocSets.andNot(res, disi);
}
@ -110,12 +115,13 @@ public class XBooleanFilter extends Filter {
if (mustFilters != null) {
for (int i = 0; i < mustFilters.size(); i++) {
final DocIdSet disi = getDISI(mustFilters, i, reader);
// LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
final DocIdSet disi = getDISI(mustFilters, i, context, null);
if (disi == null) {
return null;
}
if (res == null) {
res = new FixedBitSet(reader.maxDoc());
res = new FixedBitSet(context.reader().maxDoc());
DocSets.or(res, disi);
} else {
DocSets.and(res, disi);
@ -219,10 +225,10 @@ public class XBooleanFilter extends Filter {
private void appendFilters(ArrayList<Filter> filters, String occurString, StringBuilder buffer) {
if (filters != null) {
for (int i = 0; i < filters.size(); i++) {
for (Filter filter : filters) {
buffer.append(' ');
buffer.append(occurString);
buffer.append(filters.get(i).toString());
buffer.append(filter.toString());
}
}
}