LUCENE-3593: Add a filter returning all document with at least one or no value in a field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206017 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2011-11-24 23:24:35 +00:00
parent 9dd60fe58f
commit 94fae441e3
3 changed files with 260 additions and 0 deletions

View File

@ -651,6 +651,12 @@ Security fixes
prevents this as best as it can by throwing AlreadyClosedException
also on clones. (Uwe Schindler, Robert Muir)
New Features
* LUCENE-3593: Added a FieldValueFilter that accepts all documents that either
have at least one or no value at all in a specific field. (Simon Willnauer,
Uwe Schindler, Robert Muir)
======================= Lucene 3.5.0 =======================
Changes in backwards compatibility policy

View File

@ -0,0 +1,137 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchAllBits;
import org.apache.lucene.util.Bits.MatchNoBits;
/**
* A {@link Filter} that accepts all documents that have one or more values in a
* given field. This {@link Filter} request {@link Bits} from the
* {@link FieldCache} and build the bits if not present.
*/
public class FieldValueFilter extends Filter {
private final String field;
private final boolean negate;
/**
* Creates a new {@link FieldValueFilter}
*
* @param field
* the field to filter
*/
public FieldValueFilter(String field) {
this(field, false);
}
/**
* Creates a new {@link FieldValueFilter}
*
* @param field
* the field to filter
* @param negate
* iff <code>true</code> all documents with no value in the given
* field are accepted.
*
*/
public FieldValueFilter(String field, boolean negate) {
this.field = field;
this.negate = negate;
}
@Override
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
context.reader, field);
if (negate) {
if (docsWithField instanceof MatchAllBits) {
return null;
}
final int maxDoc = context.reader.maxDoc();
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override
final boolean matchDoc(int doc) {
if (doc >= maxDoc) {
// TODO: this makes no sense we should check this on the caller level
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
}
return !docsWithField.get(doc);
}
};
} else {
if (docsWithField instanceof MatchNoBits) {
return null;
}
if (docsWithField instanceof DocIdSet) {
// UweSays: this is always the case for our current impl - but who knows
// :-)
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
}
final int maxDoc = context.reader.maxDoc();
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
@Override
final boolean matchDoc(int doc) {
if (doc >= maxDoc) {
// TODO: this makes no sense we should check this on the caller level
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
}
return docsWithField.get(doc);
}
};
}
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((field == null) ? 0 : field.hashCode());
result = prime * result + (negate ? 1231 : 1237);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
FieldValueFilter other = (FieldValueFilter) obj;
if (field == null) {
if (other.field != null)
return false;
} else if (!field.equals(other.field))
return false;
if (negate != other.negate)
return false;
return true;
}
@Override
public String toString() {
return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
}
}

View File

@ -0,0 +1,117 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
*
*/
public class TestFieldValueFilter extends LuceneTestCase {
public void testFieldValueFilterNoValue() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
int docs = atLeast(10);
int[] docStates = buildIndex(writer, docs);
int numDocsNoValue = 0;
for (int i = 0; i < docStates.length; i++) {
if (docStates[i] == 0) {
numDocsNoValue++;
}
}
IndexReader reader = IndexReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
new FieldValueFilter("some", true), docs);
assertEquals(search.totalHits, numDocsNoValue);
ScoreDoc[] scoreDocs = search.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
assertNull(reader.document(scoreDoc.doc).get("some"));
}
reader.close();
searcher.close();
directory.close();
}
public void testFieldValueFilter() throws IOException {
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
int docs = atLeast(10);
int[] docStates = buildIndex(writer, docs);
int numDocsWithValue = 0;
for (int i = 0; i < docStates.length; i++) {
if (docStates[i] == 1) {
numDocsWithValue++;
}
}
IndexReader reader = IndexReader.open(directory);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
new FieldValueFilter("some"), docs);
assertEquals(search.totalHits, numDocsWithValue);
ScoreDoc[] scoreDocs = search.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
assertEquals("value", reader.document(scoreDoc.doc).get("some"));
}
reader.close();
searcher.close();
directory.close();
}
private int[] buildIndex(RandomIndexWriter writer, int docs)
throws IOException, CorruptIndexException {
int[] docStates = new int[docs];
for (int i = 0; i < docs; i++) {
Document doc = new Document();
if (random.nextBoolean()) {
docStates[i] = 1;
doc.add(newField("some", "value", TextField.TYPE_STORED));
}
doc.add(newField("all", "test", TextField.TYPE_UNSTORED));
doc.add(newField("id", "" + i, TextField.TYPE_STORED));
writer.addDocument(doc);
}
writer.commit();
int numDeletes = random.nextInt(docs);
for (int i = 0; i < numDeletes; i++) {
int docID = random.nextInt(docs);
writer.deleteDocuments(new Term("id", "" + docID));
docStates[docID] = 2;
}
writer.close();
return docStates;
}
}