mirror of https://github.com/apache/lucene.git
LUCENE-3593: Add a filter returning all documents with at least one or no value in a field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206017 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9dd60fe58f
commit
94fae441e3
|
@ -651,6 +651,12 @@ Security fixes
|
|||
prevents this as best as it can by throwing AlreadyClosedException
|
||||
also on clones. (Uwe Schindler, Robert Muir)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-3593: Added a FieldValueFilter that accepts all documents that either
|
||||
have at least one or no value at all in a specific field. (Simon Willnauer,
|
||||
Uwe Schindler, Robert Muir)
|
||||
|
||||
======================= Lucene 3.5.0 =======================
|
||||
|
||||
Changes in backwards compatibility policy
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.Bits.MatchAllBits;
|
||||
import org.apache.lucene.util.Bits.MatchNoBits;
|
||||
|
||||
/**
|
||||
* A {@link Filter} that accepts all documents that have one or more values in a
|
||||
* given field. This {@link Filter} request {@link Bits} from the
|
||||
* {@link FieldCache} and build the bits if not present.
|
||||
*/
|
||||
public class FieldValueFilter extends Filter {
|
||||
private final String field;
|
||||
private final boolean negate;
|
||||
|
||||
/**
|
||||
* Creates a new {@link FieldValueFilter}
|
||||
*
|
||||
* @param field
|
||||
* the field to filter
|
||||
*/
|
||||
public FieldValueFilter(String field) {
|
||||
this(field, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link FieldValueFilter}
|
||||
*
|
||||
* @param field
|
||||
* the field to filter
|
||||
* @param negate
|
||||
* iff <code>true</code> all documents with no value in the given
|
||||
* field are accepted.
|
||||
*
|
||||
*/
|
||||
public FieldValueFilter(String field, boolean negate) {
|
||||
this.field = field;
|
||||
this.negate = negate;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
|
||||
throws IOException {
|
||||
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
|
||||
context.reader, field);
|
||||
if (negate) {
|
||||
if (docsWithField instanceof MatchAllBits) {
|
||||
return null;
|
||||
}
|
||||
final int maxDoc = context.reader.maxDoc();
|
||||
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
|
||||
@Override
|
||||
final boolean matchDoc(int doc) {
|
||||
if (doc >= maxDoc) {
|
||||
// TODO: this makes no sense we should check this on the caller level
|
||||
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
|
||||
}
|
||||
return !docsWithField.get(doc);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
if (docsWithField instanceof MatchNoBits) {
|
||||
return null;
|
||||
}
|
||||
if (docsWithField instanceof DocIdSet) {
|
||||
// UweSays: this is always the case for our current impl - but who knows
|
||||
// :-)
|
||||
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
|
||||
}
|
||||
final int maxDoc = context.reader.maxDoc();
|
||||
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
|
||||
@Override
|
||||
final boolean matchDoc(int doc) {
|
||||
if (doc >= maxDoc) {
|
||||
// TODO: this makes no sense we should check this on the caller level
|
||||
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
|
||||
}
|
||||
return docsWithField.get(doc);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((field == null) ? 0 : field.hashCode());
|
||||
result = prime * result + (negate ? 1231 : 1237);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj)
|
||||
return true;
|
||||
if (obj == null)
|
||||
return false;
|
||||
if (getClass() != obj.getClass())
|
||||
return false;
|
||||
FieldValueFilter other = (FieldValueFilter) obj;
|
||||
if (field == null) {
|
||||
if (other.field != null)
|
||||
return false;
|
||||
} else if (!field.equals(other.field))
|
||||
return false;
|
||||
if (negate != other.negate)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class TestFieldValueFilter extends LuceneTestCase {
|
||||
|
||||
public void testFieldValueFilterNoValue() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||
int docs = atLeast(10);
|
||||
int[] docStates = buildIndex(writer, docs);
|
||||
int numDocsNoValue = 0;
|
||||
for (int i = 0; i < docStates.length; i++) {
|
||||
if (docStates[i] == 0) {
|
||||
numDocsNoValue++;
|
||||
}
|
||||
}
|
||||
|
||||
IndexReader reader = IndexReader.open(directory);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
|
||||
new FieldValueFilter("some", true), docs);
|
||||
assertEquals(search.totalHits, numDocsNoValue);
|
||||
|
||||
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||
for (ScoreDoc scoreDoc : scoreDocs) {
|
||||
assertNull(reader.document(scoreDoc.doc).get("some"));
|
||||
}
|
||||
|
||||
reader.close();
|
||||
searcher.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
public void testFieldValueFilter() throws IOException {
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||
int docs = atLeast(10);
|
||||
int[] docStates = buildIndex(writer, docs);
|
||||
int numDocsWithValue = 0;
|
||||
for (int i = 0; i < docStates.length; i++) {
|
||||
if (docStates[i] == 1) {
|
||||
numDocsWithValue++;
|
||||
}
|
||||
}
|
||||
IndexReader reader = IndexReader.open(directory);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
|
||||
new FieldValueFilter("some"), docs);
|
||||
assertEquals(search.totalHits, numDocsWithValue);
|
||||
|
||||
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||
for (ScoreDoc scoreDoc : scoreDocs) {
|
||||
assertEquals("value", reader.document(scoreDoc.doc).get("some"));
|
||||
}
|
||||
|
||||
reader.close();
|
||||
searcher.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
private int[] buildIndex(RandomIndexWriter writer, int docs)
|
||||
throws IOException, CorruptIndexException {
|
||||
int[] docStates = new int[docs];
|
||||
for (int i = 0; i < docs; i++) {
|
||||
Document doc = new Document();
|
||||
if (random.nextBoolean()) {
|
||||
docStates[i] = 1;
|
||||
doc.add(newField("some", "value", TextField.TYPE_STORED));
|
||||
}
|
||||
doc.add(newField("all", "test", TextField.TYPE_UNSTORED));
|
||||
doc.add(newField("id", "" + i, TextField.TYPE_STORED));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.commit();
|
||||
int numDeletes = random.nextInt(docs);
|
||||
for (int i = 0; i < numDeletes; i++) {
|
||||
int docID = random.nextInt(docs);
|
||||
writer.deleteDocuments(new Term("id", "" + docID));
|
||||
docStates[docID] = 2;
|
||||
}
|
||||
writer.close();
|
||||
return docStates;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue