mirror of https://github.com/apache/lucene.git
LUCENE-3593: Add a filter returning all documents with at least one or no value in a field
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1206017 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9dd60fe58f
commit
94fae441e3
|
@ -651,6 +651,12 @@ Security fixes
|
||||||
prevents this as best as it can by throwing AlreadyClosedException
|
prevents this as best as it can by throwing AlreadyClosedException
|
||||||
also on clones. (Uwe Schindler, Robert Muir)
|
also on clones. (Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
|
New Features
|
||||||
|
|
||||||
|
* LUCENE-3593: Added a FieldValueFilter that accepts all documents that either
|
||||||
|
have at least one or no value at all in a specific field. (Simon Willnauer,
|
||||||
|
Uwe Schindler, Robert Muir)
|
||||||
|
|
||||||
======================= Lucene 3.5.0 =======================
|
======================= Lucene 3.5.0 =======================
|
||||||
|
|
||||||
Changes in backwards compatibility policy
|
Changes in backwards compatibility policy
|
||||||
|
|
|
@ -0,0 +1,137 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.search.FieldCacheRangeFilter.FieldCacheDocIdSet;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
|
import org.apache.lucene.util.Bits.MatchAllBits;
|
||||||
|
import org.apache.lucene.util.Bits.MatchNoBits;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link Filter} that accepts all documents that have one or more values in a
|
||||||
|
* given field. This {@link Filter} request {@link Bits} from the
|
||||||
|
* {@link FieldCache} and build the bits if not present.
|
||||||
|
*/
|
||||||
|
public class FieldValueFilter extends Filter {
|
||||||
|
private final String field;
|
||||||
|
private final boolean negate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link FieldValueFilter}
|
||||||
|
*
|
||||||
|
* @param field
|
||||||
|
* the field to filter
|
||||||
|
*/
|
||||||
|
public FieldValueFilter(String field) {
|
||||||
|
this(field, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a new {@link FieldValueFilter}
|
||||||
|
*
|
||||||
|
* @param field
|
||||||
|
* the field to filter
|
||||||
|
* @param negate
|
||||||
|
* iff <code>true</code> all documents with no value in the given
|
||||||
|
* field are accepted.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public FieldValueFilter(String field, boolean negate) {
|
||||||
|
this.field = field;
|
||||||
|
this.negate = negate;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
|
||||||
|
throws IOException {
|
||||||
|
final Bits docsWithField = FieldCache.DEFAULT.getDocsWithField(
|
||||||
|
context.reader, field);
|
||||||
|
if (negate) {
|
||||||
|
if (docsWithField instanceof MatchAllBits) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
final int maxDoc = context.reader.maxDoc();
|
||||||
|
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
|
||||||
|
@Override
|
||||||
|
final boolean matchDoc(int doc) {
|
||||||
|
if (doc >= maxDoc) {
|
||||||
|
// TODO: this makes no sense we should check this on the caller level
|
||||||
|
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
|
||||||
|
}
|
||||||
|
return !docsWithField.get(doc);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
if (docsWithField instanceof MatchNoBits) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (docsWithField instanceof DocIdSet) {
|
||||||
|
// UweSays: this is always the case for our current impl - but who knows
|
||||||
|
// :-)
|
||||||
|
return BitsFilteredDocIdSet.wrap((DocIdSet) docsWithField, acceptDocs);
|
||||||
|
}
|
||||||
|
final int maxDoc = context.reader.maxDoc();
|
||||||
|
return new FieldCacheDocIdSet(maxDoc, acceptDocs) {
|
||||||
|
@Override
|
||||||
|
final boolean matchDoc(int doc) {
|
||||||
|
if (doc >= maxDoc) {
|
||||||
|
// TODO: this makes no sense we should check this on the caller level
|
||||||
|
throw new ArrayIndexOutOfBoundsException("doc: "+doc + " maxDoc: " + maxDoc);
|
||||||
|
}
|
||||||
|
return docsWithField.get(doc);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
final int prime = 31;
|
||||||
|
int result = 1;
|
||||||
|
result = prime * result + ((field == null) ? 0 : field.hashCode());
|
||||||
|
result = prime * result + (negate ? 1231 : 1237);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (this == obj)
|
||||||
|
return true;
|
||||||
|
if (obj == null)
|
||||||
|
return false;
|
||||||
|
if (getClass() != obj.getClass())
|
||||||
|
return false;
|
||||||
|
FieldValueFilter other = (FieldValueFilter) obj;
|
||||||
|
if (field == null) {
|
||||||
|
if (other.field != null)
|
||||||
|
return false;
|
||||||
|
} else if (!field.equals(other.field))
|
||||||
|
return false;
|
||||||
|
if (negate != other.negate)
|
||||||
|
return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "NoFieldValueFilter [field=" + field + ", negate=" + negate + "]";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,117 @@
|
||||||
|
package org.apache.lucene.search;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
public class TestFieldValueFilter extends LuceneTestCase {
|
||||||
|
|
||||||
|
public void testFieldValueFilterNoValue() throws IOException {
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||||
|
int docs = atLeast(10);
|
||||||
|
int[] docStates = buildIndex(writer, docs);
|
||||||
|
int numDocsNoValue = 0;
|
||||||
|
for (int i = 0; i < docStates.length; i++) {
|
||||||
|
if (docStates[i] == 0) {
|
||||||
|
numDocsNoValue++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexReader reader = IndexReader.open(directory);
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
|
||||||
|
new FieldValueFilter("some", true), docs);
|
||||||
|
assertEquals(search.totalHits, numDocsNoValue);
|
||||||
|
|
||||||
|
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||||
|
for (ScoreDoc scoreDoc : scoreDocs) {
|
||||||
|
assertNull(reader.document(scoreDoc.doc).get("some"));
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
searcher.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testFieldValueFilter() throws IOException {
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
RandomIndexWriter writer = new RandomIndexWriter(random, directory,
|
||||||
|
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
|
||||||
|
int docs = atLeast(10);
|
||||||
|
int[] docStates = buildIndex(writer, docs);
|
||||||
|
int numDocsWithValue = 0;
|
||||||
|
for (int i = 0; i < docStates.length; i++) {
|
||||||
|
if (docStates[i] == 1) {
|
||||||
|
numDocsWithValue++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
IndexReader reader = IndexReader.open(directory);
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
TopDocs search = searcher.search(new TermQuery(new Term("all", "test")),
|
||||||
|
new FieldValueFilter("some"), docs);
|
||||||
|
assertEquals(search.totalHits, numDocsWithValue);
|
||||||
|
|
||||||
|
ScoreDoc[] scoreDocs = search.scoreDocs;
|
||||||
|
for (ScoreDoc scoreDoc : scoreDocs) {
|
||||||
|
assertEquals("value", reader.document(scoreDoc.doc).get("some"));
|
||||||
|
}
|
||||||
|
|
||||||
|
reader.close();
|
||||||
|
searcher.close();
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private int[] buildIndex(RandomIndexWriter writer, int docs)
|
||||||
|
throws IOException, CorruptIndexException {
|
||||||
|
int[] docStates = new int[docs];
|
||||||
|
for (int i = 0; i < docs; i++) {
|
||||||
|
Document doc = new Document();
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
docStates[i] = 1;
|
||||||
|
doc.add(newField("some", "value", TextField.TYPE_STORED));
|
||||||
|
}
|
||||||
|
doc.add(newField("all", "test", TextField.TYPE_UNSTORED));
|
||||||
|
doc.add(newField("id", "" + i, TextField.TYPE_STORED));
|
||||||
|
writer.addDocument(doc);
|
||||||
|
}
|
||||||
|
writer.commit();
|
||||||
|
int numDeletes = random.nextInt(docs);
|
||||||
|
for (int i = 0; i < numDeletes; i++) {
|
||||||
|
int docID = random.nextInt(docs);
|
||||||
|
writer.deleteDocuments(new Term("id", "" + docID));
|
||||||
|
docStates[docID] = 2;
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
return docStates;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue