new PrefixFilter from Solr: LUCENE-676

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@453381 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-10-05 21:10:30 +00:00
parent 574f3d3083
commit efab233ed8
3 changed files with 195 additions and 0 deletions

View File

@ -31,6 +31,8 @@ New features
New Fieldable interface for use with the lazy field loading mechanism.
(Grant Ingersoll and Chuck Williams via Grant Ingersoll)
3. LUCENE-676: Move Solr's PrefixFilter to Lucene core. (Yura Smolsky, Yonik Seeley)
API Changes
1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow

View File

@ -0,0 +1,89 @@
package org.apache.lucene.search;
import org.apache.lucene.search.Filter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
import java.util.BitSet;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class PrefixFilter extends Filter {
  protected final Term prefix;

  /** @param prefix documents must contain a term in this field starting with this text */
  public PrefixFilter(Term prefix) {
    this.prefix = prefix;
  }

  /** Returns the prefix term this filter matches against. */
  public Term getPrefix() { return prefix; }

  /** Returns a BitSet with a bit set for every document containing
   *  at least one term that starts with the prefix. */
  public BitSet bits(IndexReader reader) throws IOException {
    final BitSet result = new BitSet(reader.maxDoc());
    new PrefixGenerator(prefix) {
      public void handleDoc(int doc) {
        result.set(doc);
      }
    }.generate(reader);
    return result;
  }

  /** Prints a user-readable version of this query. */
  public String toString() {
    return "PrefixFilter(" + prefix + ")";
  }
}
// NOTE(review): kept package-private until it is decided whether this is a good
// way to separate id generation from id collection (or whether HitCollector
// should simply be reused instead).
interface IdGenerator {
  /** Walks the reader and calls {@link #handleDoc(int)} once per matching document id. */
  public void generate(IndexReader reader) throws IOException;
  /** Callback invoked for each matching document id produced by {@link #generate}. */
  public void handleDoc(int doc);
}
/**
 * Enumerates all terms in the prefix's field that start with the prefix text
 * and reports each matching document id to {@link #handleDoc(int)}.
 */
abstract class PrefixGenerator implements IdGenerator {
  protected final Term prefix;

  PrefixGenerator(Term prefix) {
    this.prefix = prefix;
  }

  public void generate(IndexReader reader) throws IOException {
    // terms(prefix) positions the enumeration at the first term >= prefix,
    // so we only need to scan forward until the prefix no longer matches.
    TermEnum enumerator = reader.terms(prefix);
    try {
      TermDocs termDocs = reader.termDocs();
      try {
        String prefixText = prefix.text();
        String prefixField = prefix.field();
        do {
          Term term = enumerator.term();
          // FIX: compare fields with equals() rather than ==.  The original
          // relied on Lucene interning field names; equals() is correct
          // regardless and costs nothing here.
          if (term != null &&
              term.text().startsWith(prefixText) &&
              term.field().equals(prefixField)) {
            termDocs.seek(term);
            while (termDocs.next()) {
              handleDoc(termDocs.doc());
            }
          } else {
            // terms are sorted, so the first non-matching term ends the scan
            break;
          }
        } while (enumerator.next());
      } finally {
        // FIX: nested finally so the enumerator is still closed if
        // reader.termDocs() or termDocs.close() throws.
        termDocs.close();
      }
    } finally {
      enumerator.close();
    }
  }
}

View File

@ -0,0 +1,104 @@
package org.apache.lucene.search;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
/**
* Tests {@link PrefixFilter} class.
*
* @author Yura Smolsky
* @author yonik
*/
public class TestPrefixFilter extends TestCase {
  public void testPrefixFilter() throws Exception {
    RAMDirectory directory = new RAMDirectory();

    // Index each category path as a single untokenized term.
    String[] categories = new String[] {"/Computers/Linux",
                                        "/Computers/Mac/One",
                                        "/Computers/Mac/Two",
                                        "/Computers/Windows"};
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
    for (int i = 0; i < categories.length; i++) {
      Document doc = new Document();
      doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.UN_TOKENIZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(directory);

    // PrefixFilter combined with ConstantScoreQuery: prefix matching all docs
    assertEquals(4, hitCount(searcher, "category", "/Computers"));
    // middle of values
    assertEquals(2, hitCount(searcher, "category", "/Computers/Mac"));
    // start of values
    assertEquals(1, hitCount(searcher, "category", "/Computers/Linux"));
    // end of values
    assertEquals(1, hitCount(searcher, "category", "/Computers/Windows"));
    // non-existent value
    assertEquals(0, hitCount(searcher, "category", "/Computers/ObsoleteOS"));
    // non-existent value sorting before all indexed values
    assertEquals(0, hitCount(searcher, "category", "/Computers/AAA"));
    // non-existent value sorting after all indexed values
    assertEquals(0, hitCount(searcher, "category", "/Computers/ZZZ"));
    // zero-length prefix matches every document
    assertEquals(4, hitCount(searcher, "category", ""));
    // non-existent field
    assertEquals(0, hitCount(searcher, "nonexistantfield", "/Computers"));
  }

  /** Searches with a ConstantScoreQuery over a PrefixFilter and returns the hit count. */
  private static int hitCount(IndexSearcher searcher, String field, String prefix) throws Exception {
    Query query = new ConstantScoreQuery(new PrefixFilter(new Term(field, prefix)));
    Hits hits = searcher.search(query);
    return hits.length();
  }
}