From efab233ed8c4ac7095d705755492e7c96e05d34b Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Thu, 5 Oct 2006 21:10:30 +0000 Subject: [PATCH] new PrefixFilter from Solr: LUCENE-676 git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@453381 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../apache/lucene/search/PrefixFilter.java | 89 +++++++++++++++ .../lucene/search/TestPrefixFilter.java | 104 ++++++++++++++++++ 3 files changed, 195 insertions(+) create mode 100755 src/java/org/apache/lucene/search/PrefixFilter.java create mode 100644 src/test/org/apache/lucene/search/TestPrefixFilter.java diff --git a/CHANGES.txt b/CHANGES.txt index f7a92c8721e..7059234a40b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -31,6 +31,8 @@ New features New Fieldable interface for use with the lazy field loading mechanism. (Grant Ingersoll and Chuck Williams via Grant Ingersoll) + 3. LUCENE-676: Move Solr's PrefixFilter to Lucene core. (Yura Smolsky, Yonik Seeley) + API Changes 1. LUCENE-438: Remove "final" from Token, implement Cloneable, allow diff --git a/src/java/org/apache/lucene/search/PrefixFilter.java b/src/java/org/apache/lucene/search/PrefixFilter.java new file mode 100755 index 00000000000..9771791954b --- /dev/null +++ b/src/java/org/apache/lucene/search/PrefixFilter.java @@ -0,0 +1,89 @@ +package org.apache.lucene.search; + +import org.apache.lucene.search.Filter; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.TermDocs; + +import java.util.BitSet; +import java.io.IOException; + +/** + * @author yonik + * @version $Id$ + */ +public class PrefixFilter extends Filter { + protected final Term prefix; + + public PrefixFilter(Term prefix) { + this.prefix = prefix; + } + + public Term getPrefix() { return prefix; } + + public BitSet bits(IndexReader reader) throws IOException { + final BitSet bitSet = new BitSet(reader.maxDoc()); + new PrefixGenerator(prefix) { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader); + return bitSet; + } + + /** Prints a user-readable version of this query. */ + public String toString () { + StringBuffer buffer = new StringBuffer(); + buffer.append("PrefixFilter("); + buffer.append(prefix.toString()); + buffer.append(")"); + return buffer.toString(); + } +} + +// keep this protected until I decide if it's a good way +// to separate id generation from collection (or should +// I just reuse hitcollector???) +interface IdGenerator { + public void generate(IndexReader reader) throws IOException; + public void handleDoc(int doc); +} + + +abstract class PrefixGenerator implements IdGenerator { + protected final Term prefix; + + PrefixGenerator(Term prefix) { + this.prefix = prefix; + } + + public void generate(IndexReader reader) throws IOException { + TermEnum enumerator = reader.terms(prefix); + TermDocs termDocs = reader.termDocs(); + + try { + + String prefixText = prefix.text(); + String prefixField = prefix.field(); + do { + Term term = enumerator.term(); + if (term != null && + term.text().startsWith(prefixText) && + term.field() == prefixField) + { + termDocs.seek(term); + while (termDocs.next()) { + handleDoc(termDocs.doc()); + } + } else { + break; + } + } while (enumerator.next()); + } finally { + termDocs.close(); + enumerator.close(); + } + } +} + diff --git a/src/test/org/apache/lucene/search/TestPrefixFilter.java b/src/test/org/apache/lucene/search/TestPrefixFilter.java new file mode 100644 index 00000000000..0af40698a8c --- /dev/null +++ b/src/test/org/apache/lucene/search/TestPrefixFilter.java @@ -0,0 +1,104 @@ +package org.apache.lucene.search; + +/** + * Copyright 2006 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import junit.framework.TestCase; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +/** + * Tests {@link PrefixFilter} class. + * + * @author Yura Smolsky + * @author yonik + */ +public class TestPrefixFilter extends TestCase { + public void testPrefixFilter() throws Exception { + RAMDirectory directory = new RAMDirectory(); + + String[] categories = new String[] {"/Computers/Linux", + "/Computers/Mac/One", + "/Computers/Mac/Two", + "/Computers/Windows"}; + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); + for (int i = 0; i < categories.length; i++) { + Document doc = new Document(); + doc.add(new Field("category", categories[i], Field.Store.YES, Field.Index.UN_TOKENIZED)); + writer.addDocument(doc); + } + writer.close(); + + // PrefixFilter combined with ConstantScoreQuery + PrefixFilter filter = new PrefixFilter(new Term("category", "/Computers")); + Query query = new ConstantScoreQuery(filter); + IndexSearcher searcher = new IndexSearcher(directory); + Hits hits = searcher.search(query); + assertEquals(4, hits.length()); + + // test middle of values + filter = new PrefixFilter(new Term("category", "/Computers/Mac")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(2, hits.length()); + + // test start of values + filter = new PrefixFilter(new Term("category", "/Computers/Linux")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(1, hits.length()); + + // test end of values + filter = new PrefixFilter(new Term("category", "/Computers/Windows")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(1, hits.length()); + + // test non-existant + filter = new PrefixFilter(new Term("category", "/Computers/ObsoleteOS")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // test non-existant, before values + filter = new PrefixFilter(new Term("category", "/Computers/AAA")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // test non-existant, after values + filter = new PrefixFilter(new Term("category", "/Computers/ZZZ")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(0, hits.length()); + + // test zero length prefix + filter = new PrefixFilter(new Term("category", "")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(4, hits.length()); + + // test non existent field + filter = new PrefixFilter(new Term("nonexistantfield", "/Computers")); + query = new ConstantScoreQuery(filter); + hits = searcher.search(query); + assertEquals(0, hits.length()); + } +}