Applied trejkaz's patch from https://issues.apache.org/jira/browse/LUCENE-1240 to optimise TermFilter.java and included new JUnit test

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@638631 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Harwood 2008-03-18 23:01:00 +00:00
parent 4bd9dba46f
commit c172010da0
2 changed files with 95 additions and 16 deletions

View File

@ -18,9 +18,10 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@ -37,7 +38,7 @@ import org.apache.lucene.index.TermDocs;
*/
public class TermsFilter extends Filter
{
ArrayList termsList=new ArrayList();
Set terms=new TreeSet();
/**
* Adds a term to the list of acceptable terms
@ -45,7 +46,7 @@ public class TermsFilter extends Filter
*/
public void addTerm(Term term)
{
termsList.add(term);
terms.add(term);
}
/* (non-Javadoc)
@ -54,16 +55,24 @@ public class TermsFilter extends Filter
public BitSet bits(IndexReader reader) throws IOException
{
BitSet result=new BitSet(reader.maxDoc());
for (Iterator iter = termsList.iterator(); iter.hasNext();)
{
Term term = (Term) iter.next();
TermDocs td=reader.termDocs(term);
while (td.next())
{
result.set(td.doc());
}
}
return result;
TermDocs td = reader.termDocs();
try
{
for (Iterator iter = terms.iterator(); iter.hasNext();)
{
Term term = (Term) iter.next();
td.seek(term);
while (td.next())
{
result.set(td.doc());
}
}
}
finally
{
td.close();
}
return result;
}
public boolean equals(Object obj)
@ -73,14 +82,14 @@ public class TermsFilter extends Filter
if((obj == null) || (obj.getClass() != this.getClass()))
return false;
TermsFilter test = (TermsFilter)obj;
return (termsList == test.termsList||
(termsList!= null && termsList.equals(test.termsList)));
return (terms == test.terms ||
(terms != null && terms.equals(test.terms)));
}
public int hashCode()
{
int hash=9;
for (Iterator iter = termsList.iterator(); iter.hasNext();)
for (Iterator iter = terms.iterator(); iter.hasNext();)
{
Term term = (Term) iter.next();
hash = 31 * hash + term.hashCode();

View File

@ -0,0 +1,70 @@
package org.apache.lucene.search;
import java.util.BitSet;
import java.util.HashSet;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.RAMDirectory;
public class TermsFilterTest extends TestCase
{
public void testCachability() throws Exception
{
TermsFilter a=new TermsFilter();
a.addTerm(new Term("field1","a"));
a.addTerm(new Term("field1","b"));
HashSet cachedFilters=new HashSet();
cachedFilters.add(a);
TermsFilter b=new TermsFilter();
b.addTerm(new Term("field1","a"));
b.addTerm(new Term("field1","b"));
assertTrue("Must be cached",cachedFilters.contains(b));
b.addTerm(new Term("field1","a")); //duplicate term
assertTrue("Must be cached",cachedFilters.contains(b));
b.addTerm(new Term("field1","c"));
assertFalse("Must not be cached",cachedFilters.contains(b));
}
public void testMissingTerms() throws Exception
{
String fieldName="field1";
RAMDirectory rd=new RAMDirectory();
IndexWriter w=new IndexWriter(rd,new WhitespaceAnalyzer(),MaxFieldLength.UNLIMITED);
for (int i = 0; i < 100; i++)
{
Document doc=new Document();
int term=i*10; //terms are units of 10;
doc.add(new Field(fieldName,""+term,Field.Store.YES,Field.Index.UN_TOKENIZED));
w.addDocument(doc);
}
w.close();
IndexReader reader = IndexReader.open(rd);
TermsFilter tf=new TermsFilter();
tf.addTerm(new Term(fieldName,"19"));
BitSet bits = tf.bits(reader);
assertEquals("Must match nothing", 0, bits.cardinality());
tf.addTerm(new Term(fieldName,"20"));
bits=tf.bits(reader);
assertEquals("Must match 1", 1, bits.cardinality());
tf.addTerm(new Term(fieldName,"10"));
bits=tf.bits(reader);
assertEquals("Must match 2", 2, bits.cardinality());
tf.addTerm(new Term(fieldName,"00"));
bits=tf.bits(reader);
assertEquals("Must match 2", 2, bits.cardinality());
}
}