ConjunctionScorer tests: LUCENE-693

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@467539 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-10-25 01:46:15 +00:00
parent 59d5576068
commit bc23956ebb
1 changed files with 277 additions and 0 deletions

View File

@ -0,0 +1,277 @@
package org.apache.lucene.search;
import junit.framework.TestCase;
import java.util.Random;
import java.util.BitSet;
import java.util.Set;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Randomized tests for conjunctive queries, i.e. {@link BooleanQuery}s in which
 * every clause is {@link BooleanClause.Occur#MUST}.
 *
 * <p>{@link #testConjunctions()} is the only test that runs by default. It
 * validates the hits of random conjunctions of {@link BitSet}-backed filters
 * against the intersection computed directly with {@link BitSet#and(BitSet)}.
 * The remaining tests are timing runs and are kept commented out at the bottom
 * of the class.
 *
 * <p>All randomness comes from a single {@link Random} seeded with 0, so a run
 * is reproducible.
 *
 * @author yonik
 * @version $Id$
 */
public class TestScorerPerf extends TestCase {
  /** Single source of randomness; fixed seed keeps every run reproducible. */
  Random r = new Random(0);
  /** When true, hits are checked against the expected answer. */
  boolean validate = true;  // set to false when doing performance testing
  /** Pool of random bit sets from which {@link #addClause} draws its clauses. */
  BitSet[] sets;
  /** Searcher used by {@link #doConjunctions}. */
  IndexSearcher s;

  /**
   * Points {@link #s} at a freshly created, empty in-memory index.
   * The filters used by {@link #doConjunctions} supply their own doc ids, so
   * the index content is irrelevant.
   */
  public void createDummySearcher() throws Exception {
    // Create a dummy index with nothing in it.
    // This could possibly fail if Lucene starts checking for docid ranges...
    RAMDirectory rd = new RAMDirectory();
    IndexWriter iw = new IndexWriter(rd, new WhitespaceAnalyzer(), true);
    iw.close();
    s = new IndexSearcher(rd);
  }

  /**
   * Builds a new index in <code>dir</code> holding <code>nDocs</code> documents.
   * Term number <code>j</code> (the one-character string whose char code is
   * <code>j</code>) is added to field "f" of a document when
   * <code>r.nextInt(nTerms) &lt;= j</code>, i.e. with probability
   * <code>(j+1)/nTerms</code>; higher-numbered terms are therefore more frequent.
   *
   * @param nDocs  number of documents to add
   * @param nTerms number of distinct terms; must be positive
   * @param dir    directory to create the index in (existing content is overwritten)
   */
  public void createRandomTerms(int nDocs, int nTerms, Directory dir) throws Exception {
    IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
    try {
      // NOTE(review): small, odd buffer size - presumably chosen so the index
      // ends up with several segments; confirm before changing.
      iw.setMaxBufferedDocs(123);
      for (int i = 0; i < nDocs; i++) {
        Document d = new Document();
        for (int j = 0; j < nTerms; j++) {
          if (r.nextInt(nTerms) <= j) {
            d.add(new Field("f", Character.toString((char)j), Field.Store.NO, Field.Index.UN_TOKENIZED));
          }
        }
        iw.addDocument(d);
      }
    } finally {
      // release the writer even if adding a document fails
      iw.close();
    }
  }

  /**
   * Returns a bit set in which <code>numBitsToSet</code> random positions in
   * <code>[0, sz)</code> have been set. Positions may repeat, so the resulting
   * cardinality can be lower than <code>numBitsToSet</code>.
   *
   * @param sz           exclusive upper bound for bit positions; must be positive
   * @param numBitsToSet number of random positions to draw
   */
  public BitSet randBitSet(int sz, int numBitsToSet) {
    BitSet set = new BitSet(sz);
    for (int i = 0; i < numBitsToSet; i++) {
      set.set(r.nextInt(sz));
    }
    return set;
  }

  /**
   * Returns <code>numSets</code> random bit sets over <code>[0, setSize)</code>.
   * The number of positions drawn for each set is itself random, in
   * <code>[0, setSize)</code>.
   */
  public BitSet[] randBitSets(int numSets, int setSize) {
    BitSet[] sets = new BitSet[numSets];
    for (int i = 0; i < sets.length; i++) {
      sets[i] = randBitSet(setSize, r.nextInt(setSize));
    }
    return sets;
  }

  /** Filter that accepts exactly the documents in a fixed bit set, whatever the reader. */
  public static class BitSetFilter extends Filter {
    public BitSet set;

    public BitSetFilter(BitSet set) {
      this.set = set;
    }

    /** Returns the wrapped bit set itself (not a copy); <code>reader</code> is ignored. */
    public BitSet bits(IndexReader reader) throws IOException {
      return set;
    }
  }

  /** Collector that counts hits and sums their doc ids. */
  public static class CountingHitCollector extends HitCollector {
    int count = 0;
    int sum = 0;

    public void collect(int doc, float score) {
      count++;
      sum += doc;  // use it to avoid any possibility of being optimized away
    }

    /** Number of hits collected so far. */
    public int getCount() { return count; }

    /** Sum of the doc ids collected so far. */
    public int getSum() { return sum; }
  }

  /**
   * Collector that additionally verifies that hits arrive as exactly the set
   * bits of <code>answer</code>, in increasing order.
   */
  public static class MatchingHitCollector extends CountingHitCollector {
    BitSet answer;
    /** Position of the last expected hit; -1 before the first one. */
    int pos = -1;

    public MatchingHitCollector(BitSet answer) {
      this.answer = answer;
    }

    /**
     * @throws RuntimeException if <code>doc</code> is not the next set bit of
     *         the expected answer
     */
    public void collect(int doc, float score) {
      // advance to the next expected doc; -1 means no more hits were expected
      pos = answer.nextSetBit(pos + 1);
      if (pos != doc) {
        throw new RuntimeException("Expected doc " + pos + " but got " + doc);
      }
      super.collect(doc, score);
    }
  }

  /**
   * Adds one required clause, backed by a randomly chosen member of
   * {@link #sets}, to <code>bq</code>.
   *
   * @param bq     query to extend
   * @param result intersection of the clauses added so far, or null if none yet
   * @return the updated intersection when {@link #validate} is set (a fresh
   *         copy for the first clause, otherwise <code>result</code> modified
   *         in place); <code>result</code> unchanged when it is not
   */
  BitSet addClause(BooleanQuery bq, BitSet result) {
    BitSet rnd = sets[r.nextInt(sets.length)];
    Query q = new ConstantScoreQuery(new BitSetFilter(rnd));
    bq.add(q, BooleanClause.Occur.MUST);
    if (validate) {
      // clone on first use so the shared pool entry is never modified
      if (result == null) result = (BitSet)rnd.clone();
      else result.and(rnd);
    }
    return result;
  }

  /**
   * Runs <code>iter</code> random conjunctions of 2..<code>maxClauses</code>
   * bit-set clauses against {@link #s}. When {@link #validate} is set, both
   * the order and the number of hits are checked against the intersection of
   * the chosen bit sets.
   *
   * @param iter       number of queries to run
   * @param maxClauses maximum clauses per query; must be at least 2
   * @return sum of all collected doc ids (only so the work cannot be optimized away)
   */
  public int doConjunctions(int iter, int maxClauses) throws IOException {
    int ret = 0;

    for (int i = 0; i < iter; i++) {
      int nClauses = r.nextInt(maxClauses - 1) + 2;  // min 2 clauses
      BooleanQuery bq = new BooleanQuery();
      BitSet result = null;
      for (int j = 0; j < nClauses; j++) {
        result = addClause(bq, result);
      }

      CountingHitCollector hc = validate ? new MatchingHitCollector(result)
                                         : new CountingHitCollector();
      s.search(bq, hc);
      ret += hc.getSum();
      if (validate) assertEquals(result.cardinality(), hc.getCount());
      // System.out.println(hc.getCount());
    }

    return ret;
  }

  /**
   * Builds a conjunction of distinct random terms of field "f", using the
   * same term naming as {@link #createRandomTerms}.
   *
   * <p>The clause count is drawn from 2..<code>maxClauses</code> and then
   * capped at <code>termsInIndex</code>, because no more distinct terms than
   * that exist and the rejection loop below would otherwise never finish.
   *
   * @param termsInIndex number of distinct terms to choose from; must be positive
   * @param maxClauses   maximum clauses in the query; must be at least 2
   */
  private BooleanQuery randomTermConjunction(int termsInIndex, int maxClauses) {
    int nClauses = r.nextInt(maxClauses - 1) + 2;  // min 2 clauses
    if (nClauses > termsInIndex) nClauses = termsInIndex;

    BooleanQuery bq = new BooleanQuery();
    BitSet used = new BitSet(termsInIndex);
    for (int j = 0; j < nClauses; j++) {
      int tnum;
      // don't pick same clause twice
      do { tnum = r.nextInt(termsInIndex); } while (used.get(tnum));
      used.set(tnum);  // remember the pick so the check above can reject it
      Query tq = new TermQuery(new Term("f", Character.toString((char)tnum)));
      bq.add(tq, BooleanClause.Occur.MUST);
    }
    return bq;
  }

  /**
   * Runs <code>iter</code> random conjunctions of distinct term queries.
   *
   * @param s            searcher over an index built by {@link #createRandomTerms}
   * @param termsInIndex number of distinct terms in that index
   * @param maxClauses   maximum clauses per query; must be at least 2
   * @param iter         number of queries to run
   * @return sum of all collected doc ids (only so the work cannot be optimized away)
   */
  public int doTermConjunctions(IndexSearcher s,
                                int termsInIndex,
                                int maxClauses,
                                int iter
  ) throws IOException {
    int ret = 0;

    for (int i = 0; i < iter; i++) {
      BooleanQuery bq = randomTermConjunction(termsInIndex, maxClauses);

      CountingHitCollector hc = new CountingHitCollector();
      s.search(bq, hc);
      ret += hc.getSum();
    }

    return ret;
  }

  /**
   * Runs <code>iter</code> random two-level conjunctions: an outer conjunction
   * of 2..<code>maxOuterClauses</code> sub-queries, each of which is itself a
   * conjunction of distinct term queries.
   *
   * @param s               searcher over an index built by {@link #createRandomTerms}
   * @param termsInIndex    number of distinct terms in that index
   * @param maxOuterClauses maximum sub-queries per outer query; must be at least 2
   * @param maxClauses      maximum term clauses per sub-query; must be at least 2
   * @param iter            number of queries to run
   * @return sum of all collected doc ids (only so the work cannot be optimized away)
   */
  public int doNestedTermConjunctions(IndexSearcher s,
                                      int termsInIndex,
                                      int maxOuterClauses,
                                      int maxClauses,
                                      int iter
  ) throws IOException {
    int ret = 0;

    for (int i = 0; i < iter; i++) {
      int oClauses = r.nextInt(maxOuterClauses - 1) + 2;
      BooleanQuery oq = new BooleanQuery();
      for (int o = 0; o < oClauses; o++) {
        oq.add(randomTermConjunction(termsInIndex, maxClauses), BooleanClause.Occur.MUST);
      } // outer

      CountingHitCollector hc = new CountingHitCollector();
      s.search(oq, hc);
      ret += hc.getSum();
    }

    return ret;
  }

  /** Validates 10000 random conjunctions over 1000 small (10-bit) sets. */
  public void testConjunctions() throws Exception {
    // test many small sets... the bugs will be found on boundary conditions
    createDummySearcher();
    try {
      validate = true;
      sets = randBitSets(1000, 10);
      doConjunctions(10000, 5);
    } finally {
      // close the searcher even when validation fails
      s.close();
    }
  }

  /***
  public void testConjunctionPerf() throws Exception {
    createDummySearcher();
    validate=false;
    sets=randBitSets(32,1000000);
    long start = System.currentTimeMillis();
    doConjunctions(500,6);
    long end = System.currentTimeMillis();
    s.close();
    System.out.println("milliseconds="+(end-start));
  }

  public void testConjunctionTerms() throws Exception {
    RAMDirectory dir = new RAMDirectory();
    System.out.println("Creating index");
    createRandomTerms(100000,25, dir);
    s = new IndexSearcher(dir);
    System.out.println("Starting performance test");
    long start = System.currentTimeMillis();
    doTermConjunctions(s,25,5,10000);
    long end = System.currentTimeMillis();
    s.close();
    System.out.println("milliseconds="+(end-start));
  }

  public void testNestedConjunctionTerms() throws Exception {
    RAMDirectory dir = new RAMDirectory();
    System.out.println("Creating index");
    createRandomTerms(100000,25, dir);
    s = new IndexSearcher(dir);
    System.out.println("Starting performance test");
    long start = System.currentTimeMillis();
    doNestedTermConjunctions(s,25,4,6,1000);
    long end = System.currentTimeMillis();
    s.close();
    System.out.println("milliseconds="+(end-start));
  }
  ***/
}