mirror of https://github.com/apache/lucene.git
norms test: LUCENE-756
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@493897 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c9795dd56b
commit
0971e23550
|
@ -0,0 +1,224 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.Field.Index;
|
||||||
|
import org.apache.lucene.document.Field.Store;
|
||||||
|
import org.apache.lucene.search.DefaultSimilarity;
|
||||||
|
import org.apache.lucene.search.Similarity;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that norms info is preserved during index life - incluidng seprate norms, addDocument, addIndexes, optimize.
|
||||||
|
*/
|
||||||
|
/**
 * Test that norms info is preserved during index life - including separate norms,
 * addDocument, addIndexes, optimize.
 *
 * <p>The test maintains two parallel in-memory lists ({@link #norms} and
 * {@link #modifiedNorms}) that mirror what the index's norms should be for every
 * document, and repeatedly checks the on-disk norms against them.
 * Field "f1" is the one whose norms get modified via {@code IndexReader.setNorm},
 * which forces Lucene to write separate ".sN" norms files.
 */
public class TestNorms extends TestCase {

  /**
   * Similarity with a constant length norm of 1, so that the effective norm of
   * each field is determined solely by the boost assigned in {@link #newDoc()}.
   */
  private class SimilarityOne extends DefaultSimilarity {
    public float lengthNorm(String fieldName, int numTerms) {
      return 1;
    }
  }

  // number of fields ("f0".."f9") added per document in newDoc()
  private static final int NUM_FIELDS = 10;

  private Similarity similarityOne;
  private Analyzer anlzr;
  // number of documents indexed so far; equals the expected length of each norms array
  private int numDocNorms;
  // expected norm value per doc (Float), as originally written at indexing time
  private ArrayList norms;
  // expected norm value per doc (Float) for field "f1" after setNorm modifications
  private ArrayList modifiedNorms;
  // state for nextNorm(): last norm handed out, and the step used to find the next one
  private float lastNorm = 0;
  private float normDelta = (float) 0.001;

  public TestNorms(String s) {
    super(s);
  }

  protected void setUp() throws IOException {
    similarityOne = new SimilarityOne();
    anlzr = new StandardAnalyzer();
  }

  protected void tearDown() throws IOException {
  }

  /**
   * Test that norms values are preserved as the index is maintained.
   * Including separate norms.
   * Including merging indexes with separate norms.
   * Including optimize.
   */
  public void testNorms() throws IOException {
    // tmp dir
    String tempDir = System.getProperty("java.io.tmpdir");
    if (tempDir == null) {
      throw new IOException("java.io.tmpdir undefined, cannot run test");
    }

    // test with a single index: index1
    File indexDir1 = new File(tempDir, "lucenetestindex1");
    Directory dir1 = FSDirectory.getDirectory(indexDir1, true);

    norms = new ArrayList();
    modifiedNorms = new ArrayList();

    createIndex(dir1);
    doTestNorms(dir1);

    // test with a single index: index2
    // (stash index1's expected norms aside and start fresh bookkeeping for index2)
    ArrayList norms1 = norms;
    ArrayList modifiedNorms1 = modifiedNorms;
    int numDocNorms1 = numDocNorms;

    norms = new ArrayList();
    modifiedNorms = new ArrayList();
    numDocNorms = 0;

    File indexDir2 = new File(tempDir, "lucenetestindex2");
    Directory dir2 = FSDirectory.getDirectory(indexDir2, true);

    createIndex(dir2);
    doTestNorms(dir2);

    // add index1 and index2 to a third index: index3
    File indexDir3 = new File(tempDir, "lucenetestindex3");
    Directory dir3 = FSDirectory.getDirectory(indexDir3, true);

    createIndex(dir3);
    IndexWriter iw = new IndexWriter(dir3,anlzr,false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.addIndexes(new Directory[]{dir1,dir2});
    iw.close();

    // expected norms for index3 are index1's docs followed by index2's docs,
    // matching the order the sub-indexes were added above
    norms1.addAll(norms);
    norms = norms1;
    modifiedNorms1.addAll(modifiedNorms);
    modifiedNorms = modifiedNorms1;
    numDocNorms += numDocNorms1;

    // test with index3
    verifyIndex(dir3);
    doTestNorms(dir3);

    // now with optimize
    iw = new IndexWriter(dir3,anlzr,false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.optimize();
    iw.close();
    verifyIndex(dir3);

    dir1.close();
    dir2.close();
    dir3.close();
  }

  /**
   * Exercise one index: interleave adding docs (with and without compound file
   * format), modifying norms of field "f1", and verifying all norms after each step.
   */
  private void doTestNorms(Directory dir) throws IOException {
    for (int i=0; i<5; i++) {
      addDocs(dir,12,true);
      verifyIndex(dir);
      modifyNormsForF1(dir);
      verifyIndex(dir);
      addDocs(dir,12,false);
      verifyIndex(dir);
      modifyNormsForF1(dir);
      verifyIndex(dir);
    }
  }

  /** Create a fresh, empty index in {@code dir} with this test's writer settings. */
  private void createIndex(Directory dir) throws IOException {
    IndexWriter iw = new IndexWriter(dir,anlzr,true);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.setSimilarity(similarityOne);
    iw.setUseCompoundFile(true);
    iw.close();
  }

  /**
   * Swap the norms of field "f1" between pairs of documents via
   * {@code IndexReader.setNorm}, updating {@link #modifiedNorms} to match,
   * so the index now carries separate norms files for "f1".
   */
  private void modifyNormsForF1(Directory dir) throws IOException {
    IndexReader ir = IndexReader.open(dir);
    int n = ir.maxDoc();
    for (int i = 0; i < n; i+=3) { // modify for every third doc
      int k = (i*3) % modifiedNorms.size(); // partner doc whose norm is swapped with doc i
      float origNorm = ((Float)modifiedNorms.get(i)).floatValue();
      float newNorm = ((Float)modifiedNorms.get(k)).floatValue();
      //System.out.println("Modifying: for "+i+" from "+origNorm+" to "+newNorm);
      //System.out.println("      and: for "+k+" from "+newNorm+" to "+origNorm);
      // record the swap in the expected-norms bookkeeping first, then apply it to the index
      modifiedNorms.set(i, new Float(newNorm));
      modifiedNorms.set(k, new Float(origNorm));
      ir.setNorm(i, "f"+1, newNorm);
      ir.setNorm(k, "f"+1, origNorm);
    }
    ir.close();
  }

  /**
   * Check that each field's stored norms decode exactly to the expected values:
   * field "f1" is compared against {@link #modifiedNorms}, all other fields
   * against {@link #norms}.
   */
  private void verifyIndex(Directory dir) throws IOException {
    IndexReader ir = IndexReader.open(dir);
    for (int i = 0; i < NUM_FIELDS; i++) {
      String field = "f"+i;
      byte b[] = ir.norms(field);
      assertEquals("number of norms mismatches",numDocNorms,b.length);
      ArrayList storedNorms = (i==1 ? modifiedNorms : norms);
      for (int j = 0; j < b.length; j++) {
        float norm = Similarity.decodeNorm(b[j]);
        float norm1 = ((Float)storedNorms.get(j)).floatValue();
        assertEquals("stored norm value of "+field+" for doc "+j+" is "+norm+" - a mismatch!", norm, norm1, 0.000001);
      }
    }
  }

  /**
   * Add {@code ndocs} documents to the index in {@code dir}, using the compound
   * file format iff {@code compound} is true.
   */
  private void addDocs(Directory dir, int ndocs, boolean compound) throws IOException {
    IndexWriter iw = new IndexWriter(dir,anlzr,false);
    iw.setMaxBufferedDocs(5);
    iw.setMergeFactor(3);
    iw.setSimilarity(similarityOne);
    iw.setUseCompoundFile(compound);
    for (int i = 0; i < ndocs; i++) {
      iw.addDocument(newDoc());
    }
    iw.close();
  }

  // create the next document: 10 untokenized fields "f0".."f9", all boosted by the
  // same fresh norm value (with SimilarityOne's lengthNorm of 1, the boost alone
  // determines the stored norm)
  private Document newDoc() {
    Document d = new Document();
    float boost = nextNorm();
    for (int i = 0; i < 10; i++) {
      Field f = new Field("f"+i,"v"+i,Store.NO,Index.UN_TOKENIZED);
      f.setBoost(boost);
      d.add(f);
    }
    return d;
  }

  // return unique norm values that are unchanged by encoding/decoding
  private float nextNorm() {
    float norm = lastNorm + normDelta;
    do {
      // step forward until the encode/decode round-trip yields a value strictly
      // greater than the last one handed out, guaranteeing distinct stored norms
      float norm1 = Similarity.decodeNorm(Similarity.encodeNorm(norm));
      if (norm1 > lastNorm) {
        //System.out.println(norm1+" > "+lastNorm);
        norm = norm1;
        break;
      }
      norm += normDelta;
    } while (true);
    norms.add(numDocNorms, new Float(norm));
    modifiedNorms.add(numDocNorms, new Float(norm));
    //System.out.println("creating norm("+numDocNorms+"): "+norm);
    numDocNorms ++;
    lastNorm = (norm>10 ? 0 : norm); //there's a limit to how many distinct values can be stored in a single byte
    return norm;
  }

}
|
Loading…
Reference in New Issue