Add new method IndexReader.setNorm(), to permit altering boosts after an index is created.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150156 13f79535-47bb-0310-9956-ffa450edef68
Doug Cutting 2003-12-15 23:04:42 +00:00
parent 321b292be9
commit bd2acf0bf8
6 changed files with 237 additions and 28 deletions

CHANGES.txt

@@ -7,6 +7,12 @@ $Id$
1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
   throw ParseException instead. (Erik Hatcher)
2. Fixed a NullPointerException in Query.explain(). (Doug Cutting)
+3. Added a new method IndexReader.setNorm(), that permits one to
+   alter the boosting of fields after an index is created.

1.3 RC3
1. Added minMergeDocs in IndexWriter.  This can be raised to speed

FilterIndexReader.java

@@ -128,6 +128,9 @@ public class FilterIndexReader extends IndexReader {
  public void undeleteAll() throws IOException { in.undeleteAll(); }
  public byte[] norms(String f) throws IOException { return in.norms(f); }
+ public void setNorm(int d, String f, byte b) throws IOException {
+   in.setNorm(d,f,b);
+ }
  public TermEnum terms() throws IOException { return in.terms(); }
  public TermEnum terms(Term t) throws IOException { return in.terms(t); }

IndexReader.java

@@ -63,6 +63,7 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; // for javadoc
+ import org.apache.lucene.search.Similarity;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -271,6 +272,30 @@ public abstract class IndexReader {
   */
  public abstract byte[] norms(String field) throws IOException;

+ /** Expert: Resets the normalization factor for the named field of the named
+  * document.  The norm represents the product of the field's {@link
+  * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
+  * int) length normalization}.  Thus, to preserve the length normalization
+  * values when resetting this, one should base the new value upon the old.
+  *
+  * @see #norms(String)
+  * @see Similarity#decodeNorm(byte)
+  */
+ public abstract void setNorm(int doc, String field, byte value)
+     throws IOException;
+
+ /** Expert: Resets the normalization factor for the named field of the named
+  * document.
+  *
+  * @see #norms(String)
+  * @see Similarity#decodeNorm(byte)
+  */
+ public void setNorm(int doc, String field, float value)
+     throws IOException {
+   setNorm(doc, field, Similarity.encodeNorm(value));
+ }
/** Returns an enumeration of all the terms in the index.
The enumeration is ordered by Term.compareTo(). Each term
is greater than all that precede it in the enumeration.
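
The javadoc above is worth pausing on: the stored norm is the product of the field's boost and its length normalization, so code that only wants to change the boost should derive the new value from the old one rather than overwrite it. A minimal sketch of that pattern, assuming an already-open Directory named "directory" and an indexed field named "contents" (both names are illustrative):

    // Double the effective boost of doc 0's "contents" field while
    // preserving its length normalization, by basing the new norm on the old.
    IndexReader reader = IndexReader.open(directory);
    byte[] norms = reader.norms("contents");       // one norm byte per document
    float old = Similarity.decodeNorm(norms[0]);   // decodes boost * lengthNorm
    reader.setNorm(0, "contents", old * 2.0f);     // re-encoded via encodeNorm()
    reader.close();                                // commits the new norms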

SegmentReader.java

@@ -64,6 +64,7 @@ import java.util.Vector;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.InputStream;
+ import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
@@ -84,6 +85,7 @@ final class SegmentReader extends IndexReader {
  BitVector deletedDocs = null;
  private boolean deletedDocsDirty = false;
+ private boolean normsDirty = false;

  InputStream freqStream;
  InputStream proxStream;
@@ -91,10 +93,25 @@ final class SegmentReader extends IndexReader {
  // Compound File Reader when based on a compound file segment
  CompoundFileReader cfsReader;

- private static class Norm {
+ private class Norm {
    public Norm(InputStream in) { this.in = in; }

-   public InputStream in;
-   public byte[] bytes;
+   private InputStream in;
+   private byte[] bytes;
+   private boolean dirty;
+
+   private void reWrite(String name) throws IOException {
+     // NOTE: norms are re-written in regular directory, not cfs
+     OutputStream out = directory().createFile(segment + ".tmp");
+     try {
+       out.writeBytes(bytes, maxDoc());
+     } finally {
+       out.close();
+     }
+     String fileName = segment + ".f" + fieldInfos.fieldNumber(name);
+     directory().renameFile(segment + ".tmp", fileName);
+     this.dirty = false;
+   }
  }

  private Hashtable norms = new Hashtable();
@@ -135,13 +152,29 @@ final class SegmentReader extends IndexReader {
  }

  protected final synchronized void doClose() throws IOException {
-   if (deletedDocsDirty) {
+   if (deletedDocsDirty || normsDirty) {
      synchronized (directory()) { // in- & inter-process sync
        new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
                      IndexWriter.COMMIT_LOCK_TIMEOUT) {
          public Object doBody() throws IOException {
-           deletedDocs.write(directory(), segment + ".tmp");
-           directory().renameFile(segment + ".tmp", segment + ".del");
+           if (deletedDocsDirty) {               // re-write deleted
+             deletedDocs.write(directory(), segment + ".tmp");
+             directory().renameFile(segment + ".tmp", segment + ".del");
+           }
+           if (normsDirty) {                     // re-write norms
+             Enumeration keys = norms.keys();
+             Enumeration values = norms.elements();
+             while (values.hasMoreElements()) {
+               String field = (String)keys.nextElement();
+               Norm norm = (Norm)values.nextElement();
+               if (norm.dirty) {
+                 norm.reWrite(field);
+               }
+             }
+           }
            if(segmentInfos != null)
              segmentInfos.write(directory());
            else
@@ -151,6 +184,7 @@ final class SegmentReader extends IndexReader {
      }.run();
    }
    deletedDocsDirty = false;
+   normsDirty = false;
  }

  fieldsReader.close();
@@ -189,7 +223,7 @@ final class SegmentReader extends IndexReader {
    deletedDocs.set(docNum);
  }

- public void undeleteAll() throws IOException {
+ public synchronized void undeleteAll() throws IOException {
    synchronized (directory()) { // in- & inter-process sync
      new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
                    IndexWriter.COMMIT_LOCK_TIMEOUT) {
@@ -299,44 +333,60 @@ final class SegmentReader extends IndexReader {
    return fieldSet;
  }

- public final byte[] norms(String field) throws IOException {
+ public synchronized byte[] norms(String field) throws IOException {
    Norm norm = (Norm)norms.get(field);
-   if (norm == null)
+   if (norm == null)                             // not an indexed field
      return null;
-   if (norm.bytes == null) {
+   if (norm.bytes == null) {                     // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
-     norm.bytes = bytes;
+     norm.bytes = bytes;                         // cache it
    }
    return norm.bytes;
  }

- final void norms(String field, byte[] bytes, int offset) throws IOException {
-   InputStream normStream = normStream(field);
-   if (normStream == null)
+ public synchronized void setNorm(int doc, String field, byte value)
+     throws IOException {
+   Norm norm = (Norm)norms.get(field);
+   if (norm == null)                             // not an indexed field
+     return;
+   norm.dirty = true;                            // mark it dirty
+   normsDirty = true;
+   norms(field)[doc] = value;                    // set the value
+ }
+
+ /** Read norms into a pre-allocated array. */
+ synchronized void norms(String field, byte[] bytes, int offset)
+     throws IOException {
+   Norm norm = (Norm)norms.get(field);
+   if (norm == null)
      return;                                     // use zeros in array
-   try {
+   if (norm.bytes != null) {                     // can copy from cache
+     System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
+     return;
+   }
+
+   InputStream normStream = (InputStream)norm.in.clone();
+   try {                                         // read from disk
+     normStream.seek(0);
      normStream.readBytes(bytes, offset, maxDoc());
    } finally {
      normStream.close();
    }
  }

- final InputStream normStream(String field) throws IOException {
-   Norm norm = (Norm)norms.get(field);
-   if (norm == null)
-     return null;
-   InputStream result = (InputStream)norm.in.clone();
-   result.seek(0);
-   return result;
- }

- private final void openNorms(Directory useDir) throws IOException {
+ private final void openNorms(Directory cfsDir) throws IOException {
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
-     if (fi.isIndexed)
-       norms.put(fi.name,
-         new Norm(useDir.openFile(segment + ".f" + fi.number)));
+     if (fi.isIndexed) {
+       String fileName = segment + ".f" + fi.number;
+       // look first for re-written file, then in compound format
+       Directory d = directory().fileExists(fileName) ? directory() : cfsDir;
+       norms.put(fi.name, new Norm(d.openFile(fileName)));
+     }
    }
  }
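
Net effect of the SegmentReader changes: setNorm() only mutates state in memory (the per-field Norm.dirty flag plus the reader-wide normsDirty flag), and the dirty ".fN" norm files are re-written, under the commit lock, only when the reader is closed. A small usage sketch of that flow, with an assumed Directory named "dir" and an assumed indexed field "contents":

    IndexReader reader = IndexReader.open(dir);
    reader.setNorm(42, "contents", (byte) 100);   // buffered: marks norms dirty
    // nothing has been written to the directory yet
    reader.close();                               // doClose() re-writes dirty
                                                  // ".fN" files under the lock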

SegmentsReader.java

@@ -165,6 +165,13 @@ final class SegmentsReader extends IndexReader
    return bytes;
  }

+ public synchronized void setNorm(int n, String field, byte value)
+     throws IOException {
+   normsCache.remove(field);                      // clear cache
+   int i = readerIndex(n);                        // find segment num
+   readers[i].setNorm(n-starts[i], field, value); // dispatch
+ }
public final TermEnum terms() throws IOException {
return new SegmentsTermEnum(readers, starts, null);
}
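
The dispatch above uses the usual multi-segment document numbering: starts[i] holds the base document number of segment i, so composite document n lives in segment i as local document n - starts[i]. An illustrative sketch of that mapping (segmentFor is a hypothetical helper written for this note, not this class's actual readerIndex() implementation):

    // With segment sizes {3, 4, 2}: starts = {0, 3, 7}.
    static int segmentFor(int n, int[] starts) {
      int i = starts.length - 1;
      while (starts[i] > n) i--;   // last segment whose base doc is <= n
      return i;
    }
    // e.g. n = 5 -> segment 1, local doc 5 - starts[1] = 2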

TestSetNorm.java

@@ -0,0 +1,118 @@
package org.apache.lucene.search;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
/** Document boost unit test.
*
* @author Doug Cutting
* @version $Revision$
*/
public class TestSetNorm extends TestCase {
  public TestSetNorm(String name) {
    super(name);
  }

  public void testSetNorm() throws Exception {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    // add the same document four times
    Field f1 = Field.Text("field", "word");
    Document d1 = new Document();
    d1.add(f1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.close();

    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.open(store);
    reader.setNorm(0, "field", 1.0f);
    reader.setNorm(1, "field", 2.0f);
    reader.setNorm(2, "field", 4.0f);
    reader.setNorm(3, "field", 16.0f);
    reader.close();

    // check that searches are ordered by this boost
    final float[] scores = new float[4];

    new IndexSearcher(store).search
      (new TermQuery(new Term("field", "word")),
       new HitCollector() {
         public final void collect(int doc, float score) {
           scores[doc] = score;
         }
       });

    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++) {
      assertTrue(scores[i] > lastScore);
      lastScore = scores[i];
    }
  }
}
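
A note on the boost values the test chooses: norms are stored as a single byte per document (the float overload of setNorm() funnels through Similarity.encodeNorm), so the encoding is lossy and nearby floats can collapse to the same byte. Using well-separated values such as 1, 2, 4 and 16 presumably keeps the four encoded norms, and hence the four scores, distinct. A round-trip sketch:

    byte b = Similarity.encodeNorm(2.0f);      // float squeezed into one byte
    float back = Similarity.decodeNorm(b);     // approximately 2.0f, not exact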