mirror of https://github.com/apache/lucene.git
Add new method IndexReader.setNorm(), to permit altering boosts after an index is created.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150156 13f79535-47bb-0310-9956-ffa450edef68
parent 321b292be9
commit bd2acf0bf8
CHANGES.txt
@@ -7,6 +7,12 @@ $Id$
  1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
     throw ParseException instead. (Erik Hatcher)
 
+ 2. Fixed a NullPointerException in Query.explain(). (Doug Cutting)
+
+ 3. Added a new method IndexReader.setNorm(), that permits one to
+    alter the boosting of fields after an index is created.
+
+
 1.3 RC3
 
  1. Added minMergeDocs in IndexWriter.  This can be raised to speed
FilterIndexReader.java
@@ -128,6 +128,9 @@ public class FilterIndexReader extends IndexReader {
   public void undeleteAll() throws IOException { in.undeleteAll(); }
 
   public byte[] norms(String f) throws IOException { return in.norms(f); }
+  public void setNorm(int d, String f, byte b) throws IOException {
+    in.setNorm(d,f,b);
+  }
 
   public TermEnum terms() throws IOException { return in.terms(); }
   public TermEnum terms(Term t) throws IOException { return in.terms(t); }
IndexReader.java
@@ -63,6 +63,7 @@ import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;          // for javadoc
+import org.apache.lucene.search.Similarity;
 
 /** IndexReader is an abstract class, providing an interface for accessing an
   index.  Search of an index is done entirely through this abstract interface,
@@ -271,6 +272,30 @@ public abstract class IndexReader {
    */
   public abstract byte[] norms(String field) throws IOException;
 
+  /** Expert: Resets the normalization factor for the named field of the named
+   * document.  The norm represents the product of the field's {@link
+   * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
+   * int) length normalization}.  Thus, to preserve the length normalization
+   * values when resetting this, one should base the new value upon the old.
+   *
+   * @see #norms(String)
+   * @see Similarity#decodeNorm(byte)
+   */
+  public abstract void setNorm(int doc, String field, byte value)
+    throws IOException;
+
+  /** Expert: Resets the normalization factor for the named field of the named
+   * document.
+   *
+   * @see #norms(String)
+   * @see Similarity#decodeNorm(byte)
+   */
+  public void setNorm(int doc, String field, float value)
+    throws IOException {
+    setNorm(doc, field, Similarity.encodeNorm(value));
+  }
+
+
   /** Returns an enumeration of all the terms in the index.
     The enumeration is ordered by Term.compareTo().  Each term
     is greater than all that precede it in the enumeration.
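Usage sketch (not part of this commit; class name and index path are illustrative): since a norm byte stores the product of boost and length normalization, the javadoc above advises deriving a new value from the old one, e.g. via Similarity.decodeNorm():

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Similarity;

// Hypothetical helper: doubles the effective boost of "field" for doc 0
// while preserving its length normalization, per the setNorm() javadoc.
public class AdjustNorm {
  public static void main(String[] args) throws IOException {
    IndexReader reader = IndexReader.open(args[0]); // path to an existing index
    byte[] norms = reader.norms("field");           // current norm bytes
    float old = Similarity.decodeNorm(norms[0]);    // decode the old factor
    reader.setNorm(0, "field", old * 2.0f);         // base the new value on the old
    reader.close();                                 // close() re-writes dirty norms
  }
}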
SegmentReader.java
@@ -64,6 +64,7 @@ import java.util.Vector;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
 import org.apache.lucene.store.Lock;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
@@ -84,6 +85,7 @@ final class SegmentReader extends IndexReader {
 
   BitVector deletedDocs = null;
   private boolean deletedDocsDirty = false;
+  private boolean normsDirty = false;
 
   InputStream freqStream;
   InputStream proxStream;
@@ -91,10 +93,25 @@ final class SegmentReader extends IndexReader {
   // Compound File Reader when based on a compound file segment
   CompoundFileReader cfsReader;
 
-  private static class Norm {
+  private class Norm {
     public Norm(InputStream in) { this.in = in; }
-    public InputStream in;
-    public byte[] bytes;
+
+    private InputStream in;
+    private byte[] bytes;
+    private boolean dirty;
+
+    private void reWrite(String name) throws IOException {
+      // NOTE: norms are re-written in regular directory, not cfs
+      OutputStream out = directory().createFile(segment + ".tmp");
+      try {
+        out.writeBytes(bytes, maxDoc());
+      } finally {
+        out.close();
+      }
+      String fileName = segment + ".f" + fieldInfos.fieldNumber(name);
+      directory().renameFile(segment + ".tmp", fileName);
+      this.dirty = false;
+    }
   }
   private Hashtable norms = new Hashtable();
 
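Annotation (not part of this commit): Norm.reWrite() writes the complete norm array to segment + ".tmp" and only then renames it over the final per-field name, so a failure mid-write never leaves a truncated norm file under its final name. The same idiom in generic java.io terms (a sketch, not the Lucene Directory API; java.io.File.renameTo may refuse to replace an existing file on some platforms):

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

// Generic sketch of the write-then-rename idiom used by Norm.reWrite().
class SafeReplace {
  static void replace(File target, byte[] bytes) throws IOException {
    File tmp = new File(target.getPath() + ".tmp");
    FileOutputStream out = new FileOutputStream(tmp);
    try {
      out.write(bytes);            // the temp file gets the complete contents
    } finally {
      out.close();
    }
    if (!tmp.renameTo(target))     // swap in; readers never see a partial file
      throw new IOException("could not rename " + tmp + " to " + target);
  }
}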
@@ -135,13 +152,29 @@ final class SegmentReader extends IndexReader {
   }
 
   protected final synchronized void doClose() throws IOException {
-    if (deletedDocsDirty) {
+    if (deletedDocsDirty || normsDirty) {
       synchronized (directory()) {              // in- & inter-process sync
         new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
                       IndexWriter.COMMIT_LOCK_TIMEOUT) {
             public Object doBody() throws IOException {
-              deletedDocs.write(directory(), segment + ".tmp");
-              directory().renameFile(segment + ".tmp", segment + ".del");
+
+              if (deletedDocsDirty) {           // re-write deleted
+                deletedDocs.write(directory(), segment + ".tmp");
+                directory().renameFile(segment + ".tmp", segment + ".del");
+              }
+
+              if (normsDirty) {                 // re-write norms
+                Enumeration keys = norms.keys();
+                Enumeration values = norms.elements();
+                while (values.hasMoreElements()) {
+                  String field = (String)keys.nextElement();
+                  Norm norm = (Norm)values.nextElement();
+                  if (norm.dirty) {
+                    norm.reWrite(field);
+                  }
+                }
+              }
+
               if(segmentInfos != null)
                 segmentInfos.write(directory());
               else
@@ -151,6 +184,7 @@ final class SegmentReader extends IndexReader {
           }.run();
       }
       deletedDocsDirty = false;
+      normsDirty = false;
     }
 
     fieldsReader.close();
@@ -189,7 +223,7 @@ final class SegmentReader extends IndexReader {
     deletedDocs.set(docNum);
   }
 
-  public void undeleteAll() throws IOException {
+  public synchronized void undeleteAll() throws IOException {
     synchronized (directory()) {              // in- & inter-process sync
       new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
                     IndexWriter.COMMIT_LOCK_TIMEOUT) {
@@ -299,44 +333,60 @@ final class SegmentReader extends IndexReader {
     return fieldSet;
   }
 
-  public final byte[] norms(String field) throws IOException {
+  public synchronized byte[] norms(String field) throws IOException {
     Norm norm = (Norm)norms.get(field);
-    if (norm == null)
+    if (norm == null)                           // not an indexed field
      return null;
-    if (norm.bytes == null) {
+    if (norm.bytes == null) {                   // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
-     norm.bytes = bytes;
+     norm.bytes = bytes;                        // cache it
    }
    return norm.bytes;
   }
 
-  final void norms(String field, byte[] bytes, int offset) throws IOException {
-    InputStream normStream = normStream(field);
-    if (normStream == null)
+  public synchronized void setNorm(int doc, String field, byte value)
+    throws IOException {
+    Norm norm = (Norm)norms.get(field);
+    if (norm == null)                           // not an indexed field
+      return;
+    norm.dirty = true;                          // mark it dirty
+    normsDirty = true;
+
+    norms(field)[doc] = value;                  // set the value
+  }
+
+  /** Read norms into a pre-allocated array. */
+  synchronized void norms(String field, byte[] bytes, int offset)
+    throws IOException {
+
+    Norm norm = (Norm)norms.get(field);
+    if (norm == null)
       return;                                   // use zeros in array
-    try {
+
+    if (norm.bytes != null) {                   // can copy from cache
+      System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
+      return;
+    }
+
+    InputStream normStream = (InputStream)norm.in.clone();
+    try {                                       // read from disk
      normStream.seek(0);
      normStream.readBytes(bytes, offset, maxDoc());
    } finally {
      normStream.close();
    }
   }
 
-  final InputStream normStream(String field) throws IOException {
-    Norm norm = (Norm)norms.get(field);
-    if (norm == null)
-      return null;
-    InputStream result = (InputStream)norm.in.clone();
-    result.seek(0);
-    return result;
-  }
-
-  private final void openNorms(Directory useDir) throws IOException {
+  private final void openNorms(Directory cfsDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed)
-        norms.put(fi.name,
-                  new Norm(useDir.openFile(segment + ".f" + fi.number)));
+      if (fi.isIndexed) {
+        String fileName = segment + ".f" + fi.number;
+        // look first for re-written file, then in compound format
+        Directory d = directory().fileExists(fileName) ? directory() : cfsDir;
+        norms.put(fi.name, new Norm(d.openFile(fileName)));
+      }
     }
   }
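Annotation (not part of this commit): the pre-allocated-array overload norms(field, bytes, offset) lets a multi-segment reader splice each segment's norms into one array indexed by global document number. A sketch of that call pattern, assuming a readers[] array and a starts[] offset table as in SegmentsReader (names illustrative):

// Illustrative: build one norm array spanning all segments; each
// sub-reader fills the slice beginning at its starts[i] offset.
byte[] allNorms(SegmentReader[] readers, int[] starts, int maxDoc, String field)
    throws IOException {
  byte[] bytes = new byte[maxDoc];              // one byte per global doc
  for (int i = 0; i < readers.length; i++)
    readers[i].norms(field, bytes, starts[i]);  // segment i's slice
  return bytes;
}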
SegmentsReader.java
@@ -165,6 +165,13 @@ final class SegmentsReader extends IndexReader
     return bytes;
   }
 
+  public synchronized void setNorm(int n, String field, byte value)
+    throws IOException {
+    normsCache.remove(field);                   // clear cache
+    int i = readerIndex(n);                     // find segment num
+    readers[i].setNorm(n-starts[i], field, value); // dispatch
+  }
+
   public final TermEnum terms() throws IOException {
     return new SegmentsTermEnum(readers, starts, null);
   }
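Annotation (not part of this commit): the dispatch works because each sub-reader owns a contiguous block of global document numbers, with starts[i] holding the first global number in segment i, so global doc n becomes local doc n - starts[i]. A minimal lookup sketch (the shipped readerIndex() may be implemented differently, e.g. with a binary search):

// Hypothetical linear-scan readerIndex(): pick the segment whose start
// is the largest value not exceeding n.  Assumes starts[] is ascending
// with starts[0] == 0.
static int readerIndex(int n, int[] starts) {
  int i = starts.length - 1;
  while (i > 0 && n < starts[i])
    i--;
  return i;              // doc n is doc (n - starts[i]) within segment i
}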
TestSetNorm.java (new file)
@@ -0,0 +1,118 @@
package org.apache.lucene.search;

/* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission.  For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

import junit.framework.TestCase;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

/** Document boost unit test.
 *
 * @author Doug Cutting
 * @version $Revision$
 */
public class TestSetNorm extends TestCase {
  public TestSetNorm(String name) {
    super(name);
  }

  public void testSetNorm() throws Exception {
    RAMDirectory store = new RAMDirectory();
    IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);

    // add the same document four times
    Field f1 = Field.Text("field", "word");
    Document d1 = new Document();
    d1.add(f1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.addDocument(d1);
    writer.close();

    // reset the boost of each instance of this document
    IndexReader reader = IndexReader.open(store);
    reader.setNorm(0, "field", 1.0f);
    reader.setNorm(1, "field", 2.0f);
    reader.setNorm(2, "field", 4.0f);
    reader.setNorm(3, "field", 16.0f);
    reader.close();

    // check that searches are ordered by this boost
    final float[] scores = new float[4];

    new IndexSearcher(store).search
      (new TermQuery(new Term("field", "word")),
       new HitCollector() {
         public final void collect(int doc, float score) {
           scores[doc] = score;
         }
       });

    float lastScore = 0.0f;
    for (int i = 0; i < 4; i++) {
      assertTrue(scores[i] > lastScore);
      lastScore = scores[i];
    }
  }
}
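Annotation (not part of this commit): the test can be run standalone with the JUnit 3 text runner, e.g. (invocation illustrative; lucene and junit jars assumed on the classpath):

// Illustrative standalone runner for the new test (JUnit 3.x):
public class RunTestSetNorm {
  public static void main(String[] args) {
    junit.textui.TestRunner.run(
        new junit.framework.TestSuite(org.apache.lucene.search.TestSetNorm.class));
  }
}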