LUCENE-1604: allow IndexReader.norms to return null if the field has no norms; this is off by default but will be hardwired to on in 3.0

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@769524 13f79535-47bb-0310-9956-ffa450edef68
commit 72d1873533
parent f3189d8063
Author: Michael McCandless
Date:   2009-04-28 20:38:47 +00:00

19 changed files with 169 additions and 78 deletions

View File

@@ -50,6 +50,13 @@ Changes in runtime behavior
 ...
 </code>
 
+ 3. LUCENE-1604: IndexReader.norms(String field) is now allowed to
+    return null if the field has no norms, as long as you've
+    previously called IndexReader.setDisableFakeNorms(true).  This
+    setting now defaults to false (to preserve the fake-norms
+    back-compatible behavior) but in 3.0 will be hardwired to true.
+    (Shon Vella via Mike McCandless)
+
 API Changes
 
 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is

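In client code the opt-in looks like the following minimal sketch (the index location and the field name "body" are hypothetical; setDisableFakeNorms and the null-returning norms are the behavior this commit adds):

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NullNormsExample {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory("index");  // hypothetical index location
    IndexReader r = IndexReader.open(dir);
    r.setDisableFakeNorms(true);       // defaults to false; hardwired to true in 3.0
    byte[] norms = r.norms("body");    // may now be null if "body" omits norms
    if (norms == null) {
      // field stores no norms: treat every document as having a norm of 1.0
      System.out.println("no norms for field");
    } else {
      System.out.println("norm of doc 0 = " + Similarity.decodeNorm(norms[0]));
    }
    r.close();
  }
}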
View File

@@ -72,13 +72,15 @@ public class TestEmptyIndex extends TestCase {
   private void testNorms(IndexReader r) throws IOException {
     byte[] norms;
     norms = r.norms("foo");
-    assertNotNull(norms);
-    assertEquals(0, norms.length);
-    norms = new byte[10];
-    Arrays.fill(norms, (byte)10);
-    r.norms("foo", norms, 10);
-    for (byte b : norms) {
-      assertEquals((byte)10, b);
+    if (!r.getDisableFakeNorms()) {
+      assertNotNull(norms);
+      assertEquals(0, norms.length);
+      norms = new byte[10];
+      Arrays.fill(norms, (byte)10);
+      r.norms("foo", norms, 10);
+      for (byte b : norms) {
+        assertEquals((byte)10, b);
+      }
     }
   }

View File

@@ -247,39 +247,41 @@ public class TestIndicesEquals extends TestCase {
       byte[] aprioriNorms = aprioriReader.norms((String) field);
       byte[] testNorms = testReader.norms((String) field);
 
-      assertEquals(aprioriNorms.length, testNorms.length);
+      if (!aprioriReader.getDisableFakeNorms()) {
+        assertEquals(aprioriNorms.length, testNorms.length);
 
-      for (int i = 0; i < aprioriNorms.length; i++) {
-        assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
-      }
+        for (int i = 0; i < aprioriNorms.length; i++) {
+          assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
+        }
 
-      // test norms as used by multireader
+        // test norms as used by multireader
 
-      aprioriNorms = new byte[aprioriReader.maxDoc()];
-      aprioriReader.norms((String) field, aprioriNorms, 0);
+        aprioriNorms = new byte[aprioriReader.maxDoc()];
+        aprioriReader.norms((String) field, aprioriNorms, 0);
 
-      testNorms = new byte[testReader.maxDoc()];
-      testReader.norms((String) field, testNorms, 0);
+        testNorms = new byte[testReader.maxDoc()];
+        testReader.norms((String) field, testNorms, 0);
 
-      assertEquals(aprioriNorms.length, testNorms.length);
+        assertEquals(aprioriNorms.length, testNorms.length);
 
-      for (int i = 0; i < aprioriNorms.length; i++) {
-        assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
-      }
+        for (int i = 0; i < aprioriNorms.length; i++) {
+          assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
+        }
 
-      // test norms as used by multireader
+        // test norms as used by multireader
 
-      aprioriNorms = new byte[aprioriReader.maxDoc() + 10];
-      aprioriReader.norms((String) field, aprioriNorms, 10);
+        aprioriNorms = new byte[aprioriReader.maxDoc() + 10];
+        aprioriReader.norms((String) field, aprioriNorms, 10);
 
-      testNorms = new byte[testReader.maxDoc() + 10];
-      testReader.norms((String) field, testNorms, 10);
+        testNorms = new byte[testReader.maxDoc() + 10];
+        testReader.norms((String) field, testNorms, 10);
 
-      assertEquals(aprioriNorms.length, testNorms.length);
+        assertEquals(aprioriNorms.length, testNorms.length);
 
-      for (int i = 0; i < aprioriNorms.length; i++) {
-        assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
+        for (int i = 0; i < aprioriNorms.length; i++) {
+          assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
+        }
       }
     }
   }

View File

@@ -92,8 +92,12 @@ public class TestFieldNormModifier extends TestCase {
     // sanity check, norms should all be 1
     assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
-    for (int i = 0; i< norms.length; i++) {
-      assertEquals(""+i, DEFAULT_NORM, norms[i]);
+    if (!r.getDisableFakeNorms()) {
+      for (int i = 0; i< norms.length; i++) {
+        assertEquals(""+i, DEFAULT_NORM, norms[i]);
+      }
+    } else {
+      assertNull(norms);
     }
 
     r.close();
@@ -110,8 +114,12 @@ public class TestFieldNormModifier extends TestCase {
     norms = r.norms("nonorm");
     assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
-    for (int i = 0; i< norms.length; i++) {
-      assertEquals(""+i, DEFAULT_NORM, norms[i]);
+    if (!r.getDisableFakeNorms()) {
+      for (int i = 0; i< norms.length; i++) {
+        assertEquals(""+i, DEFAULT_NORM, norms[i]);
+      }
+    } else {
+      assertNull(norms);
     }
 
     r.close();

View File

@@ -98,9 +98,13 @@ public class TestLengthNormModifier extends TestCase {
     // sanity check, norms should all be 1
     assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
-    for (int i = 0; i< norms.length; i++) {
-      assertEquals(""+i, DEFAULT_NORM, norms[i]);
-    }
+    if (!r.getDisableFakeNorms()) {
+      for (int i = 0; i< norms.length; i++) {
+        assertEquals(""+i, DEFAULT_NORM, norms[i]);
+      }
+    } else {
+      assertNull(norms);
+    }
 
     r.close();
@@ -116,9 +120,13 @@ public class TestLengthNormModifier extends TestCase {
     norms = r.norms("nonorm");
     assertTrue("Whoops we have norms?", !r.hasNorms("nonorm"));
-    for (int i = 0; i< norms.length; i++) {
-      assertEquals(""+i, DEFAULT_NORM, norms[i]);
-    }
+    if (!r.getDisableFakeNorms()) {
+      for (int i = 0; i< norms.length; i++) {
+        assertEquals(""+i, DEFAULT_NORM, norms[i]);
+      }
+    } else {
+      assertNull(norms);
+    }
 
     r.close();

View File

@@ -227,7 +227,9 @@ abstract class DirectoryIndexReader extends IndexReader implements Cloneable {
       // TODO: right now we *always* make a new reader; in
       // the future we could have write make some effort to
       // detect that no changes have occurred
-      return writer.getReader();
+      IndexReader reader = writer.getReader();
+      reader.setDisableFakeNorms(getDisableFakeNorms());
+      return reader;
     }
 
     if (commit == null) {
@@ -298,6 +300,7 @@ abstract class DirectoryIndexReader extends IndexReader implements Cloneable {
       } else {
         reader = (DirectoryIndexReader) finder.doBody(commit.getSegmentsFileName());
       }
+      reader.setDisableFakeNorms(getDisableFakeNorms());
     } finally {
       if (myCloseDirectory) {
         assert directory instanceof FSDirectory;

View File

@@ -116,6 +116,8 @@ public abstract class IndexReader implements Cloneable {
 
   private volatile int refCount;
 
+  private boolean disableFakeNorms = false;
+
   /** Expert: returns the current refCount for this reader */
   public synchronized int getRefCount() {
     return refCount;
@@ -792,6 +794,9 @@ public abstract class IndexReader implements Cloneable {
    * int) length normalization}. Thus, to preserve the length normalization
    * values when resetting this, one should base the new value upon the old.
    *
+   * <b>NOTE:</b> If this field does not store norms, then
+   * this method call will silently do nothing.
+   *
    * @see #norms(String)
    * @see Similarity#decodeNorm(byte)
    * @throws StaleReaderException if the index has changed
@@ -1275,4 +1280,26 @@ public abstract class IndexReader implements Cloneable {
   public long getUniqueTermCount() throws IOException {
     throw new UnsupportedOperationException("this reader does not implement getUniqueTermCount()");
   }
+
+  /** Expert: Return the state of the flag that disables fake norms in favor of representing the absence of field norms with null.
+   * @return true if fake norms are disabled
+   * @deprecated This currently defaults to false (to remain
+   *  back-compatible), but in 3.0 it will be hardwired to
+   *  true, meaning the norms() methods will return null for
+   *  fields that had norms disabled.
+   */
+  public boolean getDisableFakeNorms() {
+    return disableFakeNorms;
+  }
+
+  /** Expert: Set the state of the flag that disables fake norms in favor of representing the absence of field norms with null.
+   * @param disableFakeNorms true to disable fake norms, false to preserve the legacy behavior
+   * @deprecated This currently defaults to false (to remain
+   *  back-compatible), but in 3.0 it will be hardwired to
+   *  true, meaning the norms() methods will return null for
+   *  fields that had norms disabled.
+   */
+  public void setDisableFakeNorms(boolean disableFakeNorms) {
+    this.disableFakeNorms = disableFakeNorms;
+  }
 }

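A hedged sketch of the two documented behaviors above, assuming a field "nonorm" that was indexed with Field.setOmitNorms(true), a writable reader, and the same hypothetical index location as the earlier sketch:

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DisableFakeNormsExample {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory("index");  // hypothetical index location
    IndexReader r = IndexReader.open(dir);              // writable reader
    System.out.println(r.getDisableFakeNorms());        // false: the back-compatible default
    r.setDisableFakeNorms(true);
    r.setNorm(0, "nonorm", 0.5f);                       // documented to silently do nothing: no norms stored
    System.out.println(r.norms("nonorm"));              // null once fake norms are disabled
    r.close();
  }
}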
View File

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
@@ -27,6 +28,7 @@ import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.MultiSegmentReader.MultiTermDocs;
 import org.apache.lucene.index.MultiSegmentReader.MultiTermEnum;
 import org.apache.lucene.index.MultiSegmentReader.MultiTermPositions;
+import org.apache.lucene.search.DefaultSimilarity;
 
 /** An IndexReader which reads multiple indexes, appending their content.
  *
@@ -179,6 +181,7 @@ public class MultiReader extends IndexReader implements Cloneable {
       }
       MultiReader mr = new MultiReader(newSubReaders);
       mr.decrefOnClose = newDecrefOnClose;
+      mr.setDisableFakeNorms(getDisableFakeNorms());
       return mr;
     } else {
       return this;
@@ -288,7 +291,7 @@ public class MultiReader extends IndexReader implements Cloneable {
     if (bytes != null)
       return bytes;   // cache hit
     if (!hasNorms(field))
-      return fakeNorms();
+      return getDisableFakeNorms() ? null : fakeNorms();
 
     bytes = new byte[maxDoc()];
     for (int i = 0; i < subReaders.length; i++)
@@ -301,12 +304,18 @@ public class MultiReader extends IndexReader implements Cloneable {
       throws IOException {
     ensureOpen();
     byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
-    if (bytes != null)   // cache hit
-      System.arraycopy(bytes, 0, result, offset, maxDoc());
-
-    for (int i = 0; i < subReaders.length; i++)      // read from segments
-      subReaders[i].norms(field, result, offset + starts[i]);
+    if (bytes==null && !hasNorms(field)) {
+      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+    } else if (bytes != null) {                      // cache hit
+      System.arraycopy(bytes, 0, result, offset, maxDoc());
+    } else {
+      for (int i = 0; i < subReaders.length; i++) {  // read from segments
+        subReaders[i].norms(field, result, offset + starts[i]);
+      }
+    }
   }
 
   protected void doSetNorm(int n, String field, byte value)

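The Arrays.fill above preserves the old contract of the array-filling norms() variant: it writes the same byte the fake-norms array was made of, so callers that pass in a buffer still see neutral norms. A small standalone check of that equivalence (not part of the commit):

import java.util.Arrays;

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;

public class FakeNormByteCheck {
  public static void main(String[] args) {
    byte one = DefaultSimilarity.encodeNorm(1.0f);  // the byte fakeNorms() arrays were filled with
    byte[] result = new byte[8];
    Arrays.fill(result, 0, result.length, one);     // what norms(field, result, offset) now does
    System.out.println(Similarity.decodeNorm(one)); // 1.0: a neutral scoring factor
  }
}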
View File

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -27,6 +28,7 @@ import java.util.Set;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.store.Directory;
 
 /**
@@ -262,15 +264,18 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
   }
 
   protected synchronized DirectoryIndexReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
+    DirectoryIndexReader reader;
     if (infos.size() == 1) {
       // The index has only one segment now, so we can't refresh the MultiSegmentReader.
       // Return a new [ReadOnly]SegmentReader instead
-      return SegmentReader.get(openReadOnly, infos, infos.info(0), false);
+      reader = SegmentReader.get(openReadOnly, infos, infos.info(0), false);
     } else if (openReadOnly) {
-      return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, doClone);
+      reader = new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, doClone);
     } else {
-      return new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, false, doClone);
+      reader = new MultiSegmentReader(directory, infos, closeDirectory, subReaders, starts, normsCache, false, doClone);
     }
+    reader.setDisableFakeNorms(getDisableFakeNorms());
+    return reader;
   }
 
   public TermFreqVector[] getTermFreqVectors(int n) throws IOException {
@@ -397,7 +402,7 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
     if (bytes != null)
       return bytes;   // cache hit
     if (!hasNorms(field))
-      return fakeNorms();
+      return getDisableFakeNorms() ? null : fakeNorms();
 
     bytes = new byte[maxDoc()];
     for (int i = 0; i < subReaders.length; i++)
@@ -410,12 +415,15 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
       throws IOException {
     ensureOpen();
     byte[] bytes = (byte[])normsCache.get(field);
-    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
-    if (bytes != null)   // cache hit
+    if (bytes==null && !hasNorms(field)) {
+      Arrays.fill(result, offset, result.length, DefaultSimilarity.encodeNorm(1.0f));
+    } else if (bytes != null) {                      // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());
-
-    for (int i = 0; i < subReaders.length; i++)      // read from segments
-      subReaders[i].norms(field, result, offset + starts[i]);
+    } else {
+      for (int i = 0; i < subReaders.length; i++) {  // read from segments
+        subReaders[i].norms(field, result, offset + starts[i]);
+      }
+    }
   }
 
   protected void doSetNorm(int n, String field, byte value)
@@ -514,6 +522,12 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
     throw new IllegalStateException("no readers");
   }
 
+  public void setDisableFakeNorms(boolean disableFakeNorms) {
+    super.setDisableFakeNorms(disableFakeNorms);
+    for (int i = 0; i < subReaders.length; i++)
+      subReaders[i].setDisableFakeNorms(disableFakeNorms);
+  }
+
   static class MultiTermEnum extends TermEnum {
     private SegmentMergeQueue queue;

View File

@@ -629,11 +629,11 @@ class SegmentReader extends DirectoryIndexReader {
       }
     } else {
       if (openReadOnly)
-        return new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, doClone);
+        newReader = new ReadOnlyMultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, doClone);
       else
-        return new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, false, doClone);
+        newReader = new MultiSegmentReader(directory, infos, closeDirectory, new SegmentReader[] {this}, null, null, false, doClone);
     }
+    newReader.setDisableFakeNorms(getDisableFakeNorms());
     return newReader;
   }
 
@@ -708,6 +708,7 @@ class SegmentReader extends DirectoryIndexReader {
       }
     }
 
+    clone.setDisableFakeNorms(getDisableFakeNorms());
     clone.norms = new HashMap();
 
     // Clone norms
@@ -1017,6 +1018,7 @@ class SegmentReader extends DirectoryIndexReader {
 
   private byte[] ones;
   private byte[] fakeNorms() {
+    assert !getDisableFakeNorms();
     if (ones==null) ones=createFakeNorms(maxDoc());
     return ones;
   }
@@ -1032,7 +1034,7 @@ class SegmentReader extends DirectoryIndexReader {
   public synchronized byte[] norms(String field) throws IOException {
     ensureOpen();
     byte[] bytes = getNorms(field);
-    if (bytes==null) bytes=fakeNorms();
+    if (bytes==null && !getDisableFakeNorms()) bytes=fakeNorms();
     return bytes;
   }
 
@@ -1053,7 +1055,7 @@ class SegmentReader extends DirectoryIndexReader {
     ensureOpen();
     Norm norm = (Norm) norms.get(field);
     if (norm == null) {
-      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
+      Arrays.fill(bytes, offset, bytes.length, DefaultSimilarity.encodeNorm(1.0f));
       return;
     }

View File

@@ -24,7 +24,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultipleTermPositions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermPositions;
-import org.apache.lucene.search.Query;
 import org.apache.lucene.util.ToStringUtils;
 
 /**
@@ -225,7 +224,7 @@ public class MultiPhraseQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

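Returning 1.0f instead of 0.0f matters because fieldNorm is a multiplicative detail of the explanation: with fake norms disabled, reader.norms(field) is null for a no-norms field, and a 0.0f default would zero the explained score even though the scorers (below) leave it untouched. A tiny standalone restatement, with a deliberately null norms array:

public class NeutralFieldNormCheck {
  public static void main(String[] args) {
    byte[] fieldNorms = null;  // what reader.norms(field) returns for a no-norms field
    float fieldNorm =
      fieldNorms != null ? org.apache.lucene.search.Similarity.decodeNorm(fieldNorms[0]) : 1.0f;
    System.out.println(fieldNorm);  // 1.0: the norm factor drops out of the explanation
  }
}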
View File

@@ -216,7 +216,7 @@ public class PhraseQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -104,7 +104,7 @@ abstract class PhraseScorer extends Scorer {
   public float score() throws IOException {
     //System.out.println("scoring " + first.doc);
     float raw = getSimilarity().tf(freq) * value; // raw score
-    return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
+    return norms == null ? raw : raw * Similarity.decodeNorm(norms[first.doc]); // normalize
   }
 
   public boolean skipTo(int target) throws IOException {

View File

@@ -111,7 +111,7 @@ public class TermQuery extends Query {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -127,7 +127,7 @@ final class TermScorer extends Scorer {
       ? scoreCache[f]                             // cache hit
       : getSimilarity().tf(f)*weightValue;        // cache miss
 
-    return raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
+    return norms == null ? raw : raw * SIM_NORM_DECODER[norms[doc] & 0xFF]; // normalize for field
   }
 
   /** Skips to the first match beyond the current whose document number is

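The same null guard appears in PhraseScorer, TermScorer, and SpanScorer: a missing norms array now means "multiply by nothing", which is numerically identical to multiplying by the old fake norm of 1.0. A standalone sketch of that equivalence (the raw value is made up):

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;

public class NullNormScoringCheck {
  public static void main(String[] args) {
    float raw = 2.5f;     // hypothetical tf * weight value
    byte[] norms = null;  // field with norms omitted, fake norms disabled
    int doc = 0;
    // The guard this commit adds to the scorers:
    float guarded = norms == null ? raw : raw * Similarity.decodeNorm(norms[doc]);
    // The legacy path: multiplying by a fake norm of 1.0:
    float legacy = raw * Similarity.decodeNorm(DefaultSimilarity.encodeNorm(1.0f));
    System.out.println(guarded == legacy);  // true: both 2.5
  }
}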
View File

@@ -89,7 +89,7 @@ public class SpanScorer extends Scorer {
   public float score() throws IOException {
     float raw = getSimilarity().tf(freq) * value; // raw score
-    return raw * Similarity.decodeNorm(norms[doc]); // normalize
+    return norms == null ? raw : raw * Similarity.decodeNorm(norms[doc]); // normalize
   }
 
   public Explanation explain(final int doc) throws IOException {

View File

@@ -122,7 +122,7 @@ public class SpanWeight implements Weight {
       Explanation fieldNormExpl = new Explanation();
       byte[] fieldNorms = reader.norms(field);
       float fieldNorm =
-        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
+        fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
       fieldNormExpl.setValue(fieldNorm);
       fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
       fieldExpl.addDetail(fieldNormExpl);

View File

@@ -1377,10 +1377,17 @@ public class TestIndexReader extends LuceneTestCase
       String curField = (String) it1.next();
       byte[] norms1 = index1.norms(curField);
       byte[] norms2 = index2.norms(curField);
-      assertEquals(norms1.length, norms2.length);
-      for (int i = 0; i < norms1.length; i++) {
-        assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", norms1[i], norms2[i]);
+      if (norms1 != null && norms2 != null)
+      {
+        assertEquals(norms1.length, norms2.length);
+        for (int i = 0; i < norms1.length; i++) {
+          assertEquals("Norm different for doc " + i + " and field '" + curField + "'.", norms1[i], norms2[i]);
+        }
+      }
+      else
+      {
+        assertSame(norms1, norms2);
       }
     }
 
     // check deletions

View File

@@ -29,8 +29,6 @@ import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.search.Similarity;
 
 public class TestSegmentReader extends LuceneTestCase {
   private RAMDirectory dir = new RAMDirectory();
@@ -173,16 +171,21 @@ public class TestSegmentReader extends LuceneTestCase {
       assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms());
       assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
       if (!reader.hasNorms(f.name())) {
-        // test for fake norms of 1.0
+        // test for fake norms of 1.0 or null depending on the flag
         byte [] norms = reader.norms(f.name());
-        assertEquals(norms.length,reader.maxDoc());
-        for (int j=0; j<reader.maxDoc(); j++) {
-          assertEquals(norms[j], DefaultSimilarity.encodeNorm(1.0f));
+        byte norm1 = DefaultSimilarity.encodeNorm(1.0f);
+        if (reader.getDisableFakeNorms())
+          assertNull(norms);
+        else {
+          assertEquals(norms.length,reader.maxDoc());
+          for (int j=0; j<reader.maxDoc(); j++) {
+            assertEquals(norms[j], norm1);
+          }
         }
         norms = new byte[reader.maxDoc()];
         reader.norms(f.name(),norms, 0);
         for (int j=0; j<reader.maxDoc(); j++) {
-          assertEquals(norms[j], DefaultSimilarity.encodeNorm(1.0f));
+          assertEquals(norms[j], norm1);
         }
       }
     }