diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 05b9e5c963c..98bd2d7b5b7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -152,6 +152,17 @@ Changes in Runtime Behavior without any changes to the index will not cause any index version increment. (Simon Willnauer, Mike McCandless) +* LUCENE-2846: omitNorms now behaves like omitTermFreqAndPositions: if you + omitNorms(true) for field "a" for 1000 documents, but then add a document with + omitNorms(false) for field "a", all documents for field "a" will have no norms. + Previously, Lucene would fill the first 1000 documents with "fake norms" from + Similarity.getDefault(). (Robert Muir, Mike McCandless) + +* LUCENE-2846: When some documents contain field "a", and others do not, the + documents that don't have the field get a norm byte value of 0. Previously, Lucene + would populate "fake norms" with Similarity.getDefault() for these documents. + (Robert Muir, Mike McCandless) + API Changes * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer @@ -193,6 +204,14 @@ API Changes Collector#setNextReader & FieldComparator#setNextReader now expect an AtomicReaderContext instead of an IndexReader. (Simon Willnauer) +* LUCENE-2846: Remove the deprecated IndexReader.setNorm(int, String, float). + This method was only syntactic sugar for setNorm(int, String, byte), but + using the global Similarity.getDefault().encodeNormValue. Use the byte-based + method instead to ensure that the norm is encoded with your Similarity. + Also removed norms(String, byte[], int), which was only used by MultiReader + for building top-level norms. If you really need top-level norms, use + MultiNorms or SlowMultiReaderWrapper. (Robert Muir, Mike McCandless) + New features * LUCENE-2604: Added RegexpQuery support to QueryParser. 
Regular expressions diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java index 58c5313bed4..3b191b83fc5 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java @@ -333,15 +333,6 @@ public class InstantiatedIndexReader extends IndexReader { return norms; } - @Override - public void norms(String field, byte[] bytes, int offset) throws IOException { - byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field); - if (norms == null) { - return; - } - System.arraycopy(norms, 0, bytes, offset, norms.length); - } - @Override protected void doSetNorm(int doc, String field, byte value) throws IOException { if (uncommittedNormsByFieldNameAndDocumentNumber == null) { diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java index 0bfa8bd6e26..f5cd26dfe31 100644 --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java @@ -201,9 +201,9 @@ public class InstantiatedIndexWriter implements Closeable { byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field); if (oldNorms != null) { System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length); - Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, oldNorms.length, norms.length, (byte) 0); } else { - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, 
norms.length, (byte) 0); } normsByFieldNameAndDocumentNumber.put(field, norms); fieldNames.remove(field); @@ -211,7 +211,7 @@ public class InstantiatedIndexWriter implements Closeable { for (String field : fieldNames) { //System.out.println(field); byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()]; - Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f)); + Arrays.fill(norms, 0, norms.length, (byte) 0); normsByFieldNameAndDocumentNumber.put(field, norms); } fieldNames.clear(); diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java index d6a7cf54a6b..1eb03fdaa5a 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestEmptyIndex.java @@ -71,12 +71,6 @@ public class TestEmptyIndex extends LuceneTestCase { byte[] norms = MultiNorms.norms(r, "foo"); if (norms != null) { assertEquals(0, norms.length); - norms = new byte[10]; - Arrays.fill(norms, (byte)10); - r.norms("foo", norms, 10); - for (byte b : norms) { - assertEquals((byte)10, b); - } } } diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java index 3d5c247cb7f..1ad0743b635 100644 --- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java +++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java @@ -358,35 +358,6 @@ public class TestIndicesEquals extends LuceneTestCase { for (int i = 0; i < aprioriNorms.length; i++) { assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], 
testNorms[i]); } - - // test norms as used by multireader - - aprioriNorms = new byte[aprioriReader.maxDoc()]; - MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 0); - - testNorms = new byte[testReader.maxDoc()]; - MultiNorms.norms(testReader, (String) field, testNorms, 0); - - assertEquals(aprioriNorms.length, testNorms.length); - - for (int i = 0; i < aprioriNorms.length; i++) { - assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]); - } - - - // test norms as used by multireader - - aprioriNorms = new byte[aprioriReader.maxDoc() + 10]; - MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 10); - - testNorms = new byte[testReader.maxDoc() + 10]; - MultiNorms.norms(testReader, (String) field, testNorms, 10); - - assertEquals(aprioriNorms.length, testNorms.length); - - for (int i = 0; i < aprioriNorms.length; i++) { - assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]); - } } } diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 08e880bfe99..cd1df94a9c7 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -1185,13 +1185,6 @@ public class MemoryIndex implements Serializable { return norms; } - @Override - public void norms(String fieldName, byte[] bytes, int offset) { - if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName); - byte[] norms = norms(fieldName); - System.arraycopy(norms, 0, bytes, offset, norms.length); - } - @Override protected void doSetNorm(int doc, String fieldName, byte value) { throw new UnsupportedOperationException(); diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java 
index 392ab635249..e825c670e29 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -548,10 +548,10 @@ public class CheckIndex { if (infoStream != null) { infoStream.print(" test: field norms........."); } - final byte[] b = new byte[reader.maxDoc()]; + byte[] b; for (final String fieldName : fieldNames) { if (reader.hasNorms(fieldName)) { - reader.norms(fieldName, b, 0); + b = reader.norms(fieldName); ++status.totFields; } } diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index c4ed2633760..9da85ca5e6a 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -601,12 +601,6 @@ class DirectoryReader extends IndexReader implements Cloneable { throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); } - @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 9526a7b40cf..2477f9e5c1f 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -52,7 +52,7 @@ public final class FieldInfo { this.storeOffsetWithTermVector = false; this.storePositionWithTermVector = false; this.storePayloads = false; - this.omitNorms = true; + this.omitNorms = false; this.omitTermFreqAndPositions = false; 
} } @@ -82,7 +82,7 @@ public final class FieldInfo { this.storePayloads = true; } if (this.omitNorms != omitNorms) { - this.omitNorms = false; // once norms are stored, always store + this.omitNorms = true; // if one require omitNorms at least once, it remains off for life } if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) { this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 0fb4878b4be..ef1a3b7476f 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -278,14 +278,21 @@ public final class FieldInfos { } public boolean hasVectors() { - boolean hasVectors = false; for (int i = 0; i < size(); i++) { if (fieldInfo(i).storeTermVector) { - hasVectors = true; - break; + return true; } } - return hasVectors; + return false; + } + + public boolean hasNorms() { + for (int i = 0; i < size(); i++) { + if (!fieldInfo(i).omitNorms) { + return true; + } + } + return false; } public void write(Directory d, String name) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/index/Fields.java b/lucene/src/java/org/apache/lucene/index/Fields.java index a14ca1d52c3..d7b0bba997f 100644 --- a/lucene/src/java/org/apache/lucene/index/Fields.java +++ b/lucene/src/java/org/apache/lucene/index/Fields.java @@ -28,7 +28,7 @@ public abstract class Fields { * names. This will not return null. */ public abstract FieldsEnum iterator() throws IOException; - /** Get the {@link Terms} for this field. This may return + /** Get the {@link Terms} for this field. This will return * null if the field does not exist. 
*/ public abstract Terms terms(String field) throws IOException; diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 8246fe4894d..ffe82b017dc 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -355,12 +355,6 @@ public class FilterIndexReader extends IndexReader { return in.norms(f); } - @Override - public void norms(String f, byte[] bytes, int offset) throws IOException { - ensureOpen(); - in.norms(f, bytes, offset); - } - @Override protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException { in.setNorm(d, f, b); diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 6a23da780fc..95f4977f177 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -931,14 +931,6 @@ public abstract class IndexReader implements Cloneable,Closeable { */ public abstract byte[] norms(String field) throws IOException; - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. - * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract void norms(String field, byte[] bytes, int offset) - throws IOException; - /** Expert: Resets the normalization factor for the named field of the named * document. 
The norm represents the product of the field's {@link * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, @@ -970,26 +962,6 @@ public abstract class IndexReader implements Cloneable,Closeable { protected abstract void doSetNorm(int doc, String field, byte value) throws CorruptIndexException, IOException; - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNormValue(byte) - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public void setNorm(int doc, String field, float value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - setNorm(doc, field, Similarity.getDefault().encodeNormValue(value)); - } - /** Flex API: returns {@link Fields} for this reader. * This method may return null if the reader has no * postings. 
diff --git a/lucene/src/java/org/apache/lucene/index/MultiNorms.java b/lucene/src/java/org/apache/lucene/index/MultiNorms.java index 34bd967c73e..1026480fb29 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiNorms.java +++ b/lucene/src/java/org/apache/lucene/index/MultiNorms.java @@ -22,7 +22,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.lucene.search.Similarity; import org.apache.lucene.util.ReaderUtil; /** @@ -61,26 +60,24 @@ public class MultiNorms { ReaderUtil.gatherSubReaders(leaves, r); int end = 0; for (IndexReader leaf : leaves) { + Fields fields = leaf.fields(); + boolean hasField = (fields != null && fields.terms(field) != null); + int start = end; - leaf.norms(field, norms, start); + byte leafNorms[] = leaf.norms(field); + if (leafNorms == null) { + if (hasField) { // omitted norms + return null; + } + // doesn't have field, fill bytes + leafNorms = new byte[leaf.maxDoc()]; + Arrays.fill(leafNorms, (byte) 0); + } + + System.arraycopy(leafNorms, 0, norms, start, leafNorms.length); end += leaf.maxDoc(); } return norms; } } - - /** - * Warning: this is heavy! Do not use in a loop, or implement norms() - * in your own reader with this (you should likely cache the result). 
- */ - public static void norms(IndexReader r, String field, byte[] bytes, int offset) - throws IOException { - // TODO: optimize more maybe - byte[] norms = norms(r, field); - if (norms == null) { - Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f)); - } else { - System.arraycopy(norms, 0, bytes, offset, r.maxDoc()); - } - } } diff --git a/lucene/src/java/org/apache/lucene/index/MultiReader.java b/lucene/src/java/org/apache/lucene/index/MultiReader.java index 37328d1314a..8a5dca94f22 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/MultiReader.java @@ -304,12 +304,6 @@ public class MultiReader extends IndexReader implements Cloneable { throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); } - @Override - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms"); - } - @Override protected void doSetNorm(int n, String field, byte value) throws CorruptIndexException, IOException { diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/NormsWriter.java index 036832297cf..82b4a730e5b 100644 --- a/lucene/src/java/org/apache/lucene/index/NormsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/NormsWriter.java @@ -26,7 +26,6 @@ import java.util.List; import java.util.ArrayList; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.search.Similarity; // TODO FI: norms could actually be stored as doc store @@ -37,7 +36,6 @@ import org.apache.lucene.search.Similarity; final class NormsWriter extends InvertedDocEndConsumer { - private final byte defaultNorm = 
Similarity.getDefault().encodeNormValue(1.0f); private FieldInfos fieldInfos; @Override public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { @@ -62,6 +60,10 @@ final class NormsWriter extends InvertedDocEndConsumer { final Map> byField = new HashMap>(); + if (!fieldInfos.hasNorms()) { + return; + } + // Typically, each thread will have encountered the same // field. So first we collate by field, ie, all // per-thread field instances that correspond to the @@ -137,7 +139,7 @@ final class NormsWriter extends InvertedDocEndConsumer { // Fill hole for(;upto - * WARNING: If you override this method, you should change the default - * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. - * Otherwise, your method may not always be called, especially if you omit norms - * for some fields. * @see #encodeNormValue(float) */ public float decodeNormValue(byte b) { @@ -662,11 +657,6 @@ public abstract class Similarity implements Serializable { * are rounded down to the largest representable value. Positive values too * small to represent are rounded up to the smallest positive representable * value. - *

- * WARNING: If you override this method, you should change the default - * Similarity to your implementation with {@link Similarity#setDefault(Similarity)}. - * Otherwise, your method may not always be called, especially if you omit norms - * for some fields. * @see org.apache.lucene.document.Field#setBoost(float) * @see org.apache.lucene.util.SmallFloat */ diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index e85e4fb4bc5..ffa6238b072 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -43,6 +43,7 @@ import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -216,6 +217,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase { public void testOptimizeOldIndex() throws Exception { for(int i=0;i