mirror of https://github.com/apache/lucene.git
LUCENE-2846: remove fake norms and invalid uses of Similarity.getDefault
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1058367 13f79535-47bb-0310-9956-ffa450edef68
parent eacfb5d636
commit cfcaa9edcc
@@ -152,6 +152,17 @@ Changes in Runtime Behavior
without any changes to the index will not cause any index version increment.
(Simon Willnauer, Mike McCandless)

* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
omitNorms(true) for field "a" for 1000 documents, but then add a document with
omitNorms(false) for field "a", all documents for field "a" will have no norms.
Previously, Lucene would fill the first 1000 documents with "fake norms" from
Similarity.getDefault(). (Robert Muir, Mike Mccandless)

* LUCENE-2846: When some documents contain field "a", and others do not, the
documents that don't have the field get a norm byte value of 0. Previously, Lucene
would populate "fake norms" with Similarity.getDefault() for these documents.
(Robert Muir, Mike Mccandless)

API Changes

* LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
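The two LUCENE-2846 entries above describe the new "viral" omitNorms behavior. Below is a minimal, self-contained sketch of what this means for an index; it is an illustration, not part of the commit, and the RAMDirectory/StandardAnalyzer setup, the class name and the Version constant are placeholder choices:

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class OmitNormsIsViral {
  public static void main(String[] args) throws IOException {
    // illustrative setup; any Directory/Analyzer works the same way
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT)));

    // 1000 documents whose field "a" omits norms
    Document d = new Document();
    Field a = new Field("a", "some text", Field.Store.NO, Field.Index.ANALYZED);
    a.setOmitNorms(true);
    d.add(a);
    for (int i = 0; i < 1000; i++) {
      writer.addDocument(d);
    }

    // one more document where field "a" does NOT omit norms
    Document d2 = new Document();
    d2.add(new Field("a", "more text", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(d2);

    writer.optimize();   // merge everything into one segment
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    // After this commit, omitNorms "wins" for the whole field: no fake norms are
    // synthesized for the first 1000 documents; the field simply has no norms.
    byte[] norms = MultiNorms.norms(reader, "a");
    System.out.println(norms == null);   // true: field "a" has no norms at all
    reader.close();
    dir.close();
  }
}

The key point is that once omitNorms has been requested for a field, later documents cannot turn norms back on, and no default values from Similarity.getDefault() are filled in for the earlier documents.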
@@ -193,6 +204,14 @@ API Changes
Collector#setNextReader & FieldComparator#setNextReader now expect an
AtomicReaderContext instead of an IndexReader. (Simon Willnauer)

* LUCENE-2846: Remove the deprecated IndexReader.setNorm(int, String, float).
This method was only syntactic sugar for setNorm(int, String, byte), but
using the global Similarity.getDefault().encodeNormValue. Use the byte-based
method instead to ensure that the norm is encoded with your Similarity.
Also removed norms(String, byte[], int), which was only used by MultiReader
for building top-level norms. If you really need a top-level norms, use
MultiNorms or SlowMultiReaderWrapper. (Robert Muir, Mike Mccandless)

New features

* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
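For callers of the removed float-based setNorm or the removed norms(String, byte[], int), here is a hedged migration sketch. The class, the method names and the "content" field are hypothetical; only IndexReader.setNorm(int, String, byte), Similarity.encodeNormValue and MultiNorms come from the change itself:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.search.Similarity;

class SetNormMigration {
  // Pre-commit code called reader.setNorm(docID, "content", 2.0f), which silently
  // encoded the value via Similarity.getDefault(). Now the caller encodes it with
  // the Similarity actually used for this index ('sim' is the application's instance).
  static void boostDoc(IndexReader reader, int docID, Similarity sim) throws IOException {
    reader.setNorm(docID, "content", sim.encodeNormValue(2.0f));
  }

  // norms(String, byte[], int) is gone; for top-level norms over a composite reader
  // use MultiNorms (or wrap the reader with SlowMultiReaderWrapper) instead.
  static byte[] topLevelNorms(IndexReader reader) throws IOException {
    return MultiNorms.norms(reader, "content");
  }
}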
@@ -333,15 +333,6 @@ public class InstantiatedIndexReader extends IndexReader {
return norms;
}

@Override
public void norms(String field, byte[] bytes, int offset) throws IOException {
byte[] norms = getIndex().getNormsByFieldNameAndDocumentNumber().get(field);
if (norms == null) {
return;
}
System.arraycopy(norms, 0, bytes, offset, norms.length);
}

@Override
protected void doSetNorm(int doc, String field, byte value) throws IOException {
if (uncommittedNormsByFieldNameAndDocumentNumber == null) {

@@ -201,9 +201,9 @@ public class InstantiatedIndexWriter implements Closeable {
byte[] oldNorms = index.getNormsByFieldNameAndDocumentNumber().get(field);
if (oldNorms != null) {
System.arraycopy(oldNorms, 0, norms, 0, oldNorms.length);
Arrays.fill(norms, oldNorms.length, norms.length, similarity.encodeNormValue(1.0f));
Arrays.fill(norms, oldNorms.length, norms.length, (byte) 0);
} else {
Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
Arrays.fill(norms, 0, norms.length, (byte) 0);
}
normsByFieldNameAndDocumentNumber.put(field, norms);
fieldNames.remove(field);
@@ -211,7 +211,7 @@ public class InstantiatedIndexWriter implements Closeable {
for (String field : fieldNames) {
//System.out.println(field);
byte[] norms = new byte[index.getDocumentsByNumber().length + termDocumentInformationFactoryByDocument.size()];
Arrays.fill(norms, 0, norms.length, similarity.encodeNormValue(1.0f));
Arrays.fill(norms, 0, norms.length, (byte) 0);
normsByFieldNameAndDocumentNumber.put(field, norms);
}
fieldNames.clear();

@@ -71,12 +71,6 @@ public class TestEmptyIndex extends LuceneTestCase {
byte[] norms = MultiNorms.norms(r, "foo");
if (norms != null) {
assertEquals(0, norms.length);
norms = new byte[10];
Arrays.fill(norms, (byte)10);
r.norms("foo", norms, 10);
for (byte b : norms) {
assertEquals((byte)10, b);
}
}
}

@@ -358,35 +358,6 @@ public class TestIndicesEquals extends LuceneTestCase {
for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}

// test norms as used by multireader

aprioriNorms = new byte[aprioriReader.maxDoc()];
MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 0);

testNorms = new byte[testReader.maxDoc()];
MultiNorms.norms(testReader, (String) field, testNorms, 0);

assertEquals(aprioriNorms.length, testNorms.length);

for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}

// test norms as used by multireader

aprioriNorms = new byte[aprioriReader.maxDoc() + 10];
MultiNorms.norms(aprioriReader, (String) field, aprioriNorms, 10);

testNorms = new byte[testReader.maxDoc() + 10];
MultiNorms.norms(testReader, (String) field, testNorms, 10);

assertEquals(aprioriNorms.length, testNorms.length);

for (int i = 0; i < aprioriNorms.length; i++) {
assertEquals("norms does not equals for field " + field + " in document " + i, aprioriNorms[i], testNorms[i]);
}
}
}

@@ -1185,13 +1185,6 @@ public class MemoryIndex implements Serializable {
return norms;
}

@Override
public void norms(String fieldName, byte[] bytes, int offset) {
if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName);
byte[] norms = norms(fieldName);
System.arraycopy(norms, 0, bytes, offset, norms.length);
}

@Override
protected void doSetNorm(int doc, String fieldName, byte value) {
throw new UnsupportedOperationException();

@@ -548,10 +548,10 @@ public class CheckIndex {
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
final byte[] b = new byte[reader.maxDoc()];
byte[] b;
for (final String fieldName : fieldNames) {
if (reader.hasNorms(fieldName)) {
reader.norms(fieldName, b, 0);
b = reader.norms(fieldName);
++status.totFields;
}
}

@@ -601,12 +601,6 @@ class DirectoryReader extends IndexReader implements Cloneable {
throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}

@Override
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}

@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {

@@ -52,7 +52,7 @@ public final class FieldInfo {
this.storeOffsetWithTermVector = false;
this.storePositionWithTermVector = false;
this.storePayloads = false;
this.omitNorms = true;
this.omitNorms = false;
this.omitTermFreqAndPositions = false;
}
}
@@ -82,7 +82,7 @@ public final class FieldInfo {
this.storePayloads = true;
}
if (this.omitNorms != omitNorms) {
this.omitNorms = false; // once norms are stored, always store
this.omitNorms = true; // if one require omitNorms at least once, it remains off for life
}
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life

@@ -278,14 +278,21 @@ public final class FieldInfos {
}

public boolean hasVectors() {
boolean hasVectors = false;
for (int i = 0; i < size(); i++) {
if (fieldInfo(i).storeTermVector) {
hasVectors = true;
break;
return true;
}
}
return hasVectors;
return false;
}

public boolean hasNorms() {
for (int i = 0; i < size(); i++) {
if (!fieldInfo(i).omitNorms) {
return true;
}
}
return false;
}

public void write(Directory d, String name) throws IOException {

@@ -28,7 +28,7 @@ public abstract class Fields {
* names. This will not return null. */
public abstract FieldsEnum iterator() throws IOException;

/** Get the {@link Terms} for this field. This may return
/** Get the {@link Terms} for this field. This will return
* null if the field does not exist. */
public abstract Terms terms(String field) throws IOException;

@@ -355,12 +355,6 @@ public class FilterIndexReader extends IndexReader {
return in.norms(f);
}

@Override
public void norms(String f, byte[] bytes, int offset) throws IOException {
ensureOpen();
in.norms(f, bytes, offset);
}

@Override
protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException {
in.setNorm(d, f, b);

@@ -931,14 +931,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
*/
public abstract byte[] norms(String field) throws IOException;

/** Reads the byte-encoded normalization factor for the named field of every
* document. This is used by the search code to score documents.
*
* @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract void norms(String field, byte[] bytes, int offset)
throws IOException;

/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
@@ -970,26 +962,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
protected abstract void doSetNorm(int doc, String field, byte value)
throws CorruptIndexException, IOException;

/** Expert: Resets the normalization factor for the named field of the named
* document.
*
* @see #norms(String)
* @see Similarity#decodeNormValue(byte)
*
* @throws StaleReaderException if the index has changed
* since this reader was opened
* @throws CorruptIndexException if the index is corrupt
* @throws LockObtainFailedException if another writer
* has this index open (<code>write.lock</code> could not
* be obtained)
* @throws IOException if there is a low-level IO error
*/
public void setNorm(int doc, String field, float value)
throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
ensureOpen();
setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
}

/** Flex API: returns {@link Fields} for this reader.
* This method may return null if the reader has no
* postings.

@@ -22,7 +22,6 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.ReaderUtil;

/**
@@ -61,26 +60,24 @@ public class MultiNorms {
ReaderUtil.gatherSubReaders(leaves, r);
int end = 0;
for (IndexReader leaf : leaves) {
Fields fields = leaf.fields();
boolean hasField = (fields != null && fields.terms(field) != null);

int start = end;
leaf.norms(field, norms, start);
byte leafNorms[] = leaf.norms(field);
if (leafNorms == null) {
if (hasField) { // omitted norms
return null;
}
// doesn't have field, fill bytes
leafNorms = new byte[leaf.maxDoc()];
Arrays.fill(leafNorms, (byte) 0);
}

System.arraycopy(leafNorms, 0, norms, start, leafNorms.length);
end += leaf.maxDoc();
}
return norms;
}
}

/**
* Warning: this is heavy! Do not use in a loop, or implement norms()
* in your own reader with this (you should likely cache the result).
*/
public static void norms(IndexReader r, String field, byte[] bytes, int offset)
throws IOException {
// TODO: optimize more maybe
byte[] norms = norms(r, field);
if (norms == null) {
Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
} else {
System.arraycopy(norms, 0, bytes, offset, r.maxDoc());
}
}
}

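The MultiNorms javadoc above warns that gathering top-level norms is heavy. A small sketch of the intended usage pattern, fetching the norms once and reusing them instead of calling MultiNorms.norms per document; the wrapper class and the "body" field name are illustrative assumptions:

class NormsInspector {
  // Fetch top-level norms once, then reuse the array while iterating documents.
  static void inspectNorms(org.apache.lucene.index.IndexReader reader) throws java.io.IOException {
    byte[] cached = org.apache.lucene.index.MultiNorms.norms(reader, "body"); // may be null if norms are omitted
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
      byte norm = (cached == null) ? 0 : cached[docID];
      // ... use 'norm' for docID here ...
    }
  }
}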
@@ -304,12 +304,6 @@ public class MultiReader extends IndexReader implements Cloneable {
throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}

@Override
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}

@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {

@@ -26,7 +26,6 @@ import java.util.List;
import java.util.ArrayList;

import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.search.Similarity;

// TODO FI: norms could actually be stored as doc store

@@ -37,7 +36,6 @@ import org.apache.lucene.search.Similarity;

final class NormsWriter extends InvertedDocEndConsumer {

private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
@@ -62,6 +60,10 @@ final class NormsWriter extends InvertedDocEndConsumer {

final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();

if (!fieldInfos.hasNorms()) {
return;
}

// Typically, each thread will have encountered the same
// field. So first we collate by field, ie, all
// per-thread field instances that correspond to the
@@ -137,7 +139,7 @@ final class NormsWriter extends InvertedDocEndConsumer {

// Fill hole
for(;upto<minDocID;upto++)
normsOut.writeByte(defaultNorm);
normsOut.writeByte((byte) 0);

normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
(uptos[minLoc])++;
@@ -155,12 +157,12 @@ final class NormsWriter extends InvertedDocEndConsumer {

// Fill final hole with defaultNorm
for(;upto<state.numDocs;upto++)
normsOut.writeByte(defaultNorm);
normsOut.writeByte((byte) 0);
} else if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
normCount++;
// Fill entire field with default norm:
for(;upto<state.numDocs;upto++)
normsOut.writeByte(defaultNorm);
normsOut.writeByte((byte) 0);
}

assert 4+normCount*state.numDocs == normsOut.getFilePointer() : ".nrm file size mismatch: expected=" + (4+normCount*state.numDocs) + " actual=" + normsOut.getFilePointer();

@@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.search.Similarity;

/** Taps into DocInverter, as an InvertedDocEndConsumer,
* which is called at the end of inverting each field. We

@@ -21,11 +21,8 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;
@@ -439,29 +436,14 @@ public class ParallelReader extends IndexReader {
return bytes;
if (!hasNorms(field))
return null;
if (normsCache.containsKey(field)) // cached omitNorms, not missing key
return null;

bytes = MultiNorms.norms(reader, field);
normsCache.put(field, bytes);
return bytes;
}

@Override
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
// TODO: maybe optimize
ensureOpen();
IndexReader reader = fieldToReader.get(field);
if (reader==null)
return;

byte[] norms = norms(field);
if (norms == null) {
Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
} else {
System.arraycopy(norms, 0, result, offset, maxDoc());
}
}

@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {

|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -570,13 +571,6 @@ final class SegmentMerger {
|
|||
}
|
||||
|
||||
private void mergeNorms() throws IOException {
|
||||
// get needed buffer size by finding the largest segment
|
||||
int bufferSize = 0;
|
||||
for (IndexReader reader : readers) {
|
||||
bufferSize = Math.max(bufferSize, reader.maxDoc());
|
||||
}
|
||||
|
||||
byte[] normBuffer = null;
|
||||
IndexOutput output = null;
|
||||
try {
|
||||
for (int i = 0, numFieldInfos = fieldInfos.size(); i < numFieldInfos; i++) {
|
||||
|
@ -586,12 +580,15 @@ final class SegmentMerger {
|
|||
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
|
||||
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
|
||||
}
|
||||
if (normBuffer == null) {
|
||||
normBuffer = new byte[bufferSize];
|
||||
}
|
||||
for (IndexReader reader : readers) {
|
||||
final int maxDoc = reader.maxDoc();
|
||||
reader.norms(fi.name, normBuffer, 0);
|
||||
byte normBuffer[] = reader.norms(fi.name);
|
||||
if (normBuffer == null) {
|
||||
// Can be null if this segment doesn't have
|
||||
// any docs with this field
|
||||
normBuffer = new byte[maxDoc];
|
||||
Arrays.fill(normBuffer, (byte)0);
|
||||
}
|
||||
if (!reader.hasDeletions()) {
|
||||
//optimized case for segments without deleted docs
|
||||
output.writeBytes(normBuffer, maxDoc);
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
|
@ -31,7 +30,6 @@ import java.util.Set;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.store.BufferedIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -991,22 +989,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
|||
norm.copyOnWrite()[doc] = value; // set the value
|
||||
}
|
||||
|
||||
/** Read norms into a pre-allocated array. */
|
||||
@Override
|
||||
public synchronized void norms(String field, byte[] bytes, int offset)
|
||||
throws IOException {
|
||||
|
||||
ensureOpen();
|
||||
Norm norm = norms.get(field);
|
||||
if (norm == null) {
|
||||
Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
|
||||
return;
|
||||
}
|
||||
|
||||
norm.bytes(bytes, offset, maxDoc());
|
||||
}
|
||||
|
||||
|
||||
private void openNorms(Directory cfsDir, int readBufferSize) throws IOException {
|
||||
long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
|
||||
int maxDoc = maxDoc();
|
||||
|
|
|
@@ -18,19 +18,14 @@ package org.apache.lucene.index;
*/

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;

import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil; // javadoc

import org.apache.lucene.index.DirectoryReader; // javadoc
import org.apache.lucene.index.MultiReader; // javadoc
import org.apache.lucene.index.IndexReader.ReaderContext;

/**
* This class forces a composite reader (eg a {@link
@@ -88,23 +83,13 @@ public final class SlowMultiReaderWrapper extends FilterIndexReader {
return bytes;
if (!hasNorms(field))
return null;

if (normsCache.containsKey(field)) // cached omitNorms, not missing key
return null;

bytes = MultiNorms.norms(in, field);
normsCache.put(field, bytes);
return bytes;
}

@Override
public synchronized void norms(String field, byte[] bytes, int offset) throws IOException {
// TODO: maybe optimize
ensureOpen();
byte[] norms = norms(field);
if (norms == null) {
Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
} else {
System.arraycopy(norms, 0, bytes, offset, maxDoc());
}
}

@Override
public ReaderContext getTopReaderContext() {

@@ -565,11 +565,6 @@ public abstract class Similarity implements Serializable {
}

/** Decodes a normalization factor stored in an index.
* <p>
* <b>WARNING: If you override this method, you should change the default
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
* Otherwise, your method may not always be called, especially if you omit norms
* for some fields.</b>
* @see #encodeNormValue(float)
*/
public float decodeNormValue(byte b) {
@@ -662,11 +657,6 @@ public abstract class Similarity implements Serializable {
* are rounded down to the largest representable value. Positive values too
* small to represent are rounded up to the smallest positive representable
* value.
* <p>
* <b>WARNING: If you override this method, you should change the default
* Similarity to your implementation with {@link Similarity#setDefault(Similarity)}.
* Otherwise, your method may not always be called, especially if you omit norms
* for some fields.</b>
* @see org.apache.lucene.document.Field#setBoost(float)
* @see org.apache.lucene.util.SmallFloat
*/

@@ -43,6 +43,7 @@ import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
@@ -216,6 +217,9 @@ public class TestBackwardsCompatibility extends LuceneTestCase {

public void testOptimizeOldIndex() throws Exception {
for(int i=0;i<oldNames.length;i++) {
if (VERBOSE) {
System.out.println("\nTEST: index=" + oldNames[i]);
}
unzip(getDataFile("index." + oldNames[i] + ".zip"), oldNames[i]);

String fullPath = fullDir(oldNames[i]);
@@ -223,6 +227,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {

IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer()));
w.setInfoStream(VERBOSE ? System.out : null);
w.optimize();
w.close();

@@ -419,7 +424,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", (float) 2.0);
reader.setNorm(searcher.search(new TermQuery(new Term("id", "22")), 10).scoreDocs[0].doc, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.close();
searcher.close();

@@ -467,7 +472,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
Term searchTerm = new Term("id", "6");
int delCount = reader.deleteDocuments(searchTerm);
assertEquals("wrong delete count", 1, delCount);
reader.setNorm(22, "content", (float) 2.0);
reader.setNorm(22, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.close();

// make sure they "took":
@@ -526,7 +531,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals("didn't delete the right number of documents", 1, delCount);

// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", (float) 1.5);
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
reader.close();
}

@@ -563,7 +568,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals("didn't delete the right number of documents", 1, delCount);

// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", (float) 1.5);
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
reader.close();

// The numbering of fields can vary depending on which

@@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -607,7 +608,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3*i+1);
reader.setNorm(4*i+1, "content", 2.0F);
reader.setNorm(4*i+1, "content", Similarity.getDefault().encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16*(1+i), hits.length);
@@ -715,7 +716,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, policy, false);
reader.deleteDocument(3);
reader.setNorm(5, "content", 2.0F);
reader.setNorm(5, "content", Similarity.getDefault().encodeNormValue(2.0F));
IndexSearcher searcher = new IndexSearcher(reader);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals(16, hits.length);

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -68,7 +69,7 @@ public class TestIndexFileDeleter extends LuceneTestCase {
assertEquals("didn't delete the right number of documents", 1, delCount);

// Set one norm so we get a .s0 file:
reader.setNorm(21, "content", (float) 1.5);
reader.setNorm(21, "content", Similarity.getDefault().encodeNormValue(1.5f));
reader.close();

// Now, artificially create an extra .del file & extra

@@ -43,6 +43,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -464,7 +465,7 @@ public class TestIndexReader extends LuceneTestCase
}

try {
reader.setNorm(5, "aaa", 2.0f);
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
fail("setNorm after close failed to throw IOException");
} catch (AlreadyClosedException e) {
// expected
@@ -504,7 +505,7 @@ public class TestIndexReader extends LuceneTestCase
}

try {
reader.setNorm(5, "aaa", 2.0f);
reader.setNorm(5, "aaa", Similarity.getDefault().encodeNormValue(2.0f));
fail("setNorm should have hit LockObtainFailedException");
} catch (LockObtainFailedException e) {
// expected
@@ -534,7 +535,7 @@ public class TestIndexReader extends LuceneTestCase

// now open reader & set norm for doc 0
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", (float) 2.0);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));

// we should be holding the write lock now:
assertTrue("locked", IndexWriter.isLocked(dir));
@@ -548,7 +549,7 @@ public class TestIndexReader extends LuceneTestCase
IndexReader reader2 = IndexReader.open(dir, false);

// set norm again for doc 0
reader.setNorm(0, "content", (float) 3.0);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(3.0f));
assertTrue("locked", IndexWriter.isLocked(dir));

reader.close();
@@ -581,12 +582,12 @@ public class TestIndexReader extends LuceneTestCase
// now open reader & set norm for doc 0 (writes to
// _0_1.s0)
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", (float) 2.0);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.close();

// now open reader again & set norm for doc 0 (writes to _0_2.s0)
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", (float) 2.0);
reader.setNorm(0, "content", Similarity.getDefault().encodeNormValue(2.0f));
reader.close();
assertFalse("failed to remove first generation norms file on writing second generation",
dir.fileExists("_0_1.s0"));
@@ -954,7 +955,7 @@ public class TestIndexReader extends LuceneTestCase
int docId = 12;
for(int i=0;i<13;i++) {
reader.deleteDocument(docId);
reader.setNorm(docId, "content", (float) 2.0);
reader.setNorm(docId, "content", Similarity.getDefault().encodeNormValue(2.0f));
docId += 12;
}
}
@@ -1113,7 +1114,7 @@ public class TestIndexReader extends LuceneTestCase

reader = IndexReader.open(dir, false);
try {
reader.setNorm(1, "content", (float) 2.0);
reader.setNorm(1, "content", Similarity.getDefault().encodeNormValue(2.0f));
fail("did not hit exception when calling setNorm on an invalid doc number");
} catch (ArrayIndexOutOfBoundsException e) {
// expected

@@ -276,7 +276,7 @@ public class TestIndexReaderClone extends LuceneTestCase {

IndexReader pr1Clone = (IndexReader) r1.clone();
pr1Clone.deleteDocument(10);
pr1Clone.setNorm(4, "field1", 0.5f);
pr1Clone.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(r1, "field1")[4]) == norm1);
assertTrue(Similarity.getDefault().decodeNormValue(MultiNorms.norms(pr1Clone, "field1")[4]) != norm1);

@@ -327,7 +327,7 @@ public class TestIndexReaderClone extends LuceneTestCase {
TestIndexReaderReopen.createIndex(random, dir1, false);
SegmentReader origSegmentReader = getOnlySegmentReader(IndexReader.open(dir1, false));
origSegmentReader.deleteDocument(1);
origSegmentReader.setNorm(4, "field1", 0.5f);
origSegmentReader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(0.5f));

SegmentReader clonedSegmentReader = (SegmentReader) origSegmentReader
.clone();
@@ -426,7 +426,7 @@ public class TestIndexReaderClone extends LuceneTestCase {
final Directory dir1 = newDirectory();
TestIndexReaderReopen.createIndex(random, dir1, false);
IndexReader orig = IndexReader.open(dir1, false);
orig.setNorm(1, "field1", 17.0f);
orig.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(17.0f));
final byte encoded = Similarity.getDefault().encodeNormValue(17.0f);
assertEquals(encoded, MultiNorms.norms(orig, "field1")[1]);

@@ -203,11 +203,11 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
IndexReader reader4C = (IndexReader) reader3C.clone();
SegmentReader segmentReader4C = getOnlySegmentReader(reader4C);
assertEquals(4, reader3CCNorm.bytesRef().get());
reader4C.setNorm(5, "field1", 0.33f);
reader4C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.33f));

// generate a cannot update exception in reader1
try {
reader3C.setNorm(1, "field1", 0.99f);
reader3C.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(0.99f));
fail("did not hit expected exception");
} catch (Exception ex) {
// expected
@@ -223,7 +223,7 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
IndexReader reader5C = (IndexReader) reader4C.clone();
SegmentReader segmentReader5C = getOnlySegmentReader(reader5C);
Norm reader5CCNorm = segmentReader5C.norms.get("field1");
reader5C.setNorm(5, "field1", 0.7f);
reader5C.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(0.7f));
assertEquals(1, reader5CCNorm.bytesRef().get());

reader5C.close();
@@ -256,8 +256,8 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase {
// System.out.println("  and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
ir.setNorm(i, "f" + 1, newNorm);
ir.setNorm(k, "f" + 1, origNorm);
ir.setNorm(i, "f" + 1, Similarity.getDefault().encodeNormValue(newNorm));
ir.setNorm(k, "f" + 1, Similarity.getDefault().encodeNormValue(origNorm));
// System.out.println("setNorm i: "+i);
// break;
}

@@ -37,6 +37,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
@@ -614,8 +615,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {

IndexReader reader2 = reader1.reopen();
modifier = IndexReader.open(dir1, false);
modifier.setNorm(1, "field1", 50);
modifier.setNorm(1, "field2", 50);
modifier.setNorm(1, "field1", Similarity.getDefault().encodeNormValue(50f));
modifier.setNorm(1, "field2", Similarity.getDefault().encodeNormValue(50f));
modifier.close();

IndexReader reader3 = reader2.reopen();
@@ -708,7 +709,7 @@ public class TestIndexReaderReopen extends LuceneTestCase {
protected void modifyIndex(int i) throws IOException {
if (i % 3 == 0) {
IndexReader modifier = IndexReader.open(dir, false);
modifier.setNorm(i, "field1", 50);
modifier.setNorm(i, "field1", Similarity.getDefault().encodeNormValue(50f));
modifier.close();
} else if (i % 3 == 1) {
IndexReader modifier = IndexReader.open(dir, false);
@@ -984,9 +985,9 @@ public class TestIndexReaderReopen extends LuceneTestCase {
}
case 1: {
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(4, "field1", 123);
reader.setNorm(44, "field2", 222);
reader.setNorm(44, "field4", 22);
reader.setNorm(4, "field1", Similarity.getDefault().encodeNormValue(123f));
reader.setNorm(44, "field2", Similarity.getDefault().encodeNormValue(222f));
reader.setNorm(44, "field4", Similarity.getDefault().encodeNormValue(22f));
reader.close();
break;
}
@@ -1007,8 +1008,8 @@ public class TestIndexReaderReopen extends LuceneTestCase {
}
case 4: {
IndexReader reader = IndexReader.open(dir, false);
reader.setNorm(5, "field1", 123);
reader.setNorm(55, "field2", 222);
reader.setNorm(5, "field1", Similarity.getDefault().encodeNormValue(123f));
reader.setNorm(55, "field2", Similarity.getDefault().encodeNormValue(222f));
reader.close();
break;
}

@@ -169,8 +169,8 @@ public class TestNorms extends LuceneTestCase {
//System.out.println("  and: for "+k+" from "+newNorm+" to "+origNorm);
modifiedNorms.set(i, Float.valueOf(newNorm));
modifiedNorms.set(k, Float.valueOf(origNorm));
ir.setNorm(i, "f"+1, newNorm);
ir.setNorm(k, "f"+1, origNorm);
ir.setNorm(i, "f"+1, Similarity.getDefault().encodeNormValue(newNorm));
ir.setNorm(k, "f"+1, Similarity.getDefault().encodeNormValue(origNorm));
}
ir.close();
}

@@ -0,0 +1,304 @@
package org.apache.lucene.index;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;

public class TestOmitNorms extends LuceneTestCase {
// Tests whether the DocumentWriter correctly enable the
// omitNorms bit in the FieldInfo
public void testOmitNorms() throws Exception {
Directory ram = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
Document d = new Document();

// this field will have norms
Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED);
d.add(f1);

// this field will NOT have norms
Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitNorms(true);
d.add(f2);

writer.addDocument(d);
writer.optimize();
// now we add another document which has term freq for field f2 and not for f1 and verify if the SegmentMerger
// keep things constant
d = new Document();

// Reverse
f1.setOmitNorms(true);
d.add(f1);

f2.setOmitNorms(false);
d.add(f2);

writer.addDocument(d);

// force merge
writer.optimize();
// flush
writer.close();
_TestUtil.checkIndex(ram);

SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms);
assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms);

reader.close();
ram.close();
}

// Tests whether merging of docs that have different
// omitNorms for the same field works
public void testMixedMerge() throws Exception {
Directory ram = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriter writer = new IndexWriter(
ram,
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
setMaxBufferedDocs(3).
setMergePolicy(newLogMergePolicy(2))
);
Document d = new Document();

// this field will have norms
Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED);
d.add(f1);

// this field will NOT have norms
Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED);
f2.setOmitNorms(true);
d.add(f2);

for (int i = 0; i < 30; i++) {
writer.addDocument(d);
}

// now we add another document which has norms for field f2 and not for f1 and verify if the SegmentMerger
// keep things constant
d = new Document();

// Reverese
f1.setOmitNorms(true);
d.add(f1);

f2.setOmitNorms(false);
d.add(f2);

for (int i = 0; i < 30; i++) {
writer.addDocument(d);
}

// force merge
writer.optimize();
// flush
writer.close();

_TestUtil.checkIndex(ram);

SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f1").omitNorms);
assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms);

reader.close();
ram.close();
}

// Make sure first adding docs that do not omitNorms for
// field X, then adding docs that do omitNorms for that same
// field,
public void testMixedRAM() throws Exception {
Directory ram = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriter writer = new IndexWriter(
ram,
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).
setMaxBufferedDocs(10).
setMergePolicy(newLogMergePolicy(2))
);
Document d = new Document();

// this field will have norms
Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED);
d.add(f1);

// this field will NOT have norms
Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED);
d.add(f2);

for (int i = 0; i < 5; i++) {
writer.addDocument(d);
}

f2.setOmitNorms(true);

for (int i = 0; i < 20; i++) {
writer.addDocument(d);
}

// force merge
writer.optimize();

// flush
writer.close();

_TestUtil.checkIndex(ram);

SegmentReader reader = getOnlySegmentReader(IndexReader.open(ram, false));
FieldInfos fi = reader.fieldInfos();
assertTrue("OmitNorms field bit should not be set.", !fi.fieldInfo("f1").omitNorms);
assertTrue("OmitNorms field bit should be set.", fi.fieldInfo("f2").omitNorms);

reader.close();
ram.close();
}

private void assertNoNrm(Directory dir) throws Throwable {
final String[] files = dir.listAll();
for (int i = 0; i < files.length; i++) {
assertFalse(files[i].endsWith(".nrm"));
}
}

// Verifies no *.nrm exists when all fields omit norms:
public void testNoNrmFile() throws Throwable {
Directory ram = newDirectory();
Analyzer analyzer = new MockAnalyzer();
IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(
TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(3));
writer.setInfoStream(VERBOSE ? System.out : null);
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
lmp.setMergeFactor(2);
lmp.setUseCompoundFile(false);
Document d = new Document();

Field f1 = newField("f1", "This field has no norms", Field.Store.NO, Field.Index.ANALYZED);
f1.setOmitNorms(true);
d.add(f1);

for (int i = 0; i < 30; i++) {
writer.addDocument(d);
}

writer.commit();

assertNoNrm(ram);

// force merge
writer.optimize();
// flush
writer.close();

assertNoNrm(ram);
_TestUtil.checkIndex(ram);
ram.close();
}

/**
* Tests various combinations of omitNorms=true/false, the field not existing at all,
* ensuring that only omitNorms is 'viral'.
* Internally checks that MultiNorms.norms() is consistent (returns the same bytes)
* as the optimized equivalent.
*/
public void testOmitNormsCombos() throws IOException {
// indexed with norms
Field norms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED);
// indexed without norms
Field noNorms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS);
// not indexed, but stored
Field noIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO);
// not indexed but stored, omitNorms is set
Field noNormsNoIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO);
noNormsNoIndex.setOmitNorms(true);
// not indexed nor stored (doesnt exist at all, we index a different field instead)
Field emptyNorms = new Field("bar", "a", Field.Store.YES, Field.Index.ANALYZED);

assertNotNull(getNorms("foo", norms, norms));
assertNull(getNorms("foo", norms, noNorms));
assertNotNull(getNorms("foo", norms, noIndex));
assertNotNull(getNorms("foo", norms, noNormsNoIndex));
assertNotNull(getNorms("foo", norms, emptyNorms));
assertNull(getNorms("foo", noNorms, noNorms));
assertNull(getNorms("foo", noNorms, noIndex));
assertNull(getNorms("foo", noNorms, noNormsNoIndex));
assertNull(getNorms("foo", noNorms, emptyNorms));
assertNull(getNorms("foo", noIndex, noIndex));
assertNull(getNorms("foo", noIndex, noNormsNoIndex));
assertNull(getNorms("foo", noIndex, emptyNorms));
assertNull(getNorms("foo", noNormsNoIndex, noNormsNoIndex));
assertNull(getNorms("foo", noNormsNoIndex, emptyNorms));
assertNull(getNorms("foo", emptyNorms, emptyNorms));
}

/**
* Indexes at least 1 document with f1, and at least 1 document with f2.
* returns the norms for "field".
*/
static byte[] getNorms(String field, Field f1, Field f2) throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer());
RandomIndexWriter riw = new RandomIndexWriter(random, dir, iwc);

// add f1
Document d = new Document();
d.add(f1);
riw.addDocument(d);

// add f2
d = new Document();
d.add(f2);
riw.addDocument(d);

// add a mix of f1's and f2's
int numExtraDocs = _TestUtil.nextInt(random, 1, 1000);
for (int i = 0; i < numExtraDocs; i++) {
d = new Document();
d.add(random.nextBoolean() ? f1 : f2);
riw.addDocument(d);
}

IndexReader ir1 = riw.getReader();
byte[] norms1 = MultiNorms.norms(ir1, field);

// optimize and validate MultiNorms against single segment.
riw.optimize();
IndexReader ir2 = riw.getReader();
byte[] norms2 = ir2.getSequentialSubReaders()[0].norms(field);

assertArrayEquals(norms1, norms2);
ir1.close();
ir2.close();
riw.close();
dir.close();
return norms1;
}
}

@@ -147,7 +147,7 @@ public class TestParallelReader extends LuceneTestCase {

assertTrue(pr.isCurrent());
IndexReader modifier = IndexReader.open(dir1, false);
modifier.setNorm(0, "f1", 100);
modifier.setNorm(0, "f1", Similarity.getDefault().encodeNormValue(100f));
modifier.close();

// one of the two IndexReaders which ParallelReader is using
@@ -155,7 +155,7 @@ public class TestParallelReader extends LuceneTestCase {
assertFalse(pr.isCurrent());

modifier = IndexReader.open(dir2, false);
modifier.setNorm(0, "f3", 100);
modifier.setNorm(0, "f3", Similarity.getDefault().encodeNormValue(100f));
modifier.close();

// now both are not current anymore

@@ -27,7 +27,6 @@ import org.apache.lucene.util.BytesRef;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;

public class TestSegmentReader extends LuceneTestCase {
@@ -180,15 +179,9 @@ public class TestSegmentReader extends LuceneTestCase {
assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms());
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
if (!reader.hasNorms(f.name())) {
// test for fake norms of 1.0 or null depending on the flag
// test for norms of null
byte [] norms = MultiNorms.norms(reader, f.name());
byte norm1 = Similarity.getDefault().encodeNormValue(1.0f);
assertNull(norms);
norms = new byte[reader.maxDoc()];
MultiNorms.norms(reader, f.name(),norms, 0);
for (int j=0; j<reader.maxDoc(); j++) {
assertEquals(norms[j], norm1);
}
}
}
}

@@ -69,7 +69,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
assertEquals("one", ir.document(hits[2].doc).get("key"));

// change norm & retest
ir.setNorm(0, "key", 400f);
ir.setNorm(0, "key", Similarity.getDefault().encodeNormValue(400f));
normsQuery = new MatchAllDocsQuery("key");
hits = is.search(normsQuery, null, 1000).scoreDocs;
assertEquals(3, hits.length);

@@ -51,10 +51,10 @@ public class TestSetNorm extends LuceneTestCase {

// reset the boost of each instance of this document
IndexReader reader = IndexReader.open(store, false);
reader.setNorm(0, "field", 1.0f);
reader.setNorm(1, "field", 2.0f);
reader.setNorm(2, "field", 4.0f);
reader.setNorm(3, "field", 16.0f);
reader.setNorm(0, "field", Similarity.getDefault().encodeNormValue(1.0f));
reader.setNorm(1, "field", Similarity.getDefault().encodeNormValue(2.0f));
reader.setNorm(2, "field", Similarity.getDefault().encodeNormValue(4.0f));
reader.setNorm(3, "field", Similarity.getDefault().encodeNormValue(16.0f));
reader.close();

// check that searches are ordered by this boost

@@ -287,11 +287,6 @@ public class SolrIndexReader extends FilterIndexReader {
return in.norms(f);
}

@Override
public void norms(String f, byte[] bytes, int offset) throws IOException {
in.norms(f, bytes, offset);
}

@Override
protected void doSetNorm(int d, String f, byte b) throws CorruptIndexException, IOException {
in.setNorm(d, f, b);
@@ -472,11 +467,6 @@ public class SolrIndexReader extends FilterIndexReader {
in.setNorm(doc, field, value);
}

@Override
public void setNorm(int doc, String field, float value) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
in.setNorm(doc, field, value);
}

@Override
public void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
in.undeleteAll();