mirror of https://github.com/apache/lucene.git
Make field norms optional (LUCENE-448)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@329366 13f79535-47bb-0310-9956-ffa450edef68
commit ae11eb845b (parent 52c12ea335)
CHANGES.txt
@@ -145,6 +145,10 @@ New features
 20. Added a new class MatchAllDocsQuery that matches all documents.
     (John Wang via Daniel Naber, bug #34946)

+21. Added ability to omit norms on a per field basis to decrease
+    index size and memory consumption when there are many indexed fields.
+    See Field.setOmitNorms()
+    (Yonik Seeley, LUCENE-448)

 API Changes

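The CHANGES entry above is the whole feature in brief. A minimal usage sketch against the new API (the field names and values are illustrative, and this assumes the Field.Store/Field.Index style constructor from this release line):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class OmitNormsExample {
  public static void main(String[] args) {
    Document doc = new Document();

    // New index mode: indexed without an Analyzer, norms omitted.
    doc.add(new Field("category", "electronics",
                      Field.Store.YES, Field.Index.NO_NORMS));

    // Or keep ordinary tokenized indexing and just drop the norms.
    Field title = new Field("title", "a portable radio",
                            Field.Store.YES, Field.Index.TOKENIZED);
    title.setOmitNorms(true);
    doc.add(title);
  }
}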
fileformats.xml
@@ -1245,10 +1245,20 @@ limitations under the License.
 FieldBits --> Byte
 </p>
 <p>
+<ul>
+<li>
 The low-order bit is one for
-indexed fields, and zero for non-indexed fields. The second lowest-order
+indexed fields, and zero for non-indexed fields.
+</li>
+<li>
+The second lowest-order
 bit is one for fields that have term vectors stored, and zero for fields
 without term vectors.
+</li>
+<li> If the third lowest-order bit is set (0x04), term positions are stored with the term vectors. </li>
+<li> If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors. </li>
+<li> If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field. </li>
+</ul>
 </p>
 <p>
 Fields are numbered by their order in this file. Thus field zero is
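The list above maps directly onto bit masks. A small decoding sketch; the IS_INDEXED value follows from the low-order-bit rule above, and the remaining constants appear verbatim in the FieldInfos hunk later in this commit:

public class FieldBitsExample {
  static final byte IS_INDEXED = 0x1;
  static final byte STORE_TERMVECTOR = 0x2;
  static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
  static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
  static final byte OMIT_NORMS = 0x10;

  public static void main(String[] args) {
    byte bits = IS_INDEXED | OMIT_NORMS; // an indexed field with norms omitted
    System.out.println("indexed:   " + ((bits & IS_INDEXED) != 0));
    System.out.println("vectors:   " + ((bits & STORE_TERMVECTOR) != 0));
    System.out.println("positions: " + ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0));
    System.out.println("offsets:   " + ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0));
    System.out.println("omitNorms: " + ((bits & OMIT_NORMS) != 0));
  }
}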
Field.java
@@ -42,6 +42,7 @@ public final class Field implements Serializable {
   private boolean storeTermVector = false;
   private boolean storeOffsetWithTermVector = false;
   private boolean storePositionWithTermVector = false;
+  private boolean omitNorms = false;
   private boolean isStored = false;
   private boolean isIndexed = true;
   private boolean isTokenized = true;
@@ -94,6 +95,15 @@ public final class Field implements Serializable {
      * useful for unique Ids like product numbers.
      */
     public static final Index UN_TOKENIZED = new Index("UN_TOKENIZED");
+
+    /** Index the field's value without an Analyzer, and disable
+     * the storing of norms. No norms means that index-time boosting
+     * and field length normalization will be disabled. The benefit is
+     * less memory usage as norms take up one byte per indexed field
+     * for every document in the index.
+     */
+    public static final Index NO_NORMS = new Index("NO_NORMS");
+
   }

   public static final class TermVector extends Parameter implements Serializable {
@@ -338,6 +348,10 @@ public final class Field implements Serializable {
     } else if (index == Index.UN_TOKENIZED) {
       this.isIndexed = true;
       this.isTokenized = false;
+    } else if (index == Index.NO_NORMS) {
+      this.isIndexed = true;
+      this.isTokenized = false;
+      this.omitNorms = true;
     } else {
       throw new IllegalArgumentException("unknown index parameter " + index);
     }
@@ -540,6 +554,16 @@ public final class Field implements Serializable {
   /** True iff the value of the field is stored as binary */
   public final boolean isBinary() { return isBinary; }

+  /** True if norms are omitted for this indexed field */
+  public boolean getOmitNorms() { return omitNorms; }
+
+  /** Expert:
+   *
+   * If set, omit normalization factors associated with this indexed field.
+   * This effectively disables indexing boosts and length normalization for this field.
+   */
+  public void setOmitNorms(boolean omitNorms) { this.omitNorms = omitNorms; }
+
   /** Prints a Field for human consumption. */
   public final String toString() {
     StringBuffer result = new StringBuffer();
@@ -580,7 +604,9 @@ public final class Field implements Serializable {
       result.append(",");
       result.append("binary");
     }
+    if (omitNorms) {
+      result.append(",omitNorms");
+    }
     result.append('<');
     result.append(name);
     result.append(':');
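The savings promised in the NO_NORMS javadoc are easy to quantify: norms cost one byte per indexed field per document, and a searcher loads the norms of each searched field into memory. As a worked example, 20 indexed fields over 10,000,000 documents is 20 × 10,000,000 = 200,000,000 norm bytes, roughly 200 MB on disk and in the heap; omitting norms on fields that never need length normalization or index-time boosts reclaims that space for those fields.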
DocumentWriter.java
@@ -371,7 +371,7 @@ final class DocumentWriter {
   private final void writeNorms(String segment) throws IOException {
     for(int n = 0; n < fieldInfos.size(); n++){
       FieldInfo fi = fieldInfos.fieldInfo(n);
-      if(fi.isIndexed){
+      if(fi.isIndexed && !fi.omitNorms){
         float norm = fieldBoosts[n] * similarity.lengthNorm(fi.name, fieldLengths[n]);
         IndexOutput norms = directory.createOutput(segment + ".f" + n);
         try {
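For context on the branch guarded above: when norms are written, each document contributes one byte derived from the field boost and field length. A sketch of that encoding step, assuming the public Similarity API of this release line (the boost, token count, and field name are illustrative):

import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;

public class NormEncodingExample {
  public static void main(String[] args) {
    Similarity similarity = new DefaultSimilarity();
    float boost = 1.0f;  // field boost set at index time
    int numTokens = 42;  // number of tokens the field produced
    float norm = boost * similarity.lengthNorm("body", numTokens);
    byte encoded = Similarity.encodeNorm(norm); // the single byte written per doc
    System.out.println("norm=" + norm + " encoded byte=" + encoded);
  }
}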
FieldInfo.java
@@ -26,13 +26,16 @@ final class FieldInfo {
   boolean storeOffsetWithTermVector;
   boolean storePositionWithTermVector;

+  boolean omitNorms; // omit norms associated with indexed fields
+
   FieldInfo(String na, boolean tk, int nu, boolean storeTermVector,
-            boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+            boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     name = na;
     isIndexed = tk;
     number = nu;
     this.storeTermVector = storeTermVector;
     this.storeOffsetWithTermVector = storeOffsetWithTermVector;
     this.storePositionWithTermVector = storePositionWithTermVector;
+    this.omitNorms = omitNorms;
   }
 }
FieldInfos.java
@@ -38,6 +38,7 @@ final class FieldInfos {
   static final byte STORE_TERMVECTOR = 0x2;
   static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4;
   static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8;
+  static final byte OMIT_NORMS = 0x10;

   private ArrayList byNumber = new ArrayList();
   private HashMap byName = new HashMap();
@@ -66,7 +67,7 @@ final class FieldInfos {
     while (fields.hasMoreElements()) {
       Field field = (Field) fields.nextElement();
       add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
-          field.isStoreOffsetWithTermVector());
+          field.isStoreOffsetWithTermVector(), field.getOmitNorms());
     }
   }

@@ -109,7 +110,7 @@ final class FieldInfos {
    * @see #add(String, boolean, boolean, boolean, boolean)
    */
   public void add(String name, boolean isIndexed) {
-    add(name, isIndexed, false, false, false);
+    add(name, isIndexed, false, false, false, false);
   }

   /**
@@ -120,7 +121,7 @@ final class FieldInfos {
    * @param storeTermVector true if the term vector should be stored
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector){
-    add(name, isIndexed, storeTermVector, false, false);
+    add(name, isIndexed, storeTermVector, false, false, false);
   }

   /** If the field is not yet known, adds it. If it is known, checks to make
@@ -136,9 +137,27 @@ final class FieldInfos {
    */
   public void add(String name, boolean isIndexed, boolean storeTermVector,
                   boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) {
+
+    add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false);
+  }
+
+  /** If the field is not yet known, adds it. If it is known, checks to make
+   *  sure that the isIndexed flag is the same as was given previously for this
+   *  field. If not - marks it as being indexed. Same goes for the TermVector
+   *  parameters.
+   *
+   * @param name The name of the field
+   * @param isIndexed true if the field is indexed
+   * @param storeTermVector true if the term vector should be stored
+   * @param storePositionWithTermVector true if the term vector with positions should be stored
+   * @param storeOffsetWithTermVector true if the term vector with offsets should be stored
+   * @param omitNorms true if the norms for the indexed field should be omitted
+   */
+  public void add(String name, boolean isIndexed, boolean storeTermVector,
+                  boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi = fieldInfo(name);
     if (fi == null) {
-      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector);
+      addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms);
     } else {
       if (fi.isIndexed != isIndexed) {
         fi.isIndexed = true; // once indexed, always index
@@ -152,15 +171,20 @@ final class FieldInfos {
       if (fi.storeOffsetWithTermVector != storeOffsetWithTermVector) {
         fi.storeOffsetWithTermVector = true; // once vector, always vector
       }
+      if (fi.omitNorms != omitNorms) {
+        fi.omitNorms = false; // once norms are stored, always store
+      }
+
     }
   }


   private void addInternal(String name, boolean isIndexed,
                            boolean storeTermVector, boolean storePositionWithTermVector,
-                           boolean storeOffsetWithTermVector) {
+                           boolean storeOffsetWithTermVector, boolean omitNorms) {
     FieldInfo fi =
       new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector,
-                    storeOffsetWithTermVector);
+                    storeOffsetWithTermVector, omitNorms);
     byNumber.add(fi);
     byName.put(name, fi);
   }
@@ -245,6 +269,7 @@ final class FieldInfos {
       if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
       if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
       if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
+      if (fi.omitNorms) bits |= OMIT_NORMS;
       output.writeString(fi.name);
       output.writeByte(bits);
     }
@@ -259,7 +284,9 @@ final class FieldInfos {
       boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
       boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
       boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
-      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector);
+      boolean omitNorms = (bits & OMIT_NORMS) != 0;
+
+      addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms);
     }
   }

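Note the asymmetry in the new merge rule above: a field stays norm-free only if every addition of it omits norms, mirroring the existing "once indexed, always index" logic. A hypothetical illustration (FieldInfos is package-private, so this only compiles inside org.apache.lucene.index; the field name is illustrative):

package org.apache.lucene.index;

public class OmitNormsMergeExample {
  public static void main(String[] args) {
    FieldInfos infos = new FieldInfos();
    infos.add("body", true, false, false, false, true);  // first seen with omitNorms
    infos.add("body", true, false, false, false, false); // later occurrence wants norms
    // One occurrence that stores norms forces norms for the whole field:
    System.out.println(infos.fieldInfo("body").omitNorms); // prints false
  }
}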
FilterIndexReader.java
@@ -107,6 +107,10 @@ public class FilterIndexReader extends IndexReader {
   public boolean hasDeletions() { return in.hasDeletions(); }
   protected void doUndeleteAll() throws IOException { in.undeleteAll(); }

+  public boolean hasNorms(String field) throws IOException {
+    return in.hasNorms(field);
+  }
+
   public byte[] norms(String f) throws IOException { return in.norms(f); }
   public void norms(String f, byte[] bytes, int offset) throws IOException {
     in.norms(f, bytes, offset);
IndexReader.java
@@ -338,6 +338,13 @@ public abstract class IndexReader {
   /** Returns true if any documents have been deleted */
   public abstract boolean hasDeletions();

+  /** Returns true if there are norms stored for this field. */
+  public boolean hasNorms(String field) throws IOException {
+    // backward compatible implementation.
+    // SegmentReader has an efficient implementation.
+    return norms(field) != null;
+  }
+
   /** Returns the byte-encoded normalization factor for the named field of
    * every document. This is used by the search code to score documents.
    *
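A sketch of how a caller might use the new method; the index path and field name are illustrative:

import org.apache.lucene.index.IndexReader;

public class HasNormsExample {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index");
    if (reader.hasNorms("title")) {
      byte[] norms = reader.norms("title"); // one byte per document
      System.out.println("title: " + norms.length + " norm bytes");
    } else {
      System.out.println("norms are omitted for 'title'");
    }
    reader.close();
  }
}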
MultiReader.java
@@ -145,10 +145,25 @@ public class MultiReader extends IndexReader {
     return hi;
   }

+  public boolean hasNorms(String field) throws IOException {
+    for (int i = 0; i < subReaders.length; i++) {
+      if (subReaders[i].hasNorms(field)) return true;
+    }
+    return false;
+  }
+
+  private byte[] ones;
+  private synchronized byte[] fakeNorms() {
+    if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc());
+    return ones;
+  }
+
   public synchronized byte[] norms(String field) throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
     if (bytes != null)
       return bytes; // cache hit
+    if (!hasNorms(field))
+      return fakeNorms();

     bytes = new byte[maxDoc()];
     for (int i = 0; i < subReaders.length; i++)
@@ -160,6 +175,7 @@ public class MultiReader extends IndexReader {
   public synchronized void norms(String field, byte[] result, int offset)
     throws IOException {
     byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes==null && !hasNorms(field)) bytes=fakeNorms();
     if (bytes != null)   // cache hit
       System.arraycopy(bytes, 0, result, offset, maxDoc());

ParallelReader.java
@@ -165,6 +165,10 @@ public class ParallelReader extends IndexReader {
     return ((IndexReader)fieldToReader.get(field)).getTermFreqVector(n, field);
   }

+  public boolean hasNorms(String field) throws IOException {
+    return ((IndexReader)fieldToReader.get(field)).hasNorms(field);
+  }
+
   public byte[] norms(String field) throws IOException {
     return ((IndexReader)fieldToReader.get(field)).norms(field);
   }
SegmentMerger.java
@@ -18,6 +18,7 @@ package org.apache.lucene.index;

 import java.util.Vector;
 import java.util.Iterator;
+import java.util.Collection;
 import java.io.IOException;

 import org.apache.lucene.store.Directory;
@@ -122,7 +123,7 @@ final class SegmentMerger {
     // Field norm files
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         files.add(segment + ".f" + i);
       }
     }
@@ -146,6 +147,15 @@ final class SegmentMerger {
     return files;
   }

+  private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector,
+                          boolean storeOffsetWithTermVector) throws IOException {
+    Iterator i = names.iterator();
+    while (i.hasNext()) {
+      String field = (String)i.next();
+      fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field));
+    }
+  }
+
   /**
    *
    * @return The number of documents in all of the readers
@@ -156,11 +166,11 @@ final class SegmentMerger {
     int docCount = 0;
     for (int i = 0; i < readers.size(); i++) {
       IndexReader reader = (IndexReader) readers.elementAt(i);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
-      fieldInfos.addIndexed(reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false);
+      addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false);
       fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false);
     }
     fieldInfos.write(directory, segment + ".fnm");
@@ -386,7 +396,7 @@ final class SegmentMerger {
   private void mergeNorms() throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         IndexOutput output = directory.createOutput(segment + ".f" + i);
         try {
           for (int j = 0; j < readers.size(); j++) {
SegmentReader.java
@@ -17,12 +17,7 @@ package org.apache.lucene.index;
  */

 import java.io.IOException;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Hashtable;
-import java.util.Set;
-import java.util.Vector;
+import java.util.*;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -30,6 +25,7 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;
+import org.apache.lucene.search.DefaultSimilarity;

 /**
  * @version $Id$
@@ -260,7 +256,7 @@ class SegmentReader extends IndexReader {

     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed){
+      if (fi.isIndexed && !fi.omitNorms){
         String name;
         if(cfsReader == null)
           name = segment + ".f" + i;
@@ -442,10 +438,28 @@ class SegmentReader extends IndexReader {
     return fieldSet;
   }

-  public synchronized byte[] norms(String field) throws IOException {
+  public synchronized boolean hasNorms(String field) {
+    return norms.containsKey(field);
+  }
+
+  static byte[] createFakeNorms(int size) {
+    byte[] ones = new byte[size];
+    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
+    return ones;
+  }
+
+  private byte[] ones;
+  private synchronized byte[] fakeNorms() {
+    if (ones==null) ones=createFakeNorms(maxDoc());
+    return ones;
+  }
+
+  // can return null if norms aren't stored
+  protected synchronized byte[] getNorms(String field) throws IOException {
     Norm norm = (Norm) norms.get(field);
-    if (norm == null) // not an indexed field
-      return null;
+    if (norm == null) return null; // not indexed, or norms not stored
     if (norm.bytes == null) { // value not yet read
       byte[] bytes = new byte[maxDoc()];
       norms(field, bytes, 0);
@@ -454,6 +468,13 @@ class SegmentReader extends IndexReader {
     return norm.bytes;
   }

+  // returns fake norms if norms aren't available
+  public synchronized byte[] norms(String field) throws IOException {
+    byte[] bytes = getNorms(field);
+    if (bytes==null) bytes=fakeNorms();
+    return bytes;
+  }
+
   protected void doSetNorm(int doc, String field, byte value)
     throws IOException {
     Norm norm = (Norm) norms.get(field);
@@ -470,8 +491,10 @@ class SegmentReader extends IndexReader {
     throws IOException {

     Norm norm = (Norm) norms.get(field);
-    if (norm == null)
-      return;          // use zeros in array
+    if (norm == null) {
+      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
+      return;
+    }

     if (norm.bytes != null) { // can copy from cache
       System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
@@ -487,10 +510,11 @@ class SegmentReader extends IndexReader {
     }
   }

+
   private void openNorms(Directory cfsDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
-      if (fi.isIndexed) {
+      if (fi.isIndexed && !fi.omitNorms) {
         // look first if there are separate norms in compound format
         String fileName = segment + ".s" + fi.number;
         Directory d = directory();
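The fake-norms substitution above is what keeps existing scoring code oblivious to omitted norms: instead of null, a norm-free field yields an array filled with the encoded value of 1.0f, a neutral boost for every document. A small sketch mirroring createFakeNorms (the document count is illustrative):

import java.util.Arrays;
import org.apache.lucene.search.DefaultSimilarity;

public class FakeNormsExample {
  public static void main(String[] args) {
    int maxDoc = 5; // illustrative document count
    byte[] ones = new byte[maxDoc];
    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
    // Decoding any entry gives back a neutral norm of 1.0:
    System.out.println(DefaultSimilarity.decodeNorm(ones[0]));
  }
}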
fileformats.html
@@ -848,10 +848,20 @@
 </p>

 <p>
+<ul>
+<li>
 The low-order bit is one for
-indexed fields, and zero for non-indexed fields. The second lowest-order
+indexed fields, and zero for non-indexed fields.
+</li>
+<li>
+The second lowest-order
 bit is one for fields that have term vectors stored, and zero for fields
 without term vectors.
+</li>
+<li> If the third lowest-order bit is set (0x04), term positions are stored with the term vectors. </li>
+<li> If the fourth lowest-order bit is set (0x08), term offsets are stored with the term vectors. </li>
+<li> If the fifth lowest-order bit is set (0x10), norms are omitted for the indexed field. </li>
+</ul>
 </p>

 <p>