mirror of https://github.com/apache/lucene.git
LUCENE-3687: Allow similarity to encode norms other than a single byte
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1232014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
354a3be78f
commit
98e59fceee
|
@ -622,6 +622,11 @@ New features
|
|||
* LUCENE-3628: Norms are represented as DocValues. IndexReader exposes
|
||||
a #normValues(String) method to obtain norms per field. (Simon Willnauer)
|
||||
|
||||
* LUCENE-3687: Similarity#computeNorm(FieldInvertState, Norm) allows computing
|
||||
norm values of arbitrary precision. Instead of returning a fixed single byte
|
||||
value, custom similarities can now set an integer, float or byte value to the
|
||||
given Norm object. (Simon Willnauer)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
|
||||
|
|
|
@ -550,3 +550,7 @@ you can now do this:
|
|||
IndexReader.openIfChanged (a static method), and now returns null
|
||||
(instead of the old reader) if there are no changes to the index, to
|
||||
prevent the common pitfall of accidentally closing the old reader.
|
||||
|
||||
* LUCENE-3687: Similarity#computeNorm() now expects a Norm object to set the computed
|
||||
norm value instead of returning a fixed single byte value. Custom similarities can now
|
||||
set integer, float and byte values if a single byte is not sufficient.
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
|
@ -48,7 +49,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleByteSource;
|
||||
import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleValueSource;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -1157,8 +1158,9 @@ public class MemoryIndex {
|
|||
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
|
||||
float boost = info != null ? info.getBoost() : 1.0f;
|
||||
FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost);
|
||||
byte norm = fieldSim.computeNorm(invertState);
|
||||
SingleByteSource singleByteSource = new SingleByteSource(new byte[] {norm});
|
||||
Norm norm = new Norm();
|
||||
fieldSim.computeNorm(invertState, norm);
|
||||
SingleValueSource singleByteSource = new SingleValueSource(norm);
|
||||
norms = new MemoryIndexNormDocValues(singleByteSource);
|
||||
// cache it for future reuse
|
||||
cachedNormValues = norms;
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.index.memory;
|
|||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
|
@ -51,21 +52,56 @@ class MemoryIndexNormDocValues extends DocValues {
|
|||
return 1;
|
||||
}
|
||||
|
||||
public static class SingleByteSource extends Source {
|
||||
public static class SingleValueSource extends Source {
|
||||
|
||||
private final byte[] bytes;
|
||||
private final Number numericValue;
|
||||
private final BytesRef binaryValue;
|
||||
|
||||
protected SingleByteSource(byte[] bytes) {
|
||||
super(Type.BYTES_FIXED_STRAIGHT);
|
||||
this.bytes = bytes;
|
||||
protected SingleValueSource(Norm norm) {
|
||||
super(norm.type());
|
||||
this.numericValue = norm.field().numericValue();
|
||||
this.binaryValue = norm.field().binaryValue();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
switch (type) {
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case VAR_INTS:
|
||||
assert numericValue != null;
|
||||
return numericValue.longValue();
|
||||
}
|
||||
return super.getInt(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getFloat(int docID) {
|
||||
switch (type) {
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
assert numericValue != null;
|
||||
return numericValue.floatValue();
|
||||
}
|
||||
return super.getFloat(docID);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytes(int docID, BytesRef ref) {
|
||||
ref.bytes = bytes;
|
||||
ref.offset = docID;
|
||||
ref.length = 1;
|
||||
return ref;
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
assert binaryValue != null;
|
||||
ref.copyBytes(binaryValue);
|
||||
return ref;
|
||||
}
|
||||
return super.getBytes(docID, ref);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -75,9 +111,33 @@ class MemoryIndexNormDocValues extends DocValues {
|
|||
|
||||
@Override
|
||||
public Object getArray() {
|
||||
return bytes;
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return binaryValue.bytes;
|
||||
case FIXED_INTS_16:
|
||||
return new short[] { numericValue.shortValue() };
|
||||
case FIXED_INTS_32:
|
||||
return new int[] { numericValue.intValue() };
|
||||
case FIXED_INTS_64:
|
||||
return new long[] { numericValue.longValue() };
|
||||
case FIXED_INTS_8:
|
||||
return new byte[] { numericValue.byteValue() };
|
||||
case VAR_INTS:
|
||||
return new long[] { numericValue.longValue() };
|
||||
case FLOAT_32:
|
||||
return new float[] { numericValue.floatValue() };
|
||||
case FLOAT_64:
|
||||
return new double[] { numericValue.doubleValue() };
|
||||
default:
|
||||
throw new IllegalArgumentException("unknown type " + type);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.misc;
|
|||
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Norm;
|
||||
|
||||
/**
|
||||
* A similarity with a lengthNorm that provides for a "plateau" of
|
||||
|
@ -106,7 +107,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
* discountOverlaps is true by default or true for this
|
||||
* specific field. */
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
final int numTokens;
|
||||
|
||||
if (discountOverlaps)
|
||||
|
@ -114,7 +115,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
else
|
||||
numTokens = state.getLength();
|
||||
|
||||
return encodeNormValue(state.getBoost() * computeLengthNorm(numTokens));
|
||||
norm.setByte(encodeNormValue(state.getBoost() * computeLengthNorm(numTokens)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,12 +24,24 @@ import org.apache.lucene.search.similarities.Similarity;
|
|||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.TFIDFSimilarity;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
|
||||
|
||||
/**
|
||||
* Test of the SweetSpotSimilarity
|
||||
*/
|
||||
public class SweetSpotSimilarityTest extends LuceneTestCase {
|
||||
|
||||
public static float computeAndDecodeNorm(SweetSpotSimilarity decode, Similarity encode, FieldInvertState state) {
|
||||
return decode.decodeNormValue(computeAndGetNorm(encode, state));
|
||||
}
|
||||
|
||||
public static byte computeAndGetNorm(Similarity s, FieldInvertState state) {
|
||||
Norm norm = new Norm();
|
||||
s.computeNorm(state, norm);
|
||||
return norm.field().numericValue().byteValue();
|
||||
}
|
||||
|
||||
public void testSweetSpotComputeNorm() {
|
||||
|
||||
|
@ -45,9 +57,13 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
invertState.setBoost(1.0f);
|
||||
for (int i = 1; i < 1000; i++) {
|
||||
invertState.setLength(i);
|
||||
Norm lNorm = new Norm();
|
||||
Norm rNorm = new Norm();
|
||||
d.computeNorm(invertState, lNorm);
|
||||
s.computeNorm(invertState, rNorm);
|
||||
assertEquals("base case: i="+i,
|
||||
d.computeNorm(invertState),
|
||||
s.computeNorm(invertState),
|
||||
computeAndGetNorm(d, invertState),
|
||||
computeAndGetNorm(s, invertState),
|
||||
0.0f);
|
||||
}
|
||||
|
||||
|
@ -59,15 +75,15 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
invertState.setLength(i);
|
||||
assertEquals("3,10: spot i="+i,
|
||||
1.0f,
|
||||
ss.decodeNormValue(s.computeNorm(invertState)),
|
||||
computeAndDecodeNorm(ss, ss, invertState),
|
||||
0.0f);
|
||||
}
|
||||
|
||||
for (int i = 10; i < 1000; i++) {
|
||||
invertState.setLength(i-9);
|
||||
final byte normD = d.computeNorm(invertState);
|
||||
final byte normD = computeAndGetNorm(d, invertState);
|
||||
invertState.setLength(i);
|
||||
final byte normS = s.computeNorm(invertState);
|
||||
final byte normS = computeAndGetNorm(s, invertState);
|
||||
assertEquals("3,10: 10<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -105,14 +121,14 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
invertState.setLength(i);
|
||||
assertEquals("f: 3,10: spot i="+i,
|
||||
1.0f,
|
||||
ss.decodeNormValue(sp.get("foo").computeNorm(invertState)),
|
||||
computeAndDecodeNorm(ss, sp.get("foo"), invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 10; i < 1000; i++) {
|
||||
invertState.setLength(i-9);
|
||||
final byte normD = d.computeNorm(invertState);
|
||||
final byte normD = computeAndGetNorm(d, invertState);
|
||||
invertState.setLength(i);
|
||||
final byte normS = sp.get("foo").computeNorm(invertState);
|
||||
final byte normS = computeAndGetNorm(sp.get("foo"), invertState);
|
||||
assertEquals("f: 3,10: 10<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -122,21 +138,21 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
invertState.setLength(i);
|
||||
assertEquals("f: 8,13: spot i="+i,
|
||||
1.0f,
|
||||
ss.decodeNormValue(sp.get("bar").computeNorm(invertState)),
|
||||
computeAndDecodeNorm(ss, sp.get("bar"), invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 6; i <=9; i++) {
|
||||
invertState.setLength(i);
|
||||
assertEquals("f: 6,9: spot i="+i,
|
||||
1.0f,
|
||||
ss.decodeNormValue(sp.get("yak").computeNorm(invertState)),
|
||||
computeAndDecodeNorm(ss, sp.get("yak"), invertState),
|
||||
0.0f);
|
||||
}
|
||||
for (int i = 13; i < 1000; i++) {
|
||||
invertState.setLength(i-12);
|
||||
final byte normD = d.computeNorm(invertState);
|
||||
final byte normD = computeAndGetNorm(d, invertState);
|
||||
invertState.setLength(i);
|
||||
final byte normS = sp.get("bar").computeNorm(invertState);
|
||||
final byte normS = computeAndGetNorm(sp.get("bar"), invertState);
|
||||
assertEquals("f: 8,13: 13<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -144,9 +160,9 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
}
|
||||
for (int i = 9; i < 1000; i++) {
|
||||
invertState.setLength(i-8);
|
||||
final byte normD = d.computeNorm(invertState);
|
||||
final byte normD = computeAndGetNorm(d, invertState);
|
||||
invertState.setLength(i);
|
||||
final byte normS = sp.get("yak").computeNorm(invertState);
|
||||
final byte normS = computeAndGetNorm(sp.get("yak"), invertState);
|
||||
assertEquals("f: 6,9: 9<x : i="+i,
|
||||
normD,
|
||||
normS,
|
||||
|
@ -158,8 +174,8 @@ public class SweetSpotSimilarityTest extends LuceneTestCase {
|
|||
|
||||
for (int i = 9; i < 1000; i++) {
|
||||
invertState.setLength(i);
|
||||
final byte normSS = sp.get("a").computeNorm(invertState);
|
||||
final byte normS = sp.get("b").computeNorm(invertState);
|
||||
final byte normSS = computeAndGetNorm(sp.get("a"), invertState);
|
||||
final byte normS = computeAndGetNorm(sp.get("b"), invertState);
|
||||
assertTrue("s: i="+i+" : a="+normSS+
|
||||
" < b="+normS,
|
||||
normSS < normS);
|
||||
|
|
|
@ -115,23 +115,13 @@ public abstract class DocValuesConsumer {
|
|||
final Field scratchField;
|
||||
switch(type) {
|
||||
case VAR_INTS:
|
||||
scratchField = new DocValuesField("", (long) 0, type);
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
scratchField = new DocValuesField("", (short) 0, type);
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
scratchField = new DocValuesField("", 0, type);
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
scratchField = new DocValuesField("", (long) 0, type);
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
scratchField = new DocValuesField("", (byte) 0, type);
|
||||
scratchField = new DocValuesField("", (long) 0, type);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
scratchField = new DocValuesField("", (float) 0, type);
|
||||
break;
|
||||
case FLOAT_64:
|
||||
scratchField = new DocValuesField("", (double) 0, type);
|
||||
break;
|
||||
|
|
|
@ -52,7 +52,10 @@ public abstract class PerDocConsumer implements Closeable {
|
|||
for (int i = 0; i < docValues.length; i++) {
|
||||
docValues[i] = getDocValuesForMerge(mergeState.readers.get(i).reader, fieldInfo);
|
||||
}
|
||||
final DocValuesConsumer docValuesConsumer = addValuesField(getDocValuesType(fieldInfo), fieldInfo);
|
||||
Type docValuesType = getDocValuesType(fieldInfo);
|
||||
assert docValuesType != null;
|
||||
|
||||
final DocValuesConsumer docValuesConsumer = addValuesField(docValuesType, fieldInfo);
|
||||
assert docValuesConsumer != null;
|
||||
docValuesConsumer.merge(mergeState, docValues);
|
||||
}
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.codecs.PostingsFormat;
|
|||
import org.apache.lucene.codecs.SegmentInfosFormat;
|
||||
import org.apache.lucene.codecs.StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.TermVectorsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40SegmentInfosFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40StoredFieldsFormat;
|
||||
import org.apache.lucene.codecs.lucene40.Lucene40TermVectorsFormat;
|
||||
|
@ -56,7 +55,7 @@ public class Lucene3xCodec extends Codec {
|
|||
private final TermVectorsFormat vectorsFormat = new Lucene40TermVectorsFormat();
|
||||
|
||||
// TODO: this should really be a different impl
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene40FieldInfosFormat();
|
||||
private final FieldInfosFormat fieldInfosFormat = new Lucene3xFieldInfosFormat();
|
||||
|
||||
// TODO: this should really be a different impl
|
||||
// also if we want preflex to *really* be read-only it should throw exception for the writer?
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
package org.apache.lucene.codecs.lucene3x;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Lucene3xFieldInfosFormat extends FieldInfosFormat {
|
||||
private final FieldInfosReader reader = new Lucene3xFieldInfosReader();
|
||||
|
||||
@Override
|
||||
public FieldInfosReader getFieldInfosReader() throws IOException {
|
||||
return reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
||||
throw new IllegalArgumentException("this codec can only be used for reading");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
Lucene3xFieldInfosReader.files(dir, info, files);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,123 @@
|
|||
package org.apache.lucene.codecs.lucene3x;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class Lucene3xFieldInfosReader extends FieldInfosReader {
|
||||
/** Extension of field infos */
|
||||
static final String FIELD_INFOS_EXTENSION = "fnm";
|
||||
|
||||
// First used in 2.9; prior to 2.9 there was no format header
|
||||
static final int FORMAT_START = -2;
|
||||
// First used in 3.4: omit only positional information
|
||||
static final int FORMAT_OMIT_POSITIONS = -3;
|
||||
static final int FORMAT_MINIMUM = FORMAT_START;
|
||||
static final int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS;
|
||||
static final byte IS_INDEXED = 0x1;
|
||||
static final byte STORE_TERMVECTOR = 0x2;
|
||||
static final byte OMIT_NORMS = 0x10;
|
||||
static final byte STORE_PAYLOADS = 0x20;
|
||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||
static final byte OMIT_POSITIONS = -128;
|
||||
|
||||
@Override
|
||||
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
|
||||
IndexInput input = directory.openInput(fileName, iocontext);
|
||||
|
||||
boolean hasVectors = false;
|
||||
boolean hasFreq = false;
|
||||
boolean hasProx = false;
|
||||
|
||||
try {
|
||||
final int format = input.readVInt();
|
||||
|
||||
if (format > FORMAT_MINIMUM) {
|
||||
throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
}
|
||||
if (format < FORMAT_CURRENT) {
|
||||
throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
|
||||
}
|
||||
|
||||
final int size = input.readVInt(); //read in the size
|
||||
FieldInfo infos[] = new FieldInfo[size];
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
final int fieldNumber = i;
|
||||
byte bits = input.readByte();
|
||||
boolean isIndexed = (bits & IS_INDEXED) != 0;
|
||||
boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0;
|
||||
boolean omitNorms = (bits & OMIT_NORMS) != 0;
|
||||
boolean storePayloads = (bits & STORE_PAYLOADS) != 0;
|
||||
final IndexOptions indexOptions;
|
||||
if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_ONLY;
|
||||
} else if ((bits & OMIT_POSITIONS) != 0) {
|
||||
if (format <= FORMAT_OMIT_POSITIONS) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else {
|
||||
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
|
||||
}
|
||||
} else {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
|
||||
// LUCENE-3027: past indices were able to write
|
||||
// storePayloads=true when omitTFAP is also true,
|
||||
// which is invalid. We correct that, here:
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
storePayloads = false;
|
||||
}
|
||||
hasVectors |= storeTermVector;
|
||||
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms? Type.BYTES_VAR_STRAIGHT : null);
|
||||
}
|
||||
|
||||
if (input.getFilePointer() != input.length()) {
|
||||
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
|
||||
}
|
||||
return new FieldInfos(infos, hasFreq, hasProx, hasVectors);
|
||||
} finally {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", FIELD_INFOS_EXTENSION));
|
||||
}
|
||||
}
|
|
@ -75,7 +75,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
|
|||
try {
|
||||
long nextNormSeek = NORMS_HEADER.length; //skip header (header unused for now)
|
||||
for (FieldInfo fi : fields) {
|
||||
if (fi.isIndexed && !fi.omitNorms) {
|
||||
if (fi.normsPresent()) {
|
||||
String fileName = getNormFilename(segmentName, normGen, fi.number);
|
||||
Directory d = hasSeparateNorms(normGen, fi.number) ? separateNormsDir : dir;
|
||||
|
||||
|
@ -161,7 +161,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
|
|||
|
||||
static final class NormSource extends Source {
|
||||
protected NormSource(byte[] bytes) {
|
||||
super(Type.BYTES_FIXED_STRAIGHT);
|
||||
super(Type.FIXED_INTS_8);
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
|
@ -175,6 +175,11 @@ class Lucene3xNormsProducer extends PerDocProducer {
|
|||
return ref;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
return bytes[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
return true;
|
||||
|
@ -192,6 +197,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
|
|||
// like first FI that has norms but doesn't have separate norms?
|
||||
final String normsFileName = IndexFileNames.segmentFileName(info.name, "", NORMS_EXTENSION);
|
||||
if (dir.fileExists(normsFileName)) {
|
||||
// only needed to do this in 3x - 4x can decide if the norms are present
|
||||
files.add(normsFileName);
|
||||
}
|
||||
}
|
||||
|
@ -231,7 +237,7 @@ class Lucene3xNormsProducer extends PerDocProducer {
|
|||
|
||||
@Override
|
||||
public Type type() {
|
||||
return Type.BYTES_FIXED_STRAIGHT;
|
||||
return Type.FIXED_INTS_8;
|
||||
}
|
||||
|
||||
byte[] bytes() throws IOException {
|
||||
|
|
|
@ -65,7 +65,7 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
|
|||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
final int fieldNumber = format <= Lucene40FieldInfosWriter.FORMAT_FLEX? input.readInt():i;
|
||||
final int fieldNumber = input.readInt();
|
||||
byte bits = input.readByte();
|
||||
boolean isIndexed = (bits & Lucene40FieldInfosWriter.IS_INDEXED) != 0;
|
||||
boolean storeTermVector = (bits & Lucene40FieldInfosWriter.STORE_TERMVECTOR) != 0;
|
||||
|
@ -75,12 +75,8 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
|
|||
if ((bits & Lucene40FieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_ONLY;
|
||||
} else if ((bits & Lucene40FieldInfosWriter.OMIT_POSITIONS) != 0) {
|
||||
if (format <= Lucene40FieldInfosWriter.FORMAT_OMIT_POSITIONS) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else {
|
||||
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
|
||||
}
|
||||
} else if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX && (bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else if ((bits & Lucene40FieldInfosWriter.STORE_OFFSETS_IN_POSTINGS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
|
||||
} else {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
|
@ -95,59 +91,12 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
|
|||
hasVectors |= storeTermVector;
|
||||
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||
DocValues.Type docValuesType = null;
|
||||
if (format <= Lucene40FieldInfosWriter.FORMAT_FLEX) {
|
||||
final byte b = input.readByte();
|
||||
switch(b) {
|
||||
case 0:
|
||||
docValuesType = null;
|
||||
break;
|
||||
case 1:
|
||||
docValuesType = DocValues.Type.VAR_INTS;
|
||||
break;
|
||||
case 2:
|
||||
docValuesType = DocValues.Type.FLOAT_32;
|
||||
break;
|
||||
case 3:
|
||||
docValuesType = DocValues.Type.FLOAT_64;
|
||||
break;
|
||||
case 4:
|
||||
docValuesType = DocValues.Type.BYTES_FIXED_STRAIGHT;
|
||||
break;
|
||||
case 5:
|
||||
docValuesType = DocValues.Type.BYTES_FIXED_DEREF;
|
||||
break;
|
||||
case 6:
|
||||
docValuesType = DocValues.Type.BYTES_VAR_STRAIGHT;
|
||||
break;
|
||||
case 7:
|
||||
docValuesType = DocValues.Type.BYTES_VAR_DEREF;
|
||||
break;
|
||||
case 8:
|
||||
docValuesType = DocValues.Type.FIXED_INTS_16;
|
||||
break;
|
||||
case 9:
|
||||
docValuesType = DocValues.Type.FIXED_INTS_32;
|
||||
break;
|
||||
case 10:
|
||||
docValuesType = DocValues.Type.FIXED_INTS_64;
|
||||
break;
|
||||
case 11:
|
||||
docValuesType = DocValues.Type.FIXED_INTS_8;
|
||||
break;
|
||||
case 12:
|
||||
docValuesType = DocValues.Type.BYTES_FIXED_SORTED;
|
||||
break;
|
||||
case 13:
|
||||
docValuesType = DocValues.Type.BYTES_VAR_SORTED;
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new IllegalStateException("unhandled indexValues type " + b);
|
||||
}
|
||||
}
|
||||
// DV Types are packed in one byte
|
||||
byte val = input.readByte();
|
||||
final DocValues.Type docValuesType = getDocValuesType((byte) (val & 0x0F));
|
||||
final DocValues.Type normsType = getDocValuesType((byte) ((val >>> 4) & 0x0F));
|
||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, docValuesType);
|
||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType);
|
||||
}
|
||||
|
||||
if (input.getFilePointer() != input.length()) {
|
||||
|
@ -159,6 +108,42 @@ public class Lucene40FieldInfosReader extends FieldInfosReader {
|
|||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public DocValues.Type getDocValuesType(
|
||||
final byte b) {
|
||||
switch(b) {
|
||||
case 0:
|
||||
return null;
|
||||
case 1:
|
||||
return DocValues.Type.VAR_INTS;
|
||||
case 2:
|
||||
return DocValues.Type.FLOAT_32;
|
||||
case 3:
|
||||
return DocValues.Type.FLOAT_64;
|
||||
case 4:
|
||||
return DocValues.Type.BYTES_FIXED_STRAIGHT;
|
||||
case 5:
|
||||
return DocValues.Type.BYTES_FIXED_DEREF;
|
||||
case 6:
|
||||
return DocValues.Type.BYTES_VAR_STRAIGHT;
|
||||
case 7:
|
||||
return DocValues.Type.BYTES_VAR_DEREF;
|
||||
case 8:
|
||||
return DocValues.Type.FIXED_INTS_16;
|
||||
case 9:
|
||||
return DocValues.Type.FIXED_INTS_32;
|
||||
case 10:
|
||||
return DocValues.Type.FIXED_INTS_64;
|
||||
case 11:
|
||||
return DocValues.Type.FIXED_INTS_8;
|
||||
case 12:
|
||||
return DocValues.Type.BYTES_FIXED_SORTED;
|
||||
case 13:
|
||||
return DocValues.Type.BYTES_VAR_SORTED;
|
||||
default:
|
||||
throw new IllegalStateException("unhandled indexValues type " + b);
|
||||
}
|
||||
}
|
||||
|
||||
public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", Lucene40FieldInfosWriter.FIELD_INFOS_EXTENSION));
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene40;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -35,15 +36,11 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter {
|
|||
/** Extension of field infos */
|
||||
static final String FIELD_INFOS_EXTENSION = "fnm";
|
||||
|
||||
// First used in 2.9; prior to 2.9 there was no format header
|
||||
static final int FORMAT_START = -2;
|
||||
// First used in 3.4: omit only positional information
|
||||
static final int FORMAT_OMIT_POSITIONS = -3;
|
||||
// per-field codec support, records index values for fields
|
||||
static final int FORMAT_FLEX = -4;
|
||||
static final int FORMAT_START = -4;
|
||||
|
||||
// whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||
static final int FORMAT_CURRENT = FORMAT_FLEX;
|
||||
static final int FORMAT_CURRENT = FORMAT_START;
|
||||
|
||||
static final byte IS_INDEXED = 0x1;
|
||||
static final byte STORE_TERMVECTOR = 0x2;
|
||||
|
@ -78,60 +75,53 @@ public class Lucene40FieldInfosWriter extends FieldInfosWriter {
|
|||
output.writeInt(fi.number);
|
||||
output.writeByte(bits);
|
||||
|
||||
final byte b;
|
||||
|
||||
if (!fi.hasDocValues()) {
|
||||
b = 0;
|
||||
} else {
|
||||
switch(fi.getDocValuesType()) {
|
||||
case VAR_INTS:
|
||||
b = 1;
|
||||
break;
|
||||
case FLOAT_32:
|
||||
b = 2;
|
||||
break;
|
||||
case FLOAT_64:
|
||||
b = 3;
|
||||
break;
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
b = 4;
|
||||
break;
|
||||
case BYTES_FIXED_DEREF:
|
||||
b = 5;
|
||||
break;
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
b = 6;
|
||||
break;
|
||||
case BYTES_VAR_DEREF:
|
||||
b = 7;
|
||||
break;
|
||||
case FIXED_INTS_16:
|
||||
b = 8;
|
||||
break;
|
||||
case FIXED_INTS_32:
|
||||
b = 9;
|
||||
break;
|
||||
case FIXED_INTS_64:
|
||||
b = 10;
|
||||
break;
|
||||
case FIXED_INTS_8:
|
||||
b = 11;
|
||||
break;
|
||||
case BYTES_FIXED_SORTED:
|
||||
b = 12;
|
||||
break;
|
||||
case BYTES_VAR_SORTED:
|
||||
b = 13;
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("unhandled indexValues type " + fi.getDocValuesType());
|
||||
}
|
||||
}
|
||||
output.writeByte(b);
|
||||
// pack the DV types in one byte
|
||||
final byte dv = docValuesByte(fi.getDocValuesType());
|
||||
final byte nrm = docValuesByte(fi.getNormType());
|
||||
assert (dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0;
|
||||
byte val = (byte) (0xff & ((nrm << 4) | dv));
|
||||
output.writeByte(val);
|
||||
}
|
||||
} finally {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
public byte docValuesByte(Type type) {
|
||||
if (type == null) {
|
||||
return 0;
|
||||
} else {
|
||||
switch(type) {
|
||||
case VAR_INTS:
|
||||
return 1;
|
||||
case FLOAT_32:
|
||||
return 2;
|
||||
case FLOAT_64:
|
||||
return 3;
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
return 4;
|
||||
case BYTES_FIXED_DEREF:
|
||||
return 5;
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
return 6;
|
||||
case BYTES_VAR_DEREF:
|
||||
return 7;
|
||||
case FIXED_INTS_16:
|
||||
return 8;
|
||||
case FIXED_INTS_32:
|
||||
return 9;
|
||||
case FIXED_INTS_64:
|
||||
return 10;
|
||||
case FIXED_INTS_8:
|
||||
return 11;
|
||||
case BYTES_FIXED_SORTED:
|
||||
return 12;
|
||||
case BYTES_VAR_SORTED:
|
||||
return 13;
|
||||
default:
|
||||
throw new IllegalStateException("unhandled indexValues type " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -72,12 +72,12 @@ public class Lucene40NormsFormat extends NormsFormat {
|
|||
|
||||
@Override
|
||||
protected boolean canLoad(FieldInfo info) {
|
||||
return !info.omitNorms && info.isIndexed;
|
||||
return info.normsPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return Type.BYTES_FIXED_STRAIGHT;
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -102,23 +102,24 @@ public class Lucene40NormsFormat extends NormsFormat {
|
|||
|
||||
@Override
|
||||
protected boolean canMerge(FieldInfo info) {
|
||||
return !info.omitNorms && info.isIndexed;
|
||||
return info.normsPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return Type.BYTES_FIXED_STRAIGHT;
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
public static void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
|
||||
// see the comment in all the other codecs... its bogus that we do fileExists here, but its
|
||||
// a harder problem since fieldinfos are never 'cleaned'
|
||||
final String normsFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_EXTENSION);
|
||||
if (dir.fileExists(normsFileName)) {
|
||||
final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
|
||||
assert dir.fileExists(normsEntriesFileName);
|
||||
files.add(normsFileName);
|
||||
files.add(normsEntriesFileName);
|
||||
FieldInfos fieldInfos = segmentInfo.getFieldInfos();
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.normsPresent()) {
|
||||
final String normsEntriesFileName = IndexFileNames.segmentFileName(segmentInfo.name, NORMS_SEGMENT_SUFFIX, IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
|
||||
files.add(normsFileName);
|
||||
files.add(normsEntriesFileName);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -86,17 +86,18 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
|
|||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch, NORMS);
|
||||
boolean omitNorms = !Boolean.parseBoolean(readString(NORMS.length, scratch));
|
||||
|
||||
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch, NORMS_TYPE);
|
||||
String nrmType = readString(NORMS_TYPE.length, scratch);
|
||||
final DocValues.Type normsType = docValuesType(nrmType);
|
||||
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch, DOCVALUES);
|
||||
String dvType = readString(DOCVALUES.length, scratch);
|
||||
final DocValues.Type docValuesType;
|
||||
final DocValues.Type docValuesType = docValuesType(dvType);
|
||||
|
||||
|
||||
if ("false".equals(dvType)) {
|
||||
docValuesType = null;
|
||||
} else {
|
||||
docValuesType = DocValues.Type.valueOf(dvType);
|
||||
}
|
||||
|
||||
SimpleTextUtil.readLine(input, scratch);
|
||||
assert StringHelper.startsWith(scratch, INDEXOPTIONS);
|
||||
|
@ -107,7 +108,7 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
|
|||
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||
|
||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, docValuesType);
|
||||
omitNorms, storePayloads, indexOptions, docValuesType, normsType);
|
||||
}
|
||||
|
||||
if (input.getFilePointer() != input.length()) {
|
||||
|
@ -119,6 +120,14 @@ public class SimpleTextFieldInfosReader extends FieldInfosReader {
|
|||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public DocValues.Type docValuesType(String dvType) {
|
||||
if ("false".equals(dvType)) {
|
||||
return null;
|
||||
} else {
|
||||
return DocValues.Type.valueOf(dvType);
|
||||
}
|
||||
}
|
||||
|
||||
private String readString(int offset, BytesRef scratch) {
|
||||
return new String(scratch.bytes, scratch.offset+offset, scratch.length-offset, IOUtils.CHARSET_UTF_8);
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.simpletext;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -48,6 +49,7 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|||
static final BytesRef STORETVOFF = new BytesRef(" term vector offsets ");
|
||||
static final BytesRef PAYLOADS = new BytesRef(" payloads ");
|
||||
static final BytesRef NORMS = new BytesRef(" norms ");
|
||||
static final BytesRef NORMS_TYPE = new BytesRef(" norms type ");
|
||||
static final BytesRef DOCVALUES = new BytesRef(" doc values ");
|
||||
static final BytesRef INDEXOPTIONS = new BytesRef(" index options ");
|
||||
|
||||
|
@ -88,12 +90,12 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|||
SimpleTextUtil.write(out, Boolean.toString(!fi.omitNorms), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
SimpleTextUtil.write(out, NORMS_TYPE);
|
||||
SimpleTextUtil.write(out, getDocValuesType(fi.getNormType()), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
SimpleTextUtil.write(out, DOCVALUES);
|
||||
if (!fi.hasDocValues()) {
|
||||
SimpleTextUtil.write(out, "false", scratch);
|
||||
} else {
|
||||
SimpleTextUtil.write(out, fi.getDocValuesType().toString(), scratch);
|
||||
}
|
||||
SimpleTextUtil.write(out, getDocValuesType(fi.getDocValuesType()), scratch);
|
||||
SimpleTextUtil.writeNewline(out);
|
||||
|
||||
SimpleTextUtil.write(out, INDEXOPTIONS);
|
||||
|
@ -104,4 +106,8 @@ public class SimpleTextFieldInfosWriter extends FieldInfosWriter {
|
|||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static String getDocValuesType(DocValues.Type type) {
|
||||
return type == null ? "false" : type.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,17 +94,20 @@ public class SimpleTextNormsConsumer extends PerDocConsumer {
|
|||
|
||||
@Override
|
||||
protected boolean canMerge(FieldInfo info) {
|
||||
return !info.omitNorms && info.isIndexed;
|
||||
return info.normsPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return Type.BYTES_FIXED_STRAIGHT;
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo)
|
||||
throws IOException {
|
||||
if (type != Type.FIXED_INTS_8) {
|
||||
throw new UnsupportedOperationException("Codec only supports single byte norm values. Type give: " + type);
|
||||
}
|
||||
return new SimpleTextNormsDocValuesConsumer(fieldInfo);
|
||||
}
|
||||
|
||||
|
@ -131,10 +134,10 @@ public class SimpleTextNormsConsumer extends PerDocConsumer {
|
|||
|
||||
@Override
|
||||
public void add(int docID, IndexableField docValue) throws IOException {
|
||||
add(docID, docValue.binaryValue());
|
||||
add(docID, docValue.numericValue().longValue());
|
||||
}
|
||||
|
||||
protected void add(int docID, BytesRef value) throws IOException {
|
||||
public void add(int docID, long value) {
|
||||
if (docIDs.length <= upto) {
|
||||
assert docIDs.length == upto;
|
||||
docIDs = ArrayUtil.grow(docIDs, 1 + upto);
|
||||
|
@ -143,8 +146,8 @@ public class SimpleTextNormsConsumer extends PerDocConsumer {
|
|||
assert norms.length == upto;
|
||||
norms = ArrayUtil.grow(norms, 1 + upto);
|
||||
}
|
||||
assert value.length == 1;
|
||||
norms[upto] = value.bytes[value.offset];
|
||||
norms[upto] = (byte) value;
|
||||
|
||||
docIDs[upto] = docID;
|
||||
upto++;
|
||||
}
|
||||
|
@ -281,7 +284,7 @@ public class SimpleTextNormsConsumer extends PerDocConsumer {
|
|||
FieldInfos fieldInfos = info.getFieldInfos();
|
||||
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (!fieldInfo.omitNorms && fieldInfo.isIndexed) {
|
||||
if (fieldInfo.normsPresent()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "",
|
||||
NORMS_EXTENSION));
|
||||
break;
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.codecs.PerDocProducer;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
|
@ -95,11 +96,12 @@ public class SimpleTextNormsProducer extends PerDocProducer {
|
|||
}
|
||||
|
||||
static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
// TODO: This is what SI always did... but we can do this cleaner?
|
||||
// like first FI that has norms but doesn't have separate norms?
|
||||
final String normsFileName = IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION);
|
||||
if (dir.fileExists(normsFileName)) {
|
||||
files.add(normsFileName);
|
||||
FieldInfos fieldInfos = info.getFieldInfos();
|
||||
for (FieldInfo fieldInfo : fieldInfos) {
|
||||
if (fieldInfo.normsPresent()) {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", SimpleTextNormsConsumer.NORMS_EXTENSION));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,7 +132,7 @@ public class SimpleTextNormsProducer extends PerDocProducer {
|
|||
|
||||
@Override
|
||||
public Type type() {
|
||||
return Type.BYTES_FIXED_STRAIGHT;
|
||||
return Type.FIXED_INTS_8;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -141,7 +143,7 @@ public class SimpleTextNormsProducer extends PerDocProducer {
|
|||
|
||||
static final class Norm extends Source {
|
||||
protected Norm(byte[] bytes) {
|
||||
super(Type.BYTES_FIXED_STRAIGHT);
|
||||
super(Type.FIXED_INTS_8);
|
||||
this.bytes = bytes;
|
||||
}
|
||||
final byte bytes[];
|
||||
|
@ -153,6 +155,11 @@ public class SimpleTextNormsProducer extends PerDocProducer {
|
|||
ref.length = 1;
|
||||
return ref;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getInt(int docID) {
|
||||
return bytes[docID];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasArray() {
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
import org.apache.lucene.index.IndexWriter; // javadocs
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.IndexableFieldType;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -383,13 +384,13 @@ public class Field implements IndexableField {
|
|||
* document.
|
||||
*
|
||||
* <p>The boost is used to compute the norm factor for the field. By
|
||||
* default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)} method,
|
||||
* default, in the {@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)} method,
|
||||
* the boost value is multiplied by the length normalization factor and then
|
||||
* rounded by {@link org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
|
||||
* index. One should attempt to ensure that this product does not overflow
|
||||
* the range of that encoding.
|
||||
*
|
||||
* @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState)
|
||||
* @see org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState, Norm)
|
||||
* @see org.apache.lucene.search.similarities.DefaultSimilarity#encodeNormValue(float)
|
||||
*/
|
||||
public void setBoost(float boost) {
|
||||
|
|
|
@ -671,7 +671,7 @@ public class CheckIndex {
|
|||
if (reader.normValues(info.name) != null) {
|
||||
throw new RuntimeException("field: " + info.name + " should omit norms but has them!");
|
||||
}
|
||||
if (info.isIndexed && !info.omitNorms) {
|
||||
if (info.normsPresent()) {
|
||||
throw new RuntimeException("field: " + info.name + " should have norms but omits them!");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -339,7 +339,7 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
perDocConsumer = docState.docWriter.codec.docValuesFormat().docsConsumer(perDocWriteState);
|
||||
}
|
||||
DocValuesConsumer docValuesConsumer = perDocConsumer.addValuesField(valueType, fieldInfo);
|
||||
fieldInfo.setDocValuesType(valueType);
|
||||
fieldInfo.setDocValuesType(valueType, false);
|
||||
|
||||
docValuesConsumerAndDocID = new DocValuesConsumerAndDocID(docValuesConsumer);
|
||||
docValuesConsumerAndDocID.docID = docState.docID;
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -24,11 +26,12 @@ public final class FieldInfo {
|
|||
public final int number;
|
||||
|
||||
public boolean isIndexed;
|
||||
private DocValues.Type docValues;
|
||||
private DocValues.Type docValueType;
|
||||
|
||||
// True if any document indexed term vectors
|
||||
public boolean storeTermVector;
|
||||
|
||||
private DocValues.Type normType;
|
||||
public boolean omitNorms; // omit norms associated with indexed fields
|
||||
public IndexOptions indexOptions;
|
||||
public boolean storePayloads; // whether this field stores payloads together with term positions
|
||||
|
@ -56,21 +59,23 @@ public final class FieldInfo {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public FieldInfo(String name, boolean isIndexed, int number, boolean storeTermVector,
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normsType) {
|
||||
this.name = name;
|
||||
this.isIndexed = isIndexed;
|
||||
this.number = number;
|
||||
this.docValues = docValues;
|
||||
this.docValueType = docValues;
|
||||
if (isIndexed) {
|
||||
this.storeTermVector = storeTermVector;
|
||||
this.storePayloads = storePayloads;
|
||||
this.omitNorms = omitNorms;
|
||||
this.indexOptions = indexOptions;
|
||||
this.normType = !omitNorms ? normsType : null;
|
||||
} else { // for non-indexed fields, leave defaults
|
||||
this.storeTermVector = false;
|
||||
this.storePayloads = false;
|
||||
this.omitNorms = false;
|
||||
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
this.normType = null;
|
||||
}
|
||||
assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !storePayloads;
|
||||
}
|
||||
|
@ -78,7 +83,7 @@ public final class FieldInfo {
|
|||
@Override
|
||||
public Object clone() {
|
||||
return new FieldInfo(name, isIndexed, number, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, docValues);
|
||||
omitNorms, storePayloads, indexOptions, docValueType, normType);
|
||||
}
|
||||
|
||||
// should only be called by FieldInfos#addOrUpdate
|
||||
|
@ -109,27 +114,44 @@ public final class FieldInfo {
|
|||
assert this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads;
|
||||
}
|
||||
|
||||
void setDocValuesType(DocValues.Type v) {
|
||||
if (docValues == null) {
|
||||
docValues = v;
|
||||
}
|
||||
}
|
||||
|
||||
public void resetDocValuesType(DocValues.Type v) {
|
||||
if (docValues != null) {
|
||||
docValues = v;
|
||||
void setDocValuesType(DocValues.Type type, boolean force) {
|
||||
if (docValueType == null || force) {
|
||||
docValueType = type;
|
||||
} else if (type != docValueType) {
|
||||
throw new IllegalArgumentException("DocValues type already set to " + docValueType + " but was: " + type);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasDocValues() {
|
||||
return docValues != null;
|
||||
return docValueType != null;
|
||||
}
|
||||
|
||||
public DocValues.Type getDocValuesType() {
|
||||
return docValues;
|
||||
return docValueType;
|
||||
}
|
||||
|
||||
public DocValues.Type getNormType() {
|
||||
return normType;
|
||||
}
|
||||
|
||||
public void setStoreTermVectors() {
|
||||
storeTermVector = true;
|
||||
}
|
||||
|
||||
public void setNormValueType(Type type, boolean force) {
|
||||
if (normType == null || force) {
|
||||
normType = type;
|
||||
} else if (type != normType) {
|
||||
throw new IllegalArgumentException("Norm type already set to " + normType);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean omitNorms() {
|
||||
return omitNorms;
|
||||
}
|
||||
|
||||
public boolean normsPresent() {
|
||||
return isIndexed && !omitNorms && normType != null;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -268,7 +268,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
*/
|
||||
synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||
boolean omitNorms) {
|
||||
addOrUpdate(name, isIndexed, storeTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||
addOrUpdate(name, isIndexed, storeTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null);
|
||||
}
|
||||
|
||||
/** If the field is not yet known, adds it. If it is known, checks to make
|
||||
|
@ -284,8 +284,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
* @param indexOptions if term freqs should be omitted for this field
|
||||
*/
|
||||
synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector,
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
|
||||
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
|
||||
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
|
||||
}
|
||||
|
||||
// NOTE: this method does not carry over termVector
|
||||
|
@ -301,32 +301,37 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
// be updated by maybe FreqProxTermsWriterPerField:
|
||||
return addOrUpdateInternal(name, -1, fieldType.indexed(), false,
|
||||
fieldType.omitNorms(), false,
|
||||
fieldType.indexOptions(), null);
|
||||
fieldType.indexOptions(), null, null);
|
||||
}
|
||||
|
||||
synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed,
|
||||
boolean storeTermVector,
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues) {
|
||||
boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValues.Type docValues, DocValues.Type normType) {
|
||||
if (globalFieldNumbers == null) {
|
||||
throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos");
|
||||
}
|
||||
FieldInfo fi = fieldInfo(name);
|
||||
if (fi == null) {
|
||||
final int fieldNumber = nextFieldNumber(name, preferredFieldNumber);
|
||||
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues);
|
||||
fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
|
||||
} else {
|
||||
fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
|
||||
fi.setDocValuesType(docValues);
|
||||
if (docValues != null) {
|
||||
fi.setDocValuesType(docValues, true);
|
||||
}
|
||||
if (normType != null) {
|
||||
fi.setNormValueType(normType, true);
|
||||
}
|
||||
}
|
||||
version++;
|
||||
return fi;
|
||||
}
|
||||
|
||||
|
||||
synchronized public FieldInfo add(FieldInfo fi) {
|
||||
// IMPORTANT - reuse the field number if possible for consistent field numbers across segments
|
||||
return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector,
|
||||
fi.omitNorms, fi.storePayloads,
|
||||
fi.indexOptions, fi.getDocValuesType());
|
||||
fi.indexOptions, fi.getDocValuesType(), fi.getNormType());
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -334,12 +339,12 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
*/
|
||||
private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed,
|
||||
boolean storeTermVector, boolean omitNorms, boolean storePayloads,
|
||||
IndexOptions indexOptions, DocValues.Type docValuesType) {
|
||||
IndexOptions indexOptions, DocValues.Type docValuesType, DocValues.Type normType) {
|
||||
// don't check modifiable here since we use that to initially build up FIs
|
||||
if (globalFieldNumbers != null) {
|
||||
globalFieldNumbers.setIfNotSet(fieldNumber, name);
|
||||
}
|
||||
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType);
|
||||
final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normType);
|
||||
putInternal(fi);
|
||||
return fi;
|
||||
}
|
||||
|
|
|
@ -49,8 +49,9 @@ public class MultiDocValues extends DocValues {
|
|||
|
||||
public boolean stopLoadingOnNull(IndexReader reader, String field) throws IOException {
|
||||
// for norms we drop all norms if one leaf reader has no norms and the field is present
|
||||
Fields fields = reader.fields();
|
||||
return (fields != null && fields.terms(field) != null);
|
||||
FieldInfos fieldInfos = reader.getFieldInfos();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
return fieldInfo != null && fieldInfo.omitNorms;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Stores the normalization value computed in
|
||||
* {@link Similarity#computeNorm(FieldInvertState, Norm)} per field.
|
||||
* Normalization values must be consistent within a single field, different
|
||||
* value types are not permitted within a single field. All values set must be
|
||||
* fixed size values ie. all values passed to {@link Norm#setBytes(BytesRef)}
|
||||
* must have the same length per field.
|
||||
*
|
||||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
*/
|
||||
public final class Norm {
|
||||
private DocValuesField field;
|
||||
private BytesRef spare;
|
||||
|
||||
/**
|
||||
* Returns the {@link IndexableField} representation for this norm
|
||||
*/
|
||||
public IndexableField field() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link Type} for this norm.
|
||||
*/
|
||||
public Type type() {
|
||||
return field == null? null : field.fieldType().docValueType();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a spare {@link BytesRef}
|
||||
*/
|
||||
public BytesRef getSpare() {
|
||||
if (spare == null) {
|
||||
spare = new BytesRef();
|
||||
}
|
||||
return spare;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a float norm value
|
||||
*/
|
||||
public void setFloat(float norm) {
|
||||
setType(Type.FLOAT_32);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a double norm value
|
||||
*/
|
||||
public void setDouble(double norm) {
|
||||
setType(Type.FLOAT_64);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a short norm value
|
||||
*/
|
||||
public void setShort(short norm) {
|
||||
setType(Type.FIXED_INTS_16);
|
||||
this.field.setValue(norm);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a int norm value
|
||||
*/
|
||||
public void setInt(int norm) {
|
||||
setType(Type.FIXED_INTS_32);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a long norm value
|
||||
*/
|
||||
public void setLong(long norm) {
|
||||
setType(Type.FIXED_INTS_64);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a byte norm value
|
||||
*/
|
||||
public void setByte(byte norm) {
|
||||
setType(Type.FIXED_INTS_8);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a fixed byte array norm value
|
||||
*/
|
||||
public void setBytes(BytesRef norm) {
|
||||
setType(Type.BYTES_FIXED_STRAIGHT);
|
||||
this.field.setValue(norm);
|
||||
}
|
||||
|
||||
|
||||
private void setType(Type type) {
|
||||
if (field != null) {
|
||||
if (type != field.fieldType().docValueType()) {
|
||||
throw new IllegalArgumentException("FieldType missmatch - expected "+type+" but was " + field.fieldType().docValueType());
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case BYTES_FIXED_DEREF:
|
||||
case BYTES_FIXED_SORTED:
|
||||
case BYTES_FIXED_STRAIGHT:
|
||||
case BYTES_VAR_DEREF:
|
||||
case BYTES_VAR_SORTED:
|
||||
case BYTES_VAR_STRAIGHT:
|
||||
this.field = new DocValuesField("", new BytesRef(), type);
|
||||
break;
|
||||
|
||||
case FIXED_INTS_16:
|
||||
case FIXED_INTS_32:
|
||||
case FIXED_INTS_64:
|
||||
case FIXED_INTS_8:
|
||||
case VAR_INTS:
|
||||
this.field = new DocValuesField("", 0, type);
|
||||
break;
|
||||
case FLOAT_32:
|
||||
case FLOAT_64:
|
||||
this.field = new DocValuesField("", 0f, type);
|
||||
break;
|
||||
default:
|
||||
throw new IllegalArgumentException("unknown Type: " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -24,9 +24,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
// TODO FI: norms could actually be stored as doc store
|
||||
|
@ -69,13 +67,12 @@ final class NormsConsumer extends InvertedDocEndConsumer {
|
|||
if (!fi.omitNorms) {
|
||||
if (toWrite != null && toWrite.initialized()) {
|
||||
anythingFlushed = true;
|
||||
toWrite.flush(state.numDocs);
|
||||
final Type type = toWrite.flush(state.numDocs);
|
||||
assert fi.getNormType() == type;
|
||||
} else if (fi.isIndexed) {
|
||||
anythingFlushed = true;
|
||||
final DocValuesConsumer valuesConsumer = newConsumer(new PerDocWriteState(state), fi);
|
||||
final DocValuesField value = new DocValuesField("", new BytesRef(new byte[] {0x0}), Type.BYTES_FIXED_STRAIGHT);
|
||||
valuesConsumer.add(state.numDocs-1, value);
|
||||
valuesConsumer.finish(state.numDocs);
|
||||
assert fi.getNormType() == null;
|
||||
fi.setNormValueType(null, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -107,12 +104,12 @@ final class NormsConsumer extends InvertedDocEndConsumer {
|
|||
}
|
||||
|
||||
DocValuesConsumer newConsumer(PerDocWriteState perDocWriteState,
|
||||
FieldInfo fieldInfo) throws IOException {
|
||||
FieldInfo fieldInfo, Type type) throws IOException {
|
||||
if (consumer == null) {
|
||||
consumer = normsFormat.docsConsumer(perDocWriteState);
|
||||
}
|
||||
DocValuesConsumer addValuesField = consumer.addValuesField(
|
||||
Type.BYTES_FIXED_STRAIGHT, fieldInfo);
|
||||
DocValuesConsumer addValuesField = consumer.addValuesField(type, fieldInfo);
|
||||
return addValuesField;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.document.DocValuesField;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -29,9 +30,9 @@ public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implem
|
|||
private final Similarity similarity;
|
||||
private final FieldInvertState fieldState;
|
||||
private DocValuesConsumer consumer;
|
||||
private final BytesRef spare = new BytesRef(1);
|
||||
private final DocValuesField value = new DocValuesField("", spare, Type.BYTES_FIXED_STRAIGHT);
|
||||
private final Norm norm;
|
||||
private final NormsConsumer parent;
|
||||
private Type initType;
|
||||
|
||||
public NormsConsumerPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo, NormsConsumer parent) {
|
||||
this.fieldInfo = fieldInfo;
|
||||
|
@ -39,10 +40,9 @@ public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implem
|
|||
docState = docInverterPerField.docState;
|
||||
fieldState = docInverterPerField.fieldState;
|
||||
similarity = docState.similarityProvider.get(fieldInfo.name);
|
||||
spare.length = 1;
|
||||
spare.offset = 0;
|
||||
|
||||
norm = new Norm();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(NormsConsumerPerField other) {
|
||||
return fieldInfo.name.compareTo(other.fieldInfo.name);
|
||||
|
@ -51,20 +51,33 @@ public class NormsConsumerPerField extends InvertedDocEndConsumerPerField implem
|
|||
@Override
|
||||
void finish() throws IOException {
|
||||
if (fieldInfo.isIndexed && !fieldInfo.omitNorms) {
|
||||
DocValuesConsumer consumer = getConsumer();
|
||||
spare.bytes[0] = similarity.computeNorm(fieldState);
|
||||
consumer.add(docState.docID, value);
|
||||
similarity.computeNorm(fieldState, norm);
|
||||
|
||||
if (norm.type() != null) {
|
||||
IndexableField field = norm.field();
|
||||
// some similarity might not compute any norms
|
||||
DocValuesConsumer consumer = getConsumer(norm.type());
|
||||
consumer.add(docState.docID, field);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void flush(int docCount) throws IOException {
|
||||
assert initialized();
|
||||
Type flush(int docCount) throws IOException {
|
||||
if (!initialized()) {
|
||||
return null; // null type - not omitted but not written
|
||||
}
|
||||
consumer.finish(docCount);
|
||||
return initType;
|
||||
}
|
||||
|
||||
private DocValuesConsumer getConsumer() throws IOException {
|
||||
private DocValuesConsumer getConsumer(Type type) throws IOException {
|
||||
if (consumer == null) {
|
||||
consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo);
|
||||
fieldInfo.setNormValueType(type, false);
|
||||
consumer = parent.newConsumer(docState.docWriter.newPerDocWriteState(""), fieldInfo, type);
|
||||
this.initType = type;
|
||||
}
|
||||
if (initType != type) {
|
||||
throw new IllegalArgumentException("NormTypes for field: " + fieldInfo.name + " doesn't match " + initType + " != " + type);
|
||||
}
|
||||
return consumer;
|
||||
}
|
||||
|
|
|
@ -191,9 +191,18 @@ final class SegmentMerger {
|
|||
}
|
||||
|
||||
private void mergeFieldInfos() throws IOException {
|
||||
mergeDocValuesAndNormsFieldInfos();
|
||||
// write the merged infos
|
||||
FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat()
|
||||
.getFieldInfosWriter();
|
||||
fieldInfosWriter.write(directory, segment, mergeState.fieldInfos, context);
|
||||
}
|
||||
|
||||
public void mergeDocValuesAndNormsFieldInfos() throws IOException {
|
||||
// mapping from all docvalues fields found to their promoted types
|
||||
// this is because FieldInfos does not store the valueSize
|
||||
Map<FieldInfo,TypePromoter> docValuesTypes = new HashMap<FieldInfo,TypePromoter>();
|
||||
Map<FieldInfo,TypePromoter> normValuesTypes = new HashMap<FieldInfo,TypePromoter>();
|
||||
|
||||
for (MergeState.IndexReaderAndLiveDocs readerAndLiveDocs : mergeState.readers) {
|
||||
final IndexReader reader = readerAndLiveDocs.reader;
|
||||
|
@ -205,28 +214,44 @@ final class SegmentMerger {
|
|||
TypePromoter previous = docValuesTypes.get(merged);
|
||||
docValuesTypes.put(merged, mergeDocValuesType(previous, reader.docValues(fi.name)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// update any promoted doc values types:
|
||||
for (Map.Entry<FieldInfo,TypePromoter> e : docValuesTypes.entrySet()) {
|
||||
FieldInfo fi = e.getKey();
|
||||
TypePromoter promoter = e.getValue();
|
||||
if (promoter == null) {
|
||||
fi.resetDocValuesType(null);
|
||||
} else {
|
||||
assert promoter != TypePromoter.getIdentityPromoter();
|
||||
if (fi.getDocValuesType() != promoter.type()) {
|
||||
// reset the type if we got promoted
|
||||
fi.resetDocValuesType(promoter.type());
|
||||
if (fi.normsPresent()) {
|
||||
TypePromoter previous = normValuesTypes.get(merged);
|
||||
normValuesTypes.put(merged, mergeDocValuesType(previous, reader.normValues(fi.name)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write the merged infos
|
||||
FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter();
|
||||
fieldInfosWriter.write(directory, segment, mergeState.fieldInfos, context);
|
||||
updatePromoted(normValuesTypes, true);
|
||||
updatePromoted(docValuesTypes, false);
|
||||
}
|
||||
|
||||
protected void updatePromoted(Map<FieldInfo,TypePromoter> infoAndPromoter, boolean norms) {
|
||||
// update any promoted doc values types:
|
||||
for (Map.Entry<FieldInfo,TypePromoter> e : infoAndPromoter.entrySet()) {
|
||||
FieldInfo fi = e.getKey();
|
||||
TypePromoter promoter = e.getValue();
|
||||
if (promoter == null) {
|
||||
if (norms) {
|
||||
fi.setNormValueType(null, true);
|
||||
} else {
|
||||
fi.setDocValuesType(null, true);
|
||||
}
|
||||
} else {
|
||||
assert promoter != TypePromoter.getIdentityPromoter();
|
||||
if (norms) {
|
||||
if (fi.getNormType() != promoter.type()) {
|
||||
// reset the type if we got promoted
|
||||
fi.setNormValueType(promoter.type(), true);
|
||||
}
|
||||
} else {
|
||||
if (fi.getDocValuesType() != promoter.type()) {
|
||||
// reset the type if we got promoted
|
||||
fi.setDocValuesType(promoter.type(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
|
@ -198,7 +198,7 @@ public final class SegmentReader extends IndexReader {
|
|||
public boolean hasNorms(String field) {
|
||||
ensureOpen();
|
||||
FieldInfo fi = core.fieldInfos.fieldInfo(field);
|
||||
return fi != null && fi.isIndexed && !fi.omitNorms;
|
||||
return fi.normsPresent();
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
@ -122,10 +123,11 @@ public class BM25Similarity extends Similarity {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public final byte computeNorm(FieldInvertState state) {
|
||||
public final void computeNorm(FieldInvertState state, Norm norm) {
|
||||
final int numTerms = discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength();
|
||||
return encodeNormValue(state.getBoost(), numTerms);
|
||||
norm.setByte(encodeNormValue(state.getBoost(), numTerms));
|
||||
}
|
||||
|
||||
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package org.apache.lucene.search.similarities;
|
||||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
|
@ -22,7 +23,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
|
||||
/** Expert: Default scoring implementation. */
|
||||
public class DefaultSimilarity extends TFIDFSimilarity {
|
||||
|
||||
|
||||
/** Implemented as
|
||||
* <code>state.getBoost()*lengthNorm(numTerms)</code>, where
|
||||
* <code>numTerms</code> is {@link FieldInvertState#getLength()} if {@link
|
||||
|
@ -32,13 +33,13 @@ public class DefaultSimilarity extends TFIDFSimilarity {
|
|||
*
|
||||
* @lucene.experimental */
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
final int numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength();
|
||||
return encodeNormValue(state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms))));
|
||||
norm.setByte(encodeNormValue(state.getBoost() * ((float) (1.0 / Math.sqrt(numTerms)))));
|
||||
}
|
||||
|
||||
/** Implemented as <code>sqrt(freq)</code>. */
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
@ -40,8 +41,8 @@ public class MultiSimilarity extends Similarity {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return sims[0].computeNorm(state);
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
sims[0].computeNorm(state, norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.document.DocValuesField; // javadoc
|
|||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader; // javadoc
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.Terms; // javadoc
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
|
@ -36,7 +37,6 @@ import org.apache.lucene.search.TermStatistics;
|
|||
import org.apache.lucene.search.spans.SpanQuery; // javadoc
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.SmallFloat; // javadoc
|
||||
import org.apache.lucene.util.TermContext;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -55,8 +55,8 @@ import org.apache.lucene.util.TermContext;
|
|||
* <a href="#querytime">query-time</a>.
|
||||
* <p>
|
||||
* <a name="indextime"/>
|
||||
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState)}, allowing
|
||||
* the Similarity implementation to return a per-document byte for the field that will
|
||||
* At indexing time, the indexer calls {@link #computeNorm(FieldInvertState, Norm)}, allowing
|
||||
* the Similarity implementation to set a per-document value for the field that will
|
||||
* be later accessible via {@link IndexReader#normValues(String)}. Lucene makes no assumption
|
||||
* about what is in this value, but it is most useful for encoding length normalization
|
||||
* information.
|
||||
|
@ -109,23 +109,24 @@ import org.apache.lucene.util.TermContext;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public abstract class Similarity {
|
||||
|
||||
/**
|
||||
* Computes the normalization value for a field, given the accumulated
|
||||
* state of term processing for this field (see {@link FieldInvertState}).
|
||||
*
|
||||
* <p>Implementations should calculate a byte value based on the field
|
||||
* state and then return that value.
|
||||
* <p>Implementations should calculate a norm value based on the field
|
||||
* state and set that value to the given {@link Norm}.
|
||||
*
|
||||
* <p>Matches in longer fields are less precise, so implementations of this
|
||||
* method usually return smaller values when <code>state.getLength()</code> is large,
|
||||
* method usually set smaller values when <code>state.getLength()</code> is large,
|
||||
* and larger values when <code>state.getLength()</code> is small.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*
|
||||
* @param state current processing state for this field
|
||||
* @return the calculated byte norm
|
||||
* @param norm holds the computed norm value when this method returns
|
||||
*/
|
||||
public abstract byte computeNorm(FieldInvertState state);
|
||||
public abstract void computeNorm(FieldInvertState state, Norm norm);
|
||||
|
||||
/**
|
||||
* Compute any collection-level stats (e.g. IDF, average document length, etc) needed for scoring a query.
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
|
@ -234,13 +235,13 @@ public abstract class SimilarityBase extends Similarity {
|
|||
|
||||
/** Encodes the document length in the same way as {@link TFIDFSimilarity}. */
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
final float numTerms;
|
||||
if (discountOverlaps)
|
||||
numTerms = state.getLength() - state.getNumOverlap();
|
||||
else
|
||||
numTerms = state.getLength() / state.getBoost();
|
||||
return encodeNormValue(state.getBoost(), numTerms);
|
||||
norm.setByte(encodeNormValue(state.getBoost(), numTerms));
|
||||
}
|
||||
|
||||
/** Decodes a normalization factor (document length) stored in an index.
|
||||
|
|
|
@ -22,14 +22,12 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.CollectionStatistics;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermStatistics;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.util.SmallFloat;
|
||||
|
||||
|
||||
|
|
|
@ -155,7 +155,7 @@ subclassing the Similarity, one can simply introduce a new basic model and tell
|
|||
matching term occurs. In these
|
||||
cases people have overridden Similarity to return 1 from the tf() method.</p></li>
|
||||
<li><p>Changing Length Normalization — By overriding
|
||||
{@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState state)},
|
||||
{@link org.apache.lucene.search.similarities.Similarity#computeNorm(FieldInvertState state, Norm)},
|
||||
it is possible to discount how the length of a field contributes
|
||||
to a score. In {@link org.apache.lucene.search.similarities.DefaultSimilarity},
|
||||
lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.codecs.preflexrw;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosFormat;
|
||||
import org.apache.lucene.codecs.NormsFormat;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.lucene3x.Lucene3xCodec;
|
||||
|
@ -29,6 +30,7 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
public class PreFlexRWCodec extends Lucene3xCodec {
|
||||
private final PostingsFormat postings = new PreFlexRWPostingsFormat();
|
||||
private final NormsFormat norms = new PreFlexRWNormsFormat();
|
||||
private final FieldInfosFormat fieldInfos = new PreFlexRWFieldInfosFormat();
|
||||
|
||||
@Override
|
||||
public PostingsFormat postingsFormat() {
|
||||
|
@ -47,4 +49,13 @@ public class PreFlexRWCodec extends Lucene3xCodec {
|
|||
return super.normsFormat();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfosFormat fieldInfosFormat() {
|
||||
if (LuceneTestCase.PREFLEX_IMPERSONATION_IS_ACTIVE) {
|
||||
return fieldInfos;
|
||||
} else {
|
||||
return super.fieldInfosFormat();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
package org.apache.lucene.codecs.preflexrw;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.codecs.lucene3x.Lucene3xFieldInfosFormat;
|
||||
|
||||
/**
|
||||
*
|
||||
* @lucene.internal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PreFlexRWFieldInfosFormat extends Lucene3xFieldInfosFormat {
|
||||
|
||||
@Override
|
||||
public FieldInfosReader getFieldInfosReader() throws IOException {
|
||||
return new PreFlexRWFieldInfosReader();
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfosWriter getFieldInfosWriter() throws IOException {
|
||||
return new PreFlexRWFieldInfosWriter();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,117 @@
|
|||
package org.apache.lucene.codecs.preflexrw;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexFormatTooNewException;
|
||||
import org.apache.lucene.index.IndexFormatTooOldException;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
* @lucene.internal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PreFlexRWFieldInfosReader extends FieldInfosReader {
|
||||
static final int FORMAT_MINIMUM = PreFlexRWFieldInfosWriter.FORMAT_START;
|
||||
|
||||
@Override
|
||||
public FieldInfos read(Directory directory, String segmentName, IOContext iocontext) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION);
|
||||
IndexInput input = directory.openInput(fileName, iocontext);
|
||||
|
||||
boolean hasVectors = false;
|
||||
boolean hasFreq = false;
|
||||
boolean hasProx = false;
|
||||
|
||||
try {
|
||||
final int format = input.readVInt();
|
||||
|
||||
if (format > FORMAT_MINIMUM) {
|
||||
throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
|
||||
}
|
||||
if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) {
|
||||
throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT);
|
||||
}
|
||||
|
||||
final int size = input.readVInt(); //read in the size
|
||||
FieldInfo infos[] = new FieldInfo[size];
|
||||
|
||||
for (int i = 0; i < size; i++) {
|
||||
String name = input.readString();
|
||||
final int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.readInt() : i;
|
||||
byte bits = input.readByte();
|
||||
boolean isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0;
|
||||
boolean storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0;
|
||||
boolean omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0;
|
||||
boolean storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0;
|
||||
final IndexOptions indexOptions;
|
||||
if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) {
|
||||
indexOptions = IndexOptions.DOCS_ONLY;
|
||||
} else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) {
|
||||
if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS;
|
||||
} else {
|
||||
throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
|
||||
}
|
||||
} else {
|
||||
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
}
|
||||
|
||||
// LUCENE-3027: past indices were able to write
|
||||
// storePayloads=true when omitTFAP is also true,
|
||||
// which is invalid. We correct that, here:
|
||||
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
storePayloads = false;
|
||||
}
|
||||
hasVectors |= storeTermVector;
|
||||
hasProx |= isIndexed && indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
hasFreq |= isIndexed && indexOptions != IndexOptions.DOCS_ONLY;
|
||||
|
||||
Type normType = isIndexed && !omitNorms ? Type.FIXED_INTS_8 : null;
|
||||
if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null) {
|
||||
// RW can have norms but doesn't write them
|
||||
normType = input.readByte() != 0 ? Type.FIXED_INTS_8 : null;
|
||||
}
|
||||
|
||||
infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector,
|
||||
omitNorms, storePayloads, indexOptions, null, normType);
|
||||
}
|
||||
|
||||
if (input.getFilePointer() != input.length()) {
|
||||
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length() + " (resource: " + input + ")");
|
||||
}
|
||||
return new FieldInfos(infos, hasFreq, hasProx, hasVectors);
|
||||
} finally {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static void files(Directory dir, SegmentInfo info, Set<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(info.name, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
package org.apache.lucene.codecs.preflexrw;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldInfosWriter;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
||||
/**
|
||||
* @lucene.internal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PreFlexRWFieldInfosWriter extends FieldInfosWriter {
|
||||
// TODO move to test-framework preflex RW?
|
||||
|
||||
/** Extension of field infos */
|
||||
static final String FIELD_INFOS_EXTENSION = "fnm";
|
||||
|
||||
// First used in 2.9; prior to 2.9 there was no format header
|
||||
static final int FORMAT_START = -2;
|
||||
// First used in 3.4: omit only positional information
|
||||
static final int FORMAT_OMIT_POSITIONS = -3;
|
||||
|
||||
static final int FORMAT_PREFLEX_RW = Integer.MIN_VALUE;
|
||||
|
||||
// whenever you add a new format, make it 1 smaller (negative version logic)!
|
||||
static final int FORMAT_CURRENT = FORMAT_OMIT_POSITIONS;
|
||||
|
||||
static final byte IS_INDEXED = 0x1;
|
||||
static final byte STORE_TERMVECTOR = 0x2;
|
||||
static final byte OMIT_NORMS = 0x10;
|
||||
static final byte STORE_PAYLOADS = 0x20;
|
||||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||
static final byte OMIT_POSITIONS = -128;
|
||||
|
||||
@Override
|
||||
public void write(Directory directory, String segmentName, FieldInfos infos, IOContext context) throws IOException {
|
||||
final String fileName = IndexFileNames.segmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
|
||||
IndexOutput output = directory.createOutput(fileName, context);
|
||||
try {
|
||||
output.writeVInt(FORMAT_PREFLEX_RW);
|
||||
output.writeVInt(infos.size());
|
||||
for (FieldInfo fi : infos) {
|
||||
assert fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.storePayloads;
|
||||
byte bits = 0x0;
|
||||
if (fi.isIndexed) bits |= IS_INDEXED;
|
||||
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
|
||||
if (fi.omitNorms) bits |= OMIT_NORMS;
|
||||
if (fi.storePayloads) bits |= STORE_PAYLOADS;
|
||||
if (fi.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
bits |= OMIT_TERM_FREQ_AND_POSITIONS;
|
||||
} else if (fi.indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
bits |= OMIT_POSITIONS;
|
||||
}
|
||||
output.writeString(fi.name);
|
||||
/*
|
||||
* we need to write the field number since IW tries
|
||||
* to stabelize the field numbers across segments so the
|
||||
* FI ordinal is not necessarily equivalent to the field number
|
||||
*/
|
||||
output.writeInt(fi.number);
|
||||
output.writeByte(bits);
|
||||
if (fi.isIndexed && !fi.omitNorms) {
|
||||
// to allow null norm types we need to indicate if norms are written
|
||||
// only in RW case
|
||||
output.writeByte((byte) (fi.getNormType() == null ? 0 : 1));
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
output.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -36,7 +36,7 @@ import org.apache.lucene.store.IndexOutput;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
class PreFlexFieldsWriter extends FieldsConsumer {
|
||||
class PreFlexRWFieldsWriter extends FieldsConsumer {
|
||||
|
||||
private final TermInfosWriter termsOut;
|
||||
private final IndexOutput freqOut;
|
||||
|
@ -44,7 +44,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
|||
private final Lucene40SkipListWriter skipListWriter;
|
||||
private final int totalNumDocs;
|
||||
|
||||
public PreFlexFieldsWriter(SegmentWriteState state) throws IOException {
|
||||
public PreFlexRWFieldsWriter(SegmentWriteState state) throws IOException {
|
||||
termsOut = new TermInfosWriter(state.directory,
|
||||
state.segmentName,
|
||||
state.fieldInfos,
|
||||
|
@ -89,7 +89,7 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
|||
public TermsConsumer addField(FieldInfo field) throws IOException {
|
||||
assert field.number != -1;
|
||||
if (field.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
|
||||
throw new UnsupportedOperationException("this codec cannot index offsets");
|
||||
throw new IllegalArgumentException("this codec cannot index offsets");
|
||||
}
|
||||
//System.out.println("w field=" + field.name + " storePayload=" + field.storePayloads + " number=" + field.number);
|
||||
return new PreFlexTermsWriter(field);
|
||||
|
@ -164,7 +164,6 @@ class PreFlexFieldsWriter extends FieldsConsumer {
|
|||
assert proxOut != null;
|
||||
assert startOffset == -1;
|
||||
assert endOffset == -1;
|
||||
|
||||
//System.out.println(" w pos=" + position + " payl=" + payload);
|
||||
final int delta = position - lastPosition;
|
||||
lastPosition = position;
|
|
@ -22,9 +22,9 @@ import java.util.Arrays;
|
|||
|
||||
import org.apache.lucene.codecs.DocValuesConsumer;
|
||||
import org.apache.lucene.codecs.PerDocConsumer;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
|
@ -34,14 +34,13 @@ import org.apache.lucene.store.IOContext;
|
|||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
/**
|
||||
* Writes and Merges Lucene 3.x norms format
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class PreFlexNormsConsumer extends PerDocConsumer {
|
||||
class PreFlexRWNormsConsumer extends PerDocConsumer {
|
||||
|
||||
/** norms header placeholder */
|
||||
private static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
|
||||
|
@ -62,7 +61,7 @@ class PreFlexNormsConsumer extends PerDocConsumer {
|
|||
|
||||
private NormsWriter writer;
|
||||
|
||||
public PreFlexNormsConsumer(Directory directory, String segment, IOContext context){
|
||||
public PreFlexRWNormsConsumer(Directory directory, String segment, IOContext context){
|
||||
this.directory = directory;
|
||||
this.segment = segment;
|
||||
this.context = context;
|
||||
|
@ -79,10 +78,23 @@ class PreFlexNormsConsumer extends PerDocConsumer {
|
|||
writer.finish();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean canMerge(FieldInfo info) {
|
||||
return info.normsPresent();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Type getDocValuesType(FieldInfo info) {
|
||||
return info.getNormType();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocValuesConsumer addValuesField(Type type, FieldInfo fieldInfo)
|
||||
throws IOException {
|
||||
if (type != Type.FIXED_INTS_8) {
|
||||
throw new UnsupportedOperationException("Codec only supports single byte norm values. Type give: " + type);
|
||||
}
|
||||
return new Lucene3xNormsDocValuesConsumer(fieldInfo);
|
||||
}
|
||||
|
||||
|
@ -134,10 +146,10 @@ class PreFlexNormsConsumer extends PerDocConsumer {
|
|||
|
||||
@Override
|
||||
public void add(int docID, IndexableField docValue) throws IOException {
|
||||
add(docID, docValue.binaryValue());
|
||||
add(docID, docValue.numericValue().longValue());
|
||||
}
|
||||
|
||||
protected void add(int docID, BytesRef value) throws IOException {
|
||||
protected void add(int docID, long value) {
|
||||
if (docIDs.length <= upto) {
|
||||
assert docIDs.length == upto;
|
||||
docIDs = ArrayUtil.grow(docIDs, 1 + upto);
|
||||
|
@ -146,8 +158,7 @@ class PreFlexNormsConsumer extends PerDocConsumer {
|
|||
assert norms.length == upto;
|
||||
norms = ArrayUtil.grow(norms, 1 + upto);
|
||||
}
|
||||
assert value.length == 1;
|
||||
norms[upto] = value.bytes[value.offset];
|
||||
norms[upto] = (byte) value;
|
||||
|
||||
docIDs[upto] = docID;
|
||||
upto++;
|
||||
|
@ -217,7 +228,7 @@ class PreFlexNormsConsumer extends PerDocConsumer {
|
|||
public void merge(MergeState mergeState) throws IOException {
|
||||
int numMergedDocs = 0;
|
||||
for (FieldInfo fi : mergeState.fieldInfos) {
|
||||
if (fi.isIndexed && !fi.omitNorms) {
|
||||
if (fi.normsPresent()) {
|
||||
startField(fi);
|
||||
int numMergedDocsForField = 0;
|
||||
for (MergeState.IndexReaderAndLiveDocs reader : mergeState.readers) {
|
|
@ -21,11 +21,15 @@ import org.apache.lucene.codecs.PerDocConsumer;
|
|||
import org.apache.lucene.codecs.lucene3x.Lucene3xNormsFormat;
|
||||
import org.apache.lucene.index.PerDocWriteState;
|
||||
|
||||
/**
|
||||
* @lucene.internal
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PreFlexRWNormsFormat extends Lucene3xNormsFormat {
|
||||
|
||||
@Override
|
||||
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
|
||||
return new PreFlexNormsConsumer(state.directory, state.segmentName, state.context);
|
||||
return new PreFlexRWNormsConsumer(state.directory, state.segmentName, state.context);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -41,7 +41,7 @@ public class PreFlexRWPostingsFormat extends Lucene3xPostingsFormat {
|
|||
|
||||
@Override
|
||||
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
|
||||
return new PreFlexFieldsWriter(state);
|
||||
return new PreFlexRWFieldsWriter(state);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,206 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Before;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class TestCustomNorms extends LuceneTestCase {
|
||||
final String floatTestField = "normsTestFloat";
|
||||
final String exceptionTestField = "normsTestExcp";
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
assumeFalse("cannot work with preflex codec", Codec.getDefault().getName()
|
||||
.equals("Lucene3x"));
|
||||
assumeFalse("cannot work with simple text codec", Codec.getDefault()
|
||||
.getName().equals("SimpleText"));
|
||||
|
||||
}
|
||||
|
||||
public void testFloatNorms() throws IOException {
|
||||
|
||||
MockDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
SimilarityProvider provider = new MySimProvider();
|
||||
config.setSimilarityProvider(provider);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
float nextFloat = random.nextFloat();
|
||||
Field f = new Field(floatTestField, "" + nextFloat, TextField.TYPE_STORED);
|
||||
f.setBoost(nextFloat);
|
||||
|
||||
doc.add(f);
|
||||
writer.addDocument(doc);
|
||||
doc.removeField(floatTestField);
|
||||
if (rarely()) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
writer.commit();
|
||||
writer.close();
|
||||
IndexReader open = new SlowMultiReaderWrapper(IndexReader.open(dir));
|
||||
DocValues normValues = open.normValues(floatTestField);
|
||||
assertNotNull(normValues);
|
||||
Source source = normValues.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
assertEquals(Type.FLOAT_32, normValues.type());
|
||||
float[] norms = (float[]) source.getArray();
|
||||
for (int i = 0; i < open.maxDoc(); i++) {
|
||||
Document document = open.document(i);
|
||||
float expected = Float.parseFloat(document.get(floatTestField));
|
||||
assertEquals(expected, norms[i], 0.0f);
|
||||
}
|
||||
open.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testExceptionOnRandomType() throws IOException {
|
||||
MockDirectoryWrapper dir = newDirectory();
|
||||
dir.setCheckIndexOnClose(false); // can't set sim to checkindex yet
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
SimilarityProvider provider = new MySimProvider();
|
||||
config.setSimilarityProvider(provider);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
int num = atLeast(100);
|
||||
try {
|
||||
for (int i = 0; i < num; i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
float nextFloat = random.nextFloat();
|
||||
Field f = new Field(exceptionTestField, "" + nextFloat,
|
||||
TextField.TYPE_STORED);
|
||||
f.setBoost(nextFloat);
|
||||
|
||||
doc.add(f);
|
||||
writer.addDocument(doc);
|
||||
doc.removeField(exceptionTestField);
|
||||
if (rarely()) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
fail("expected exception - incompatible types");
|
||||
} catch (IllegalArgumentException e) {
|
||||
// expected
|
||||
}
|
||||
writer.commit();
|
||||
writer.close();
|
||||
dir.close();
|
||||
|
||||
}
|
||||
|
||||
public class MySimProvider implements SimilarityProvider {
|
||||
SimilarityProvider delegate = new DefaultSimilarityProvider();
|
||||
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
if (floatTestField.equals(field)) {
|
||||
return new FloatEncodingBoostSimilarity();
|
||||
} else if (exceptionTestField.equals(field)) {
|
||||
return new RandomTypeSimilarity(random);
|
||||
} else {
|
||||
return delegate.get(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
public static class FloatEncodingBoostSimilarity extends DefaultSimilarity {
|
||||
|
||||
@Override
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
float boost = state.getBoost();
|
||||
norm.setFloat(boost);
|
||||
}
|
||||
}
|
||||
|
||||
public static class RandomTypeSimilarity extends DefaultSimilarity {
|
||||
|
||||
private final Random random;
|
||||
|
||||
public RandomTypeSimilarity(Random random) {
|
||||
this.random = random;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
float boost = state.getBoost();
|
||||
int nextInt = random.nextInt(10);
|
||||
switch (nextInt) {
|
||||
case 0:
|
||||
norm.setDouble((double) boost);
|
||||
break;
|
||||
case 1:
|
||||
norm.setFloat(boost);
|
||||
break;
|
||||
case 2:
|
||||
norm.setLong((long) boost);
|
||||
break;
|
||||
case 3:
|
||||
norm.setBytes(new BytesRef(new byte[6]));
|
||||
break;
|
||||
case 4:
|
||||
norm.setInt((int) boost);
|
||||
break;
|
||||
case 5:
|
||||
norm.setShort((short) boost);
|
||||
break;
|
||||
default:
|
||||
norm.setByte((byte) boost);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -141,7 +141,7 @@ public class TestFieldInfos extends LuceneTestCase {
|
|||
try {
|
||||
readOnly.addOrUpdate("bogus", random.nextBoolean(), random.nextBoolean(),
|
||||
random.nextBoolean(),
|
||||
random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null);
|
||||
random.nextBoolean(), random.nextBoolean() ? IndexOptions.DOCS_ONLY : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null);
|
||||
fail("instance should be read only");
|
||||
} catch (IllegalStateException e) {
|
||||
// expected
|
||||
|
|
|
@ -117,8 +117,8 @@ public class TestMaxTermFrequency extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue((float) state.getMaxTermFrequency());
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue((float) state.getMaxTermFrequency()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,20 +18,20 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.DocValues.Source;
|
||||
import org.apache.lucene.index.DocValues.Type;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||
import org.apache.lucene.util.LineFileDocs;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
/**
|
||||
|
@ -39,7 +39,8 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
* separate norms, addDocument, addIndexes, forceMerge.
|
||||
*/
|
||||
public class TestNorms extends LuceneTestCase {
|
||||
|
||||
final String byteTestField = "normsTestByte";
|
||||
|
||||
class CustomNormEncodingSimilarity extends DefaultSimilarity {
|
||||
@Override
|
||||
public byte encodeNormValue(float f) {
|
||||
|
@ -52,8 +53,8 @@ public class TestNorms extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue((float) state.getLength());
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue((float) state.getLength()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,4 +94,160 @@ public class TestNorms extends LuceneTestCase {
|
|||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMaxByteNorms() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
buildIndex(dir, true);
|
||||
IndexReader open = new SlowMultiReaderWrapper(IndexReader.open(dir));
|
||||
DocValues normValues = open.normValues(byteTestField);
|
||||
assertNotNull(normValues);
|
||||
Source source = normValues.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
assertEquals(Type.FIXED_INTS_8, normValues.type());
|
||||
byte[] norms = (byte[]) source.getArray();
|
||||
for (int i = 0; i < open.maxDoc(); i++) {
|
||||
Document document = open.document(i);
|
||||
int expected = Integer.parseInt(document.get(byteTestField));
|
||||
assertEquals((byte)expected, norms[i]);
|
||||
}
|
||||
open.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* this test randomly creates segments with or without norms but not omitting
|
||||
* norms. The similarity used doesn't write a norm value if writeNorms = false is
|
||||
* passed. This differs from omitNorm since norms are simply not written for this segment
|
||||
* while merging fills in default values based on the Norm {@link Type}
|
||||
*/
|
||||
public void testNormsNotPresent() throws IOException {
|
||||
Directory dir = newDirectory();
|
||||
boolean firstWriteNorm = random.nextBoolean();
|
||||
buildIndex(dir, firstWriteNorm);
|
||||
|
||||
Directory otherDir = newDirectory();
|
||||
boolean secondWriteNorm = random.nextBoolean();
|
||||
buildIndex(otherDir, secondWriteNorm);
|
||||
|
||||
IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(otherDir));
|
||||
FieldInfos fieldInfos = reader.getFieldInfos();
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(byteTestField);
|
||||
assertFalse(fieldInfo.omitNorms);
|
||||
assertTrue(fieldInfo.isIndexed);
|
||||
if (secondWriteNorm) {
|
||||
assertTrue(fieldInfo.normsPresent());
|
||||
} else {
|
||||
assertFalse(fieldInfo.normsPresent());
|
||||
}
|
||||
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
writer.addIndexes(reader);
|
||||
IndexReader mergedReader = new SlowMultiReaderWrapper(writer.getReader());
|
||||
if (!firstWriteNorm && !secondWriteNorm) {
|
||||
DocValues normValues = mergedReader.normValues(byteTestField);
|
||||
assertNull(normValues);
|
||||
FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
|
||||
assertFalse(fi.omitNorms);
|
||||
assertTrue(fi.isIndexed);
|
||||
assertFalse(fi.normsPresent());
|
||||
} else {
|
||||
FieldInfo fi = mergedReader.getFieldInfos().fieldInfo(byteTestField);
|
||||
assertFalse(fi.omitNorms);
|
||||
assertTrue(fi.isIndexed);
|
||||
assertTrue(fi.normsPresent());
|
||||
|
||||
DocValues normValues = mergedReader.normValues(byteTestField);
|
||||
assertNotNull(normValues);
|
||||
Source source = normValues.getSource();
|
||||
assertTrue(source.hasArray());
|
||||
assertEquals(Type.FIXED_INTS_8, normValues.type());
|
||||
byte[] norms = (byte[]) source.getArray();
|
||||
for (int i = 0; i < mergedReader.maxDoc(); i++) {
|
||||
Document document = mergedReader.document(i);
|
||||
int expected = Integer.parseInt(document.get(byteTestField));
|
||||
assertEquals((byte) expected, norms[i]);
|
||||
}
|
||||
}
|
||||
mergedReader.close();
|
||||
reader.close();
|
||||
|
||||
writer.close();
|
||||
dir.close();
|
||||
otherDir.close();
|
||||
}
|
||||
|
||||
public void buildIndex(Directory dir, boolean writeNorms) throws IOException,
|
||||
CorruptIndexException {
|
||||
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
|
||||
new MockAnalyzer(random));
|
||||
SimilarityProvider provider = new MySimProvider(writeNorms);
|
||||
config.setSimilarityProvider(provider);
|
||||
RandomIndexWriter writer = new RandomIndexWriter(random, dir, config);
|
||||
final LineFileDocs docs = new LineFileDocs(random);
|
||||
int num = atLeast(100);
|
||||
for (int i = 0; i < num; i++) {
|
||||
Document doc = docs.nextDoc();
|
||||
int boost = writeNorms ? 1 + random.nextInt(255) : 0;
|
||||
Field f = new Field(byteTestField, "" + boost,
|
||||
TextField.TYPE_STORED);
|
||||
f.setBoost(boost);
|
||||
doc.add(f);
|
||||
writer.addDocument(doc);
|
||||
doc.removeField(byteTestField);
|
||||
if (rarely()) {
|
||||
writer.commit();
|
||||
}
|
||||
}
|
||||
writer.commit();
|
||||
writer.close();
|
||||
}
|
||||
|
||||
|
||||
public class MySimProvider implements SimilarityProvider {
|
||||
SimilarityProvider delegate = new DefaultSimilarityProvider();
|
||||
private boolean writeNorms;
|
||||
public MySimProvider(boolean writeNorms) {
|
||||
this.writeNorms = writeNorms;
|
||||
}
|
||||
@Override
|
||||
public float queryNorm(float sumOfSquaredWeights) {
|
||||
|
||||
return delegate.queryNorm(sumOfSquaredWeights);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Similarity get(String field) {
|
||||
if (byteTestField.equals(field)) {
|
||||
return new ByteEncodingBoostSimilarity(writeNorms);
|
||||
} else {
|
||||
return delegate.get(field);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public float coord(int overlap, int maxOverlap) {
|
||||
return delegate.coord(overlap, maxOverlap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static class ByteEncodingBoostSimilarity extends DefaultSimilarity {
|
||||
|
||||
private boolean writeNorms;
|
||||
|
||||
public ByteEncodingBoostSimilarity(boolean writeNorms) {
|
||||
this.writeNorms = writeNorms;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
if (writeNorms) {
|
||||
int boost = (int) state.getBoost();
|
||||
norm.setByte((byte) (0xFF & boost));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -44,8 +44,7 @@ public class TestOmitTf extends LuceneTestCase {
|
|||
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
public Similarity get(String field) {
|
||||
return new TFIDFSimilarity() {
|
||||
|
||||
@Override public byte computeNorm(FieldInvertState state) { return encodeNormValue(state.getBoost()); }
|
||||
@Override public void computeNorm(FieldInvertState state, Norm norm) { norm.setByte(encodeNormValue(state.getBoost())); }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
|
|
|
@ -102,8 +102,8 @@ public class TestUniqueTermCount extends LuceneTestCase {
|
|||
class TestSimilarity extends DefaultSimilarity {
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return (byte) state.getUniqueTermCount();
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte((byte) state.getUniqueTermCount());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -261,7 +262,7 @@ final class JustCompileSearch {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.document.FieldType;
|
|||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.SlowMultiReaderWrapper;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -66,9 +67,9 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
// Disable length norm
|
||||
return encodeNormValue(state.getBoost());
|
||||
norm.setByte(encodeNormValue(state.getBoost()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.index.DocValues;
|
|||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -152,8 +153,8 @@ public class TestDocValuesScoring extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return sim.computeNorm(state);
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
sim.computeNorm(state, norm);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.io.IOException;
|
|||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
|
@ -45,7 +46,7 @@ public class TestSimilarity extends LuceneTestCase {
|
|||
public float coord(int overlap, int maxOverlap) { return 1.0f; }
|
||||
public Similarity get(String field) {
|
||||
return new DefaultSimilarity() {
|
||||
@Override public byte computeNorm(FieldInvertState state) { return encodeNormValue(state.getBoost()); }
|
||||
@Override public void computeNorm(FieldInvertState state, Norm norm) { norm.setByte(encodeNormValue(state.getBoost())); }
|
||||
@Override public float tf(float freq) { return freq; }
|
||||
@Override public float sloppyFreq(int distance) { return 2.0f; }
|
||||
@Override public float idf(int docFreq, int numDocs) { return 1.0f; }
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.index.FieldInvertState;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
@ -111,9 +112,10 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private class Sim1 extends TFIDFSimilarity {
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue(1f);
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue(1f));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -138,9 +140,10 @@ public class TestSimilarityProvider extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private class Sim2 extends TFIDFSimilarity {
|
||||
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue(10f);
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue(10f));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -330,8 +331,8 @@ public class TestPayloadNearQuery extends LuceneTestCase {
|
|||
//Make everything else 1 so we see the effect of the payload
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue(state.getBoost());
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue(state.getBoost()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.lucene.search.spans.Spans;
|
|||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -324,9 +325,9 @@ public class TestPayloadTermQuery extends LuceneTestCase {
|
|||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
//Make everything else 1 so we see the effect of the payload
|
||||
//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
@Override
|
||||
public byte computeNorm(FieldInvertState state) {
|
||||
return encodeNormValue(state.getBoost());
|
||||
@Override
|
||||
public void computeNorm(FieldInvertState state, Norm norm) {
|
||||
norm.setByte(encodeNormValue(state.getBoost()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.solr.search.function;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.index.Norm;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
|
@ -346,8 +347,11 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
|
|||
FieldInvertState state = new FieldInvertState();
|
||||
state.setBoost(1.0f);
|
||||
state.setLength(4);
|
||||
Norm norm = new Norm();
|
||||
similarity.computeNorm(state, norm);
|
||||
float nrm = similarity.decodeNormValue(norm.field().numericValue().byteValue());
|
||||
assertQ(req("fl","*,score","q", "{!func}norm(a_t)", "fq","id:2"),
|
||||
"//float[@name='score']='" + similarity.decodeNormValue(similarity.computeNorm(state)) + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
|
||||
"//float[@name='score']='" + nrm + "'"); // sqrt(4)==2 and is exactly representable when quantized to a byte
|
||||
|
||||
// test that ord and rord are working on a global index basis, not just
|
||||
// at the segment level (since Lucene 2.9 has switched to per-segment searching)
|
||||
|
|
Loading…
Reference in New Issue