mirror of https://github.com/apache/lucene.git
LUCENE-7679: MemoryIndex should take into account omitNorms
This commit is contained in:
parent aa20136bb1
commit 6696eafaae
|
@ -115,6 +115,9 @@ Bug Fixes
|
||||||
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
|
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
|
||||||
Also added TestFilterCodecReader class. (Christine Poerschke)
|
Also added TestFilterCodecReader class. (Christine Poerschke)
|
||||||
|
|
||||||
|
* LUCENE-7679: MemoryIndex was ignoring omitNorms settings on passed-in
|
||||||
|
IndexableFields. (Alan Woodward)
|
||||||
|
|
||||||
Improvements
|
Improvements
|
||||||
|
|
||||||
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost
|
* LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost
|
||||||
|
|
|
@ -203,7 +203,8 @@ public final class FieldInfo {
|
||||||
return pointNumBytes;
|
return pointNumBytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
void setDocValuesType(DocValuesType type) {
|
/** Record that this field is indexed with docvalues, with the specified type */
|
||||||
|
public void setDocValuesType(DocValuesType type) {
|
||||||
if (type == null) {
|
if (type == null) {
|
||||||
throw new NullPointerException("DocValuesType must not be null (field: \"" + name + "\")");
|
throw new NullPointerException("DocValuesType must not be null (field: \"" + name + "\")");
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,6 +34,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.index.*;
|
import org.apache.lucene.index.*;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
|
@ -195,6 +196,8 @@ public class MemoryIndex {
|
||||||
|
|
||||||
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
|
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
|
||||||
|
|
||||||
|
private FieldType defaultFieldType = new FieldType();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs an empty instance that will not store offsets or payloads.
|
* Constructs an empty instance that will not store offsets or payloads.
|
||||||
*/
|
*/
|
||||||
|
@ -236,6 +239,9 @@ public class MemoryIndex {
|
||||||
MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
|
MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
|
||||||
this.storeOffsets = storeOffsets;
|
this.storeOffsets = storeOffsets;
|
||||||
this.storePayloads = storePayloads;
|
this.storePayloads = storePayloads;
|
||||||
|
this.defaultFieldType.setIndexOptions(storeOffsets ?
|
||||||
|
IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||||
|
this.defaultFieldType.setStoreTermVectors(true);
|
||||||
this.bytesUsed = Counter.newCounter();
|
this.bytesUsed = Counter.newCounter();
|
||||||
final int maxBufferedByteBlocks = (int)((maxReusedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
|
final int maxBufferedByteBlocks = (int)((maxReusedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
|
||||||
final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES));
|
final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES));
|
||||||
|
@ -269,8 +275,8 @@ public class MemoryIndex {
|
||||||
throw new IllegalArgumentException("analyzer must not be null");
|
throw new IllegalArgumentException("analyzer must not be null");
|
||||||
|
|
||||||
TokenStream stream = analyzer.tokenStream(fieldName, text);
|
TokenStream stream = analyzer.tokenStream(fieldName, text);
|
||||||
addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName),
|
storeTerms(getInfo(fieldName, defaultFieldType), stream, 1.0f,
|
||||||
DocValuesType.NONE, null, 0, 0, null);
|
analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -385,10 +391,11 @@ public class MemoryIndex {
|
||||||
* @param field the field to add
|
* @param field the field to add
|
||||||
* @param analyzer the analyzer to use for term analysis
|
* @param analyzer the analyzer to use for term analysis
|
||||||
* @param boost a field boost
|
* @param boost a field boost
|
||||||
* @throws IllegalArgumentException if the field is a DocValues or Point field, as these
|
|
||||||
* structures are not supported by MemoryIndex
|
|
||||||
*/
|
*/
|
||||||
public void addField(IndexableField field, Analyzer analyzer, float boost) {
|
public void addField(IndexableField field, Analyzer analyzer, float boost) {
|
||||||
|
|
||||||
|
Info info = getInfo(field.name(), field.fieldType());
|
||||||
|
|
||||||
int offsetGap;
|
int offsetGap;
|
||||||
TokenStream tokenStream;
|
TokenStream tokenStream;
|
||||||
int positionIncrementGap;
|
int positionIncrementGap;
|
||||||
|
@ -401,6 +408,9 @@ public class MemoryIndex {
|
||||||
tokenStream = field.tokenStream(null, null);
|
tokenStream = field.tokenStream(null, null);
|
||||||
positionIncrementGap = 0;
|
positionIncrementGap = 0;
|
||||||
}
|
}
|
||||||
|
if (tokenStream != null) {
|
||||||
|
storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
|
||||||
|
}
|
||||||
|
|
||||||
DocValuesType docValuesType = field.fieldType().docValuesType();
|
DocValuesType docValuesType = field.fieldType().docValuesType();
|
||||||
Object docValuesValue;
|
Object docValuesValue;
|
||||||
|
@ -420,12 +430,14 @@ public class MemoryIndex {
|
||||||
default:
|
default:
|
||||||
throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
|
throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
|
||||||
}
|
}
|
||||||
BytesRef pointValue = null;
|
if (docValuesValue != null) {
|
||||||
if (field.fieldType().pointDimensionCount() > 0) {
|
storeDocValues(info, docValuesType, docValuesValue);
|
||||||
pointValue = field.binaryValue();
|
|
||||||
}
|
}
|
||||||
addField(field.name(), tokenStream, boost, positionIncrementGap, offsetGap, docValuesType, docValuesValue,
|
|
||||||
field.fieldType().pointDimensionCount(), field.fieldType().pointNumBytes(), pointValue);
|
if (field.fieldType().pointDimensionCount() > 0) {
|
||||||
|
storePointValues(info, field.binaryValue());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -494,42 +506,40 @@ public class MemoryIndex {
|
||||||
* @see org.apache.lucene.document.Field#setBoost(float)
|
* @see org.apache.lucene.document.Field#setBoost(float)
|
||||||
*/
|
*/
|
||||||
public void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
|
public void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
|
||||||
addField(fieldName, tokenStream, boost, positionIncrementGap, offsetGap, DocValuesType.NONE, null, 0, 0, null);
|
Info info = getInfo(fieldName, defaultFieldType);
|
||||||
|
storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap,
|
private Info getInfo(String fieldName, IndexableFieldType fieldType) {
|
||||||
DocValuesType docValuesType, Object docValuesValue, int pointDimensionCount, int pointNumBytes,
|
|
||||||
BytesRef pointValue) {
|
|
||||||
|
|
||||||
if (frozen) {
|
if (frozen) {
|
||||||
throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
|
throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
|
||||||
}
|
}
|
||||||
if (fieldName == null) {
|
if (fieldName == null) {
|
||||||
throw new IllegalArgumentException("fieldName must not be null");
|
throw new IllegalArgumentException("fieldName must not be null");
|
||||||
}
|
}
|
||||||
if (boost <= 0.0f) {
|
|
||||||
throw new IllegalArgumentException("boost factor must be greater than 0.0");
|
|
||||||
}
|
|
||||||
|
|
||||||
Info info = fields.get(fieldName);
|
Info info = fields.get(fieldName);
|
||||||
if (info == null) {
|
if (info == null) {
|
||||||
IndexOptions indexOptions = storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
fields.put(fieldName, info = new Info(createFieldInfo(fieldName, fields.size(), fieldType), byteBlockPool));
|
||||||
FieldInfo fieldInfo = new FieldInfo(fieldName, fields.size(), true, false, storePayloads, indexOptions, docValuesType, -1, Collections.emptyMap(), 0, 0);
|
|
||||||
fields.put(fieldName, info = new Info(fieldInfo, byteBlockPool));
|
|
||||||
}
|
}
|
||||||
if (pointDimensionCount > 0) {
|
if (fieldType.pointDimensionCount() != info.fieldInfo.getPointDimensionCount()) {
|
||||||
storePointValues(info, pointDimensionCount, pointNumBytes, pointValue);
|
if (fieldType.pointDimensionCount() > 0)
|
||||||
|
info.fieldInfo.setPointDimensions(fieldType.pointDimensionCount(), fieldType.pointNumBytes());
|
||||||
}
|
}
|
||||||
if (docValuesType != DocValuesType.NONE) {
|
if (fieldType.docValuesType() != info.fieldInfo.getDocValuesType()) {
|
||||||
storeDocValues(info, docValuesType, docValuesValue);
|
if (fieldType.docValuesType() != DocValuesType.NONE)
|
||||||
}
|
info.fieldInfo.setDocValuesType(fieldType.docValuesType());
|
||||||
if (tokenStream != null) {
|
|
||||||
storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
|
|
||||||
}
|
}
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void storePointValues(Info info, int pointDimensionCount, int pointNumBytes, BytesRef pointValue) {
|
private FieldInfo createFieldInfo(String fieldName, int ord, IndexableFieldType fieldType) {
|
||||||
info.fieldInfo.setPointDimensions(pointDimensionCount, pointNumBytes);
|
IndexOptions indexOptions = storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||||
|
return new FieldInfo(fieldName, ord, fieldType.storeTermVectors(), fieldType.omitNorms(), storePayloads,
|
||||||
|
indexOptions, fieldType.docValuesType(), -1, Collections.emptyMap(),
|
||||||
|
fieldType.pointDimensionCount(), fieldType.pointNumBytes());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void storePointValues(Info info, BytesRef pointValue) {
|
||||||
if (info.pointValues == null) {
|
if (info.pointValues == null) {
|
||||||
info.pointValues = new BytesRef[4];
|
info.pointValues = new BytesRef[4];
|
||||||
}
|
}
|
||||||
|
@ -591,6 +601,11 @@ public class MemoryIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void storeTerms(Info info, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
|
private void storeTerms(Info info, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
|
||||||
|
|
||||||
|
if (boost <= 0.0f) {
|
||||||
|
throw new IllegalArgumentException("boost factor must be greater than 0.0");
|
||||||
|
}
|
||||||
|
|
||||||
int pos = -1;
|
int pos = -1;
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
if (info.numTokens == 0) {
|
if (info.numTokens == 0) {
|
||||||
|
@ -1598,7 +1613,7 @@ public class MemoryIndex {
|
||||||
@Override
|
@Override
|
||||||
public NumericDocValues getNormValues(String field) {
|
public NumericDocValues getNormValues(String field) {
|
||||||
Info info = fields.get(field);
|
Info info = fields.get(field);
|
||||||
if (info == null) {
|
if (info == null || info.fieldInfo.omitsNorms()) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
return info.getNormDocValues();
|
return info.getNormDocValues();
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.BinaryDocValues;
|
import org.apache.lucene.index.BinaryDocValues;
|
||||||
import org.apache.lucene.index.DocValuesType;
|
import org.apache.lucene.index.DocValuesType;
|
||||||
import org.apache.lucene.index.FieldInvertState;
|
import org.apache.lucene.index.FieldInvertState;
|
||||||
|
import org.apache.lucene.index.IndexOptions;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.IndexableField;
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
|
@ -199,6 +200,21 @@ public class TestMemoryIndex extends LuceneTestCase {
|
||||||
TestUtil.checkReader(reader);
|
TestUtil.checkReader(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOmitNorms() throws IOException {
|
||||||
|
MemoryIndex mi = new MemoryIndex();
|
||||||
|
FieldType ft = new FieldType();
|
||||||
|
ft.setTokenized(true);
|
||||||
|
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||||
|
ft.setOmitNorms(true);
|
||||||
|
mi.addField(new Field("f1", "some text in here", ft), analyzer);
|
||||||
|
mi.freeze();
|
||||||
|
|
||||||
|
LeafReader leader = (LeafReader) mi.createSearcher().getIndexReader();
|
||||||
|
NumericDocValues norms = leader.getNormValues("f1");
|
||||||
|
assertNull(norms);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBuildFromDocument() {
|
public void testBuildFromDocument() {
|
||||||
|
|
||||||
|
@ -276,7 +292,7 @@ public class TestMemoryIndex extends LuceneTestCase {
|
||||||
try {
|
try {
|
||||||
MemoryIndex.fromDocument(doc, analyzer);
|
MemoryIndex.fromDocument(doc, analyzer);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
assertEquals("Can't add [BINARY] doc values field [field], because [NUMERIC] doc values field already exists", e.getMessage());
|
assertEquals("cannot change DocValues type from NUMERIC to BINARY for field \"field\"", e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
doc = new Document();
|
doc = new Document();
|
||||||
|
|
Loading…
Reference in New Issue