LUCENE-7679: MemoryIndex should take into account omitNorms

Alan Woodward 2017-02-07 10:41:20 +00:00
parent aa20136bb1
commit 6696eafaae
4 changed files with 68 additions and 33 deletions
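
In user-visible terms: MemoryIndex used to hard-wire omitNorms=false into the FieldInfo it built for every field, and its getNormValues() only checked that the field existed, so fields indexed with omitNorms=true still came back with norms. A minimal sketch of the fixed behaviour, modelled on the testOmitNorms case added below (class name and field values are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.index.IndexOptions;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.memory.MemoryIndex;

    public class OmitNormsSketch {
      public static void main(String[] args) throws Exception {
        MemoryIndex mi = new MemoryIndex();
        FieldType ft = new FieldType();
        ft.setTokenized(true);
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        ft.setOmitNorms(true);  // the setting MemoryIndex used to ignore
        mi.addField(new Field("f1", "some text in here", ft), new StandardAnalyzer());
        mi.freeze();

        LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();
        // Before this commit: non-null norms despite omitNorms=true.
        // After this commit: null, matching a LeafReader over an on-disk index.
        System.out.println(reader.getNormValues("f1"));
      }
    }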

File: lucene/CHANGES.txt

@@ -115,6 +115,9 @@ Bug Fixes
 * LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
   Also added TestFilterCodecReader class. (Christine Poerschke)
 
+* LUCENE-7679: MemoryIndex was ignoring omitNorms settings on passed-in
+  IndexableFields. (Alan Woodward)
+
 Improvements
 
 * LUCENE-7055: Added Weight#scorerSupplier, which allows to estimate the cost

File: lucene/core/src/java/org/apache/lucene/index/FieldInfo.java

@@ -203,7 +203,8 @@ public final class FieldInfo {
     return pointNumBytes;
   }
 
-  void setDocValuesType(DocValuesType type) {
+  /** Record that this field is indexed with docvalues, with the specified type */
+  public void setDocValuesType(DocValuesType type) {
     if (type == null) {
       throw new NullPointerException("DocValuesType must not be null (field: \"" + name + "\")");
     }
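
MemoryIndex lives in a different package (org.apache.lucene.index.memory), so it can only reuse this setter once it is public; the refactored getInfo() below uses it to record a doc-values type on an already-registered field. A small stand-alone sketch of the widened API (the FieldInfo constructed here is illustrative; argument order per the 6.x constructor):

    import java.util.Collections;
    import org.apache.lucene.index.DocValuesType;
    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.IndexOptions;

    public class SetDocValuesTypeSketch {
      public static void main(String[] args) {
        // name, number, storeTermVector, omitNorms, storePayloads, indexOptions,
        // docValuesType, dvGen, attributes, pointDimensionCount, pointNumBytes
        FieldInfo fi = new FieldInfo("field", 0, false, false, false,
            IndexOptions.DOCS, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0);
        fi.setDocValuesType(DocValuesType.NUMERIC);  // now legal outside the package
        System.out.println(fi.getDocValuesType());   // NUMERIC
        // A conflicting second call, e.g. setDocValuesType(DocValuesType.BINARY),
        // would throw: cannot change DocValues type from NUMERIC to BINARY ...
      }
    }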

File: lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

@@ -34,6 +34,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -195,6 +196,8 @@ public class MemoryIndex {
 
   private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
 
+  private FieldType defaultFieldType = new FieldType();
+
   /**
    * Constructs an empty instance that will not store offsets or payloads.
    */
@@ -236,6 +239,9 @@
   MemoryIndex(boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
     this.storeOffsets = storeOffsets;
     this.storePayloads = storePayloads;
+    this.defaultFieldType.setIndexOptions(storeOffsets ?
+        IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    this.defaultFieldType.setStoreTermVectors(true);
     this.bytesUsed = Counter.newCounter();
     final int maxBufferedByteBlocks = (int)((maxReusedBytes/2) / ByteBlockPool.BYTE_BLOCK_SIZE );
     final int maxBufferedIntBlocks = (int) ((maxReusedBytes - (maxBufferedByteBlocks*ByteBlockPool.BYTE_BLOCK_SIZE))/(IntBlockPool.INT_BLOCK_SIZE * Integer.BYTES));
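
The default FieldType seeded here backs the String- and TokenStream-based addField() overloads, which carry no IndexableFieldType of their own. FieldType's omitNorms() default is false, so fields added through those overloads keep norms exactly as before; only callers passing an IndexableField can opt out. A short usage sketch (analyzer choice illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.memory.MemoryIndex;

    public class DefaultFieldTypeSketch {
      public static void main(String[] args) {
        MemoryIndex mi = new MemoryIndex();
        // No field type in sight: this overload resolves its Info through
        // getInfo("body", defaultFieldType), so norms are kept.
        mi.addField("body", "some text in here", new StandardAnalyzer());
      }
    }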
@@ -269,8 +275,8 @@
       throw new IllegalArgumentException("analyzer must not be null");
 
     TokenStream stream = analyzer.tokenStream(fieldName, text);
-    addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName),
-        DocValuesType.NONE, null, 0, 0, null);
+    storeTerms(getInfo(fieldName, defaultFieldType), stream, 1.0f,
+        analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
   }
 
   /**
@@ -385,10 +391,11 @@
    * @param field the field to add
    * @param analyzer the analyzer to use for term analysis
    * @param boost a field boost
+   * @throws IllegalArgumentException if the field is a DocValues or Point field, as these
+   *                                  structures are not supported by MemoryIndex
    */
   public void addField(IndexableField field, Analyzer analyzer, float boost) {
+    Info info = getInfo(field.name(), field.fieldType());
 
     int offsetGap;
     TokenStream tokenStream;
     int positionIncrementGap;
@@ -401,6 +408,9 @@
       tokenStream = field.tokenStream(null, null);
       positionIncrementGap = 0;
     }
+    if (tokenStream != null) {
+      storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
+    }
 
     DocValuesType docValuesType = field.fieldType().docValuesType();
     Object docValuesValue;
@@ -420,12 +430,14 @@
       default:
         throw new UnsupportedOperationException("unknown doc values type [" + docValuesType + "]");
     }
-    BytesRef pointValue = null;
-    if (field.fieldType().pointDimensionCount() > 0) {
-      pointValue = field.binaryValue();
+    if (docValuesValue != null) {
+      storeDocValues(info, docValuesType, docValuesValue);
     }
-    addField(field.name(), tokenStream, boost, positionIncrementGap, offsetGap, docValuesType, docValuesValue,
-        field.fieldType().pointDimensionCount(), field.fieldType().pointNumBytes(), pointValue);
+
+    if (field.fieldType().pointDimensionCount() > 0) {
+      storePointValues(info, field.binaryValue());
+    }
   }
 
   /**
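
After this hunk, addField(IndexableField, ...) no longer funnels everything through a single ten-argument private overload: terms, doc values and point values are each routed to a dedicated helper against the field's shared Info. A sketch of a document mixing those field kinds, in the style of the existing fromDocument tests (field names and values illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.TermQuery;

    public class MixedFieldSketch {
      public static void main(String[] args) {
        Document doc = new Document();
        doc.add(new TextField("title", "memory index", Field.Store.NO));  // terms -> storeTerms
        doc.add(new NumericDocValuesField("rank", 42L));                  // doc values -> storeDocValues
        MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
        System.out.println(mi.search(new TermQuery(new Term("title", "memory"))) > 0.0f);  // true
      }
    }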
@@ -494,42 +506,40 @@
    * @see org.apache.lucene.document.Field#setBoost(float)
    */
   public void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
-    addField(fieldName, tokenStream, boost, positionIncrementGap, offsetGap, DocValuesType.NONE, null, 0, 0, null);
+    Info info = getInfo(fieldName, defaultFieldType);
+    storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
   }
 
-  private void addField(String fieldName, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap,
-                        DocValuesType docValuesType, Object docValuesValue, int pointDimensionCount, int pointNumBytes,
-                        BytesRef pointValue) {
+  private Info getInfo(String fieldName, IndexableFieldType fieldType) {
     if (frozen) {
       throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
     }
     if (fieldName == null) {
       throw new IllegalArgumentException("fieldName must not be null");
     }
-    if (boost <= 0.0f) {
-      throw new IllegalArgumentException("boost factor must be greater than 0.0");
-    }
     Info info = fields.get(fieldName);
     if (info == null) {
-      IndexOptions indexOptions = storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
-      FieldInfo fieldInfo = new FieldInfo(fieldName, fields.size(), true, false, storePayloads, indexOptions, docValuesType, -1, Collections.emptyMap(), 0, 0);
-      fields.put(fieldName, info = new Info(fieldInfo, byteBlockPool));
+      fields.put(fieldName, info = new Info(createFieldInfo(fieldName, fields.size(), fieldType), byteBlockPool));
     }
-    if (pointDimensionCount > 0) {
-      storePointValues(info, pointDimensionCount, pointNumBytes, pointValue);
+    if (fieldType.pointDimensionCount() != info.fieldInfo.getPointDimensionCount()) {
+      if (fieldType.pointDimensionCount() > 0)
+        info.fieldInfo.setPointDimensions(fieldType.pointDimensionCount(), fieldType.pointNumBytes());
     }
-    if (docValuesType != DocValuesType.NONE) {
-      storeDocValues(info, docValuesType, docValuesValue);
-    }
-    if (tokenStream != null) {
-      storeTerms(info, tokenStream, boost, positionIncrementGap, offsetGap);
+    if (fieldType.docValuesType() != info.fieldInfo.getDocValuesType()) {
+      if (fieldType.docValuesType() != DocValuesType.NONE)
+        info.fieldInfo.setDocValuesType(fieldType.docValuesType());
     }
+    return info;
   }
 
-  private void storePointValues(Info info, int pointDimensionCount, int pointNumBytes, BytesRef pointValue) {
-    info.fieldInfo.setPointDimensions(pointDimensionCount, pointNumBytes);
+  private FieldInfo createFieldInfo(String fieldName, int ord, IndexableFieldType fieldType) {
+    IndexOptions indexOptions = storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
+    return new FieldInfo(fieldName, ord, fieldType.storeTermVectors(), fieldType.omitNorms(), storePayloads,
+                         indexOptions, fieldType.docValuesType(), -1, Collections.emptyMap(),
+                         fieldType.pointDimensionCount(), fieldType.pointNumBytes());
+  }
+
+  private void storePointValues(Info info, BytesRef pointValue) {
     if (info.pointValues == null) {
       info.pointValues = new BytesRef[4];
     }
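
This is the heart of the fix: createFieldInfo() now copies storeTermVectors(), omitNorms(), the doc-values type and the point configuration from the caller's IndexableFieldType, where the old code hard-coded true and false for the first two. A side effect is that adding the same field twice with conflicting doc-values types now fails inside FieldInfo.setDocValuesType() rather than in MemoryIndex itself, which is exactly what the updated test assertion at the bottom of this commit expects. A sketch of that failure mode (values illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.BinaryDocValuesField;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.util.BytesRef;

    public class DocValuesConflictSketch {
      public static void main(String[] args) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("field", 29L));
        doc.add(new BinaryDocValuesField("field", new BytesRef("30")));
        try {
          MemoryIndex.fromDocument(doc, new StandardAnalyzer());
        } catch (IllegalArgumentException e) {
          // cannot change DocValues type from NUMERIC to BINARY for field "field"
          System.out.println(e.getMessage());
        }
      }
    }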
@@ -591,6 +601,11 @@
   }
 
   private void storeTerms(Info info, TokenStream tokenStream, float boost, int positionIncrementGap, int offsetGap) {
+
+    if (boost <= 0.0f) {
+      throw new IllegalArgumentException("boost factor must be greater than 0.0");
+    }
+
     int pos = -1;
     int offset = 0;
     if (info.numTokens == 0) {
@@ -1598,7 +1613,7 @@
     @Override
     public NumericDocValues getNormValues(String field) {
       Info info = fields.get(field);
-      if (info == null) {
+      if (info == null || info.fieldInfo.omitsNorms()) {
        return null;
       }
       return info.getNormDocValues();
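
A note on this reader-side half of the fix: MemoryIndex computes norms lazily from normSimilarity on first access, so with the omitsNorms() check an omitNorms field never pays that cost and, like a LeafReader over an on-disk segment, simply answers getNormValues() with null. The sketch after the commit header exercises exactly this path.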

File: lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java

@@ -45,6 +45,7 @@ import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
@@ -199,6 +200,21 @@ public class TestMemoryIndex extends LuceneTestCase {
     TestUtil.checkReader(reader);
   }
 
+  @Test
+  public void testOmitNorms() throws IOException {
+    MemoryIndex mi = new MemoryIndex();
+    FieldType ft = new FieldType();
+    ft.setTokenized(true);
+    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+    ft.setOmitNorms(true);
+    mi.addField(new Field("f1", "some text in here", ft), analyzer);
+    mi.freeze();
+
+    LeafReader leader = (LeafReader) mi.createSearcher().getIndexReader();
+    NumericDocValues norms = leader.getNormValues("f1");
+    assertNull(norms);
+  }
+
   @Test
   public void testBuildFromDocument() {
@@ -276,7 +292,7 @@
     try {
       MemoryIndex.fromDocument(doc, analyzer);
     } catch (IllegalArgumentException e) {
-      assertEquals("Can't add [BINARY] doc values field [field], because [NUMERIC] doc values field already exists", e.getMessage());
+      assertEquals("cannot change DocValues type from NUMERIC to BINARY for field \"field\"", e.getMessage());
     }
 
     doc = new Document();