mirror of https://github.com/apache/lucene.git
LUCENE-1590: don't allow stored-only fields to mess up the index-only attrs like omitNorms and omitTermFreqAndPositions
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@763591 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6b4831fd1d
commit
bb8b2e1462
|
@ -107,6 +107,12 @@ Bug fixes
|
|||
public APIs to throw InterruptedException. (Jeremy Volkman vai
|
||||
Mike McCandless)
|
||||
|
||||
9. LUCENE-1590: Fixed stored-only Field instances to not change the
|
||||
value of omitNorms, omitTermFreqAndPositions in FieldInfo; when you
|
||||
retrieve such fields they will now have omitNorms=true and
|
||||
omitTermFreqAndPositions=false (though these values are unused).
|
||||
(Uwe Schindler via Mike McCandless)
|
||||
|
||||
New features
|
||||
|
||||
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior
|
||||
|
|
|
@ -329,6 +329,8 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
if (index == Index.NO) {
|
||||
this.isIndexed = false;
|
||||
this.isTokenized = false;
|
||||
this.omitTermFreqAndPositions = false;
|
||||
this.omitNorms = true;
|
||||
} else if (index == Index.ANALYZED) {
|
||||
this.isIndexed = true;
|
||||
this.isTokenized = true;
|
||||
|
@ -492,6 +494,8 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
|
|||
|
||||
isIndexed = false;
|
||||
isTokenized = false;
|
||||
omitTermFreqAndPositions = false;
|
||||
omitNorms = true;
|
||||
|
||||
isBinary = true;
|
||||
binaryLength = length;
|
||||
|
|
|
@ -38,12 +38,21 @@ final class FieldInfo {
|
|||
name = na;
|
||||
isIndexed = tk;
|
||||
number = nu;
|
||||
if (isIndexed) {
|
||||
this.storeTermVector = storeTermVector;
|
||||
this.storeOffsetWithTermVector = storeOffsetWithTermVector;
|
||||
this.storePositionWithTermVector = storePositionWithTermVector;
|
||||
this.omitNorms = omitNorms;
|
||||
this.storePayloads = storePayloads;
|
||||
this.omitNorms = omitNorms;
|
||||
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
|
||||
} else { // for non-indexed fields, leave defaults
|
||||
this.storeTermVector = false;
|
||||
this.storeOffsetWithTermVector = false;
|
||||
this.storePositionWithTermVector = false;
|
||||
this.storePayloads = false;
|
||||
this.omitNorms = true;
|
||||
this.omitTermFreqAndPositions = false;
|
||||
}
|
||||
}
|
||||
|
||||
public Object clone() {
|
||||
|
@ -56,6 +65,7 @@ final class FieldInfo {
|
|||
if (this.isIndexed != isIndexed) {
|
||||
this.isIndexed = true; // once indexed, always index
|
||||
}
|
||||
if (isIndexed) { // if updated field data is not for indexing, leave the updates out
|
||||
if (this.storeTermVector != storeTermVector) {
|
||||
this.storeTermVector = true; // once vector, always vector
|
||||
}
|
||||
|
@ -65,14 +75,15 @@ final class FieldInfo {
|
|||
if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
|
||||
this.storeOffsetWithTermVector = true; // once vector, always vector
|
||||
}
|
||||
if (this.storePayloads != storePayloads) {
|
||||
this.storePayloads = true;
|
||||
}
|
||||
if (this.omitNorms != omitNorms) {
|
||||
this.omitNorms = false; // once norms are stored, always store
|
||||
}
|
||||
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
|
||||
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
|
||||
}
|
||||
if (this.storePayloads != storePayloads) {
|
||||
this.storePayloads = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -84,7 +84,7 @@ final class FieldInfos {
|
|||
while (fieldIterator.hasNext()) {
|
||||
Fieldable field = (Fieldable) fieldIterator.next();
|
||||
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
|
||||
field.isStoreOffsetWithTermVector(), field.getOmitNorms());
|
||||
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTf());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -322,6 +322,7 @@ final class FieldsReader implements Cloneable {
|
|||
//skip over the part that we aren't loading
|
||||
fieldsStream.seek(pointer + toRead);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
f.setOmitTf(fi.omitTermFreqAndPositions);
|
||||
} else {
|
||||
int length = fieldsStream.readVInt();
|
||||
long pointer = fieldsStream.getFilePointer();
|
||||
|
@ -332,6 +333,7 @@ final class FieldsReader implements Cloneable {
|
|||
fieldsStream.skipChars(length);
|
||||
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
f.setOmitTf(fi.omitTermFreqAndPositions);
|
||||
}
|
||||
doc.add(f);
|
||||
}
|
||||
|
@ -365,7 +367,6 @@ final class FieldsReader implements Cloneable {
|
|||
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
|
||||
else
|
||||
doc.add(new Field(fi.name, b, Field.Store.YES));
|
||||
|
||||
} else {
|
||||
Field.Store store = Field.Store.YES;
|
||||
Field.Index index = getIndexType(fi, tokenize);
|
||||
|
@ -383,6 +384,7 @@ final class FieldsReader implements Cloneable {
|
|||
store,
|
||||
index,
|
||||
termVector);
|
||||
f.setOmitTf(fi.omitTermFreqAndPositions);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
} else {
|
||||
f = new Field(fi.name, // name
|
||||
|
@ -390,6 +392,7 @@ final class FieldsReader implements Cloneable {
|
|||
store,
|
||||
index,
|
||||
termVector);
|
||||
f.setOmitTf(fi.omitTermFreqAndPositions);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
}
|
||||
doc.add(f);
|
||||
|
@ -641,6 +644,7 @@ final class FieldsReader implements Cloneable {
|
|||
this.name = fi.name.intern();
|
||||
this.isIndexed = fi.isIndexed;
|
||||
this.omitNorms = fi.omitNorms;
|
||||
this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
|
||||
this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
|
||||
this.storePositionWithTermVector = fi.storePositionWithTermVector;
|
||||
this.storeTermVector = fi.storeTermVector;
|
||||
|
|
|
@ -64,6 +64,14 @@ class DocHelper {
|
|||
public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
|
||||
public static final String NO_TF_TEXT = "analyzed with no tf and positions";
|
||||
public static final String NO_TF_KEY = "omitTermFreqAndPositions";
|
||||
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
|
||||
Field.Store.YES, Field.Index.ANALYZED);
|
||||
static {
|
||||
noTFField.setOmitTermFreqAndPositions(true);
|
||||
}
|
||||
|
||||
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
|
||||
public static final String UNINDEXED_FIELD_KEY = "unIndField";
|
||||
public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
|
||||
|
@ -119,6 +127,7 @@ class DocHelper {
|
|||
compressedTextField2,
|
||||
keyField,
|
||||
noNormsField,
|
||||
noTFField,
|
||||
unIndField,
|
||||
unStoredField1,
|
||||
unStoredField2,
|
||||
|
@ -139,6 +148,7 @@ class DocHelper {
|
|||
public static Map notermvector=new HashMap();
|
||||
public static Map lazy= new HashMap();
|
||||
public static Map noNorms=new HashMap();
|
||||
public static Map noTf=new HashMap();
|
||||
|
||||
static {
|
||||
//Initialize the large Lazy Field
|
||||
|
@ -167,6 +177,7 @@ class DocHelper {
|
|||
if (f.isStored()) add(stored,f);
|
||||
else add(unstored,f);
|
||||
if (f.getOmitNorms()) add(noNorms,f);
|
||||
if (f.getOmitTf()) add(noTf,f);
|
||||
if (f.isLazy()) add(lazy, f);
|
||||
}
|
||||
}
|
||||
|
@ -186,6 +197,7 @@ class DocHelper {
|
|||
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
|
||||
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
|
||||
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
|
||||
nameValues.put(NO_TF_KEY, NO_TF_TEXT);
|
||||
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
|
||||
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
|
||||
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
|
||||
|
|
|
@ -259,14 +259,13 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
doc.add(new Field("f2", "v1", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
|
||||
doc.add(new Field("f2", "v2", Store.YES, Index.NOT_ANALYZED, TermVector.NO));
|
||||
|
||||
RAMDirectory ram = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(ram, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
writer.addDocument(doc);
|
||||
writer.close();
|
||||
|
||||
_TestUtil.checkIndex(ram);
|
||||
_TestUtil.checkIndex(dir);
|
||||
|
||||
IndexReader reader = IndexReader.open(ram);
|
||||
IndexReader reader = IndexReader.open(dir);
|
||||
// f1
|
||||
TermFreqVector tfv1 = reader.getTermFreqVector(0, "f1");
|
||||
assertNotNull(tfv1);
|
||||
|
@ -276,4 +275,37 @@ public class TestDocumentWriter extends LuceneTestCase {
|
|||
assertNotNull(tfv2);
|
||||
assertEquals("the 'with_tv' setting should rule!",2,tfv2.getTerms().length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test adding two fields with the same name, one indexed
|
||||
* the other stored only. The omitNorms and omitTermFreqAndPositions setting
|
||||
* of the stored field should not affect the indexed one (LUCENE-1590)
|
||||
*/
|
||||
public void testLUCENE_1590() throws Exception {
|
||||
Document doc = new Document();
|
||||
// f1 has no norms
|
||||
doc.add(new Field("f1", "v1", Store.NO, Index.ANALYZED_NO_NORMS));
|
||||
doc.add(new Field("f1", "v2", Store.YES, Index.NO));
|
||||
// f2 has no TF
|
||||
Field f = new Field("f2", "v1", Store.NO, Index.ANALYZED);
|
||||
f.setOmitTermFreqAndPositions(true);
|
||||
doc.add(f);
|
||||
doc.add(new Field("f2", "v2", Store.YES, Index.NO));
|
||||
|
||||
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
writer.addDocument(doc);
|
||||
writer.optimize(); // be sure to have a single segment
|
||||
writer.close();
|
||||
|
||||
_TestUtil.checkIndex(dir);
|
||||
|
||||
SegmentReader reader = (SegmentReader) IndexReader.open(dir);
|
||||
FieldInfos fi = reader.fieldInfos();
|
||||
// f1
|
||||
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
|
||||
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
|
||||
// f2
|
||||
assertTrue("f2 should have norms", reader.hasNorms("f2"));
|
||||
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,6 +72,7 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
assertTrue(field.isStoreOffsetWithTermVector() == true);
|
||||
assertTrue(field.isStorePositionWithTermVector() == true);
|
||||
assertTrue(field.getOmitNorms() == false);
|
||||
assertTrue(field.getOmitTf() == false);
|
||||
|
||||
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
|
||||
assertTrue(field != null);
|
||||
|
@ -79,8 +80,15 @@ public class TestFieldsReader extends LuceneTestCase {
|
|||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||
assertTrue(field.getOmitNorms() == true);
|
||||
assertTrue(field.getOmitTf() == false);
|
||||
|
||||
|
||||
field = doc.getField(DocHelper.NO_TF_KEY);
|
||||
assertTrue(field != null);
|
||||
assertTrue(field.isTermVectorStored() == false);
|
||||
assertTrue(field.isStoreOffsetWithTermVector() == false);
|
||||
assertTrue(field.isStorePositionWithTermVector() == false);
|
||||
assertTrue(field.getOmitNorms() == false);
|
||||
assertTrue(field.getOmitTf() == true);
|
||||
reader.close();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue