LUCENE-1590: don't allow stored-only fields to mess up the index-only attrs like omitNorms and omitTermFreqAndPositions

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@763591 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-04-09 10:37:50 +00:00
parent 6b4831fd1d
commit bb8b2e1462
8 changed files with 107 additions and 30 deletions

View File

@ -107,6 +107,12 @@ Bug fixes
public APIs to throw InterruptedException. (Jeremy Volkman vai
Mike McCandless)
9. LUCENE-1590: Fixed stored-only Field instances to not change the
value of omitNorms, omitTermFreqAndPositions in FieldInfo; when you
retrieve such fields they will now have omitNorms=true and
omitTermFreqAndPositions=false (though these values are unused).
(Uwe Schindler via Mike McCandless)
New features
1. LUCENE-1411: Added expert API to open an IndexWriter on a prior

View File

@ -329,6 +329,8 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
if (index == Index.NO) {
this.isIndexed = false;
this.isTokenized = false;
this.omitTermFreqAndPositions = false;
this.omitNorms = true;
} else if (index == Index.ANALYZED) {
this.isIndexed = true;
this.isTokenized = true;
@ -492,6 +494,8 @@ public final class Field extends AbstractField implements Fieldable, Serializabl
isIndexed = false;
isTokenized = false;
omitTermFreqAndPositions = false;
omitNorms = true;
isBinary = true;
binaryLength = length;

View File

@ -38,12 +38,21 @@ final class FieldInfo {
name = na;
isIndexed = tk;
number = nu;
this.storeTermVector = storeTermVector;
this.storeOffsetWithTermVector = storeOffsetWithTermVector;
this.storePositionWithTermVector = storePositionWithTermVector;
this.omitNorms = omitNorms;
this.storePayloads = storePayloads;
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
if (isIndexed) {
this.storeTermVector = storeTermVector;
this.storeOffsetWithTermVector = storeOffsetWithTermVector;
this.storePositionWithTermVector = storePositionWithTermVector;
this.storePayloads = storePayloads;
this.omitNorms = omitNorms;
this.omitTermFreqAndPositions = omitTermFreqAndPositions;
} else { // for non-indexed fields, leave defaults
this.storeTermVector = false;
this.storeOffsetWithTermVector = false;
this.storePositionWithTermVector = false;
this.storePayloads = false;
this.omitNorms = true;
this.omitTermFreqAndPositions = false;
}
}
public Object clone() {
@ -56,23 +65,25 @@ final class FieldInfo {
if (this.isIndexed != isIndexed) {
this.isIndexed = true; // once indexed, always index
}
if (this.storeTermVector != storeTermVector) {
this.storeTermVector = true; // once vector, always vector
}
if (this.storePositionWithTermVector != storePositionWithTermVector) {
this.storePositionWithTermVector = true; // once vector, always vector
}
if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
this.storeOffsetWithTermVector = true; // once vector, always vector
}
if (this.omitNorms != omitNorms) {
this.omitNorms = false; // once norms are stored, always store
}
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
}
if (this.storePayloads != storePayloads) {
this.storePayloads = true;
if (isIndexed) { // if updated field data is not for indexing, leave the updates out
if (this.storeTermVector != storeTermVector) {
this.storeTermVector = true; // once vector, always vector
}
if (this.storePositionWithTermVector != storePositionWithTermVector) {
this.storePositionWithTermVector = true; // once vector, always vector
}
if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) {
this.storeOffsetWithTermVector = true; // once vector, always vector
}
if (this.storePayloads != storePayloads) {
this.storePayloads = true;
}
if (this.omitNorms != omitNorms) {
this.omitNorms = false; // once norms are stored, always store
}
if (this.omitTermFreqAndPositions != omitTermFreqAndPositions) {
this.omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life
}
}
}
}

View File

@ -84,7 +84,7 @@ final class FieldInfos {
while (fieldIterator.hasNext()) {
Fieldable field = (Fieldable) fieldIterator.next();
add(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(),
field.isStoreOffsetWithTermVector(), field.getOmitNorms());
field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getOmitTf());
}
}

View File

@ -322,6 +322,7 @@ final class FieldsReader implements Cloneable {
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
f.setOmitTf(fi.omitTermFreqAndPositions);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
@ -332,6 +333,7 @@ final class FieldsReader implements Cloneable {
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
f.setOmitNorms(fi.omitNorms);
f.setOmitTf(fi.omitTermFreqAndPositions);
}
doc.add(f);
}
@ -365,7 +367,6 @@ final class FieldsReader implements Cloneable {
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
else
doc.add(new Field(fi.name, b, Field.Store.YES));
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
@ -383,6 +384,7 @@ final class FieldsReader implements Cloneable {
store,
index,
termVector);
f.setOmitTf(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
@ -390,6 +392,7 @@ final class FieldsReader implements Cloneable {
store,
index,
termVector);
f.setOmitTf(fi.omitTermFreqAndPositions);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
@ -641,6 +644,7 @@ final class FieldsReader implements Cloneable {
this.name = fi.name.intern();
this.isIndexed = fi.isIndexed;
this.omitNorms = fi.omitNorms;
this.omitTermFreqAndPositions = fi.omitTermFreqAndPositions;
this.storeOffsetWithTermVector = fi.storeOffsetWithTermVector;
this.storePositionWithTermVector = fi.storePositionWithTermVector;
this.storeTermVector = fi.storeTermVector;

View File

@ -64,6 +64,14 @@ class DocHelper {
public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT,
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
public static final String NO_TF_TEXT = "analyzed with no tf and positions";
public static final String NO_TF_KEY = "omitTermFreqAndPositions";
public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT,
Field.Store.YES, Field.Index.ANALYZED);
static {
noTFField.setOmitTermFreqAndPositions(true);
}
public static final String UNINDEXED_FIELD_TEXT = "unindexed field text";
public static final String UNINDEXED_FIELD_KEY = "unIndField";
public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT,
@ -119,6 +127,7 @@ class DocHelper {
compressedTextField2,
keyField,
noNormsField,
noTFField,
unIndField,
unStoredField1,
unStoredField2,
@ -139,6 +148,7 @@ class DocHelper {
public static Map notermvector=new HashMap();
public static Map lazy= new HashMap();
public static Map noNorms=new HashMap();
public static Map noTf=new HashMap();
static {
//Initialize the large Lazy Field
@ -167,6 +177,7 @@ class DocHelper {
if (f.isStored()) add(stored,f);
else add(unstored,f);
if (f.getOmitNorms()) add(noNorms,f);
if (f.getOmitTf()) add(noTf,f);
if (f.isLazy()) add(lazy, f);
}
}
@ -186,6 +197,7 @@ class DocHelper {
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
nameValues.put(NO_TF_KEY, NO_TF_TEXT);
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);

View File

@ -259,14 +259,13 @@ public class TestDocumentWriter extends LuceneTestCase {
doc.add(new Field("f2", "v1", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
doc.add(new Field("f2", "v2", Store.YES, Index.NOT_ANALYZED, TermVector.NO));
RAMDirectory ram = new RAMDirectory();
IndexWriter writer = new IndexWriter(ram, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
writer.addDocument(doc);
writer.close();
_TestUtil.checkIndex(ram);
_TestUtil.checkIndex(dir);
IndexReader reader = IndexReader.open(ram);
IndexReader reader = IndexReader.open(dir);
// f1
TermFreqVector tfv1 = reader.getTermFreqVector(0, "f1");
assertNotNull(tfv1);
@ -276,4 +275,37 @@ public class TestDocumentWriter extends LuceneTestCase {
assertNotNull(tfv2);
assertEquals("the 'with_tv' setting should rule!",2,tfv2.getTerms().length);
}
/**
* Test adding two fields with the same name, one indexed
* the other stored only. The omitNorms and omitTermFreqAndPositions setting
* of the stored field should not affect the indexed one (LUCENE-1590)
*/
public void testLUCENE_1590() throws Exception {
Document doc = new Document();
// f1 has no norms
doc.add(new Field("f1", "v1", Store.NO, Index.ANALYZED_NO_NORMS));
doc.add(new Field("f1", "v2", Store.YES, Index.NO));
// f2 has no TF
Field f = new Field("f2", "v1", Store.NO, Index.ANALYZED);
f.setOmitTermFreqAndPositions(true);
doc.add(f);
doc.add(new Field("f2", "v2", Store.YES, Index.NO));
IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
writer.addDocument(doc);
writer.optimize(); // be sure to have a single segment
writer.close();
_TestUtil.checkIndex(dir);
SegmentReader reader = (SegmentReader) IndexReader.open(dir);
FieldInfos fi = reader.fieldInfos();
// f1
assertFalse("f1 should have no norms", reader.hasNorms("f1"));
assertFalse("omitTermFreqAndPositions field bit should not be set for f1", fi.fieldInfo("f1").omitTermFreqAndPositions);
// f2
assertTrue("f2 should have norms", reader.hasNorms("f2"));
assertTrue("omitTermFreqAndPositions field bit should be set for f2", fi.fieldInfo("f2").omitTermFreqAndPositions);
}
}

View File

@ -72,6 +72,7 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == true);
assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTf() == false);
field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
@ -79,8 +80,15 @@ public class TestFieldsReader extends LuceneTestCase {
assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == true);
assertTrue(field.getOmitTf() == false);
field = doc.getField(DocHelper.NO_TF_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == false);
assertTrue(field.isStoreOffsetWithTermVector() == false);
assertTrue(field.isStorePositionWithTermVector() == false);
assertTrue(field.getOmitNorms() == false);
assertTrue(field.getOmitTf() == true);
reader.close();
}