LUCENE-6049: don't throw exc writing segment if document hit non-aborting exc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1637524 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-11-08 08:24:13 +00:00
parent 54156c042d
commit b7059e7628
28 changed files with 159 additions and 97 deletions

View File

@ -46,7 +46,7 @@ public class BinaryDocValuesField extends Field {
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValuesType.BINARY);
TYPE.setDocValuesType(DocValuesType.BINARY);
TYPE.freeze();
}

View File

@ -312,7 +312,7 @@ public final class Document implements IndexDocument {
return new FilterIterator<StorableField, Field>(fields.iterator()) {
@Override
protected boolean predicateFunction(Field field) {
return field.type.stored() || field.type.docValueType() != DocValuesType.NONE;
return field.type.stored() || field.type.docValuesType() != DocValuesType.NONE;
}
};
}

View File

@ -54,7 +54,7 @@ public class FieldType implements IndexableFieldType {
private NumericType numericType;
private boolean frozen;
private int numericPrecisionStep = NumericUtils.PRECISION_STEP_DEFAULT;
private DocValuesType docValueType = DocValuesType.NONE;
private DocValuesType docValuesType = DocValuesType.NONE;
/**
* Create a new mutable FieldType with all of the properties from <code>ref</code>
@ -68,7 +68,7 @@ public class FieldType implements IndexableFieldType {
this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValueType = ref.docValueType();
this.docValuesType = ref.docValuesType();
this.numericType = ref.numericType();
// Do not copy frozen!
}
@ -377,12 +377,12 @@ public class FieldType implements IndexableFieldType {
result.append(numericPrecisionStep);
}
}
if (docValueType != DocValuesType.NONE) {
if (docValuesType != DocValuesType.NONE) {
if (result.length() > 0) {
result.append(",");
}
result.append("docValueType=");
result.append(docValueType);
result.append("docValuesType=");
result.append(docValuesType);
}
return result.toString();
@ -394,11 +394,11 @@ public class FieldType implements IndexableFieldType {
* {@inheritDoc}
* <p>
* The default is <code>null</code> (no docValues)
* @see #setDocValueType(DocValuesType)
* @see #setDocValuesType(DocValuesType)
*/
@Override
public DocValuesType docValueType() {
return docValueType;
public DocValuesType docValuesType() {
return docValuesType;
}
/**
@ -406,21 +406,21 @@ public class FieldType implements IndexableFieldType {
* @param type DocValues type, or null if no DocValues should be stored.
* @throws IllegalStateException if this FieldType is frozen against
* future modifications.
* @see #docValueType()
* @see #docValuesType()
*/
public void setDocValueType(DocValuesType type) {
public void setDocValuesType(DocValuesType type) {
checkIfFrozen();
if (type == null) {
throw new NullPointerException("DocValuesType cannot be null");
}
docValueType = type;
docValuesType = type;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((docValueType == null) ? 0 : docValueType.hashCode());
result = prime * result + ((docValuesType == null) ? 0 : docValuesType.hashCode());
result = prime * result + indexOptions.hashCode();
result = prime * result + numericPrecisionStep;
result = prime * result + ((numericType == null) ? 0 : numericType.hashCode());
@ -440,7 +440,7 @@ public class FieldType implements IndexableFieldType {
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
FieldType other = (FieldType) obj;
if (docValueType != other.docValueType) return false;
if (docValuesType != other.docValuesType) return false;
if (indexOptions != other.indexOptions) return false;
if (numericPrecisionStep != other.numericPrecisionStep) return false;
if (numericType != other.numericType) return false;

View File

@ -40,7 +40,7 @@ public class NumericDocValuesField extends Field {
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValuesType.NUMERIC);
TYPE.setDocValuesType(DocValuesType.NUMERIC);
TYPE.freeze();
}

View File

@ -43,7 +43,7 @@ public class SortedDocValuesField extends Field {
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValuesType.SORTED);
TYPE.setDocValuesType(DocValuesType.SORTED);
TYPE.freeze();
}

View File

@ -50,7 +50,7 @@ public class SortedNumericDocValuesField extends Field {
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValuesType.SORTED_NUMERIC);
TYPE.setDocValuesType(DocValuesType.SORTED_NUMERIC);
TYPE.freeze();
}

View File

@ -44,7 +44,7 @@ public class SortedSetDocValuesField extends Field {
*/
public static final FieldType TYPE = new FieldType();
static {
TYPE.setDocValueType(DocValuesType.SORTED_SET);
TYPE.setDocValuesType(DocValuesType.SORTED_SET);
TYPE.freeze();
}

View File

@ -361,9 +361,9 @@ final class DefaultIndexingChain extends DocConsumer {
abort = false;
}
DocValuesType dvType = fieldType.docValueType();
DocValuesType dvType = fieldType.docValuesType();
if (dvType == null) {
throw new NullPointerException("docValueType cannot be null (field: \"" + fieldName + "\")");
throw new NullPointerException("docValuesType cannot be null (field: \"" + fieldName + "\")");
}
if (dvType != DocValuesType.NONE) {
indexDocValue(fp, dvType, field);
@ -484,7 +484,11 @@ final class DefaultIndexingChain extends DocConsumer {
if (fp == null) {
// First time we are seeing this field in this segment
FieldInfo fi = fieldInfos.addOrUpdate(name, fieldType);
FieldInfo fi = fieldInfos.getOrAdd(name);
// Messy: must set this here because e.g. FreqProxTermsWriterPerField looks at the initial
// IndexOptions to decide what arrays it must create). Then, we also must set it in
// PerField.invert to allow for later downgrading of the index options:
fi.setIndexOptions(fieldType.indexOptions());
fp = new PerField(fi, invert);
fp.next = fieldHash[hashPos];
@ -502,13 +506,13 @@ final class DefaultIndexingChain extends DocConsumer {
fields = newFields;
}
} else {
fp.fieldInfo.update(fieldType);
if (invert && fp.invertState == null) {
} else if (invert && fp.invertState == null) {
// Messy: must set this here because e.g. FreqProxTermsWriterPerField looks at the initial
// IndexOptions to decide what arrays it must create). Then, we also must set it in
// PerField.invert to allow for later downgrading of the index options:
fp.fieldInfo.setIndexOptions(fieldType.indexOptions());
fp.setInvertState();
}
}
return fp;
}
@ -539,6 +543,8 @@ final class DefaultIndexingChain extends DocConsumer {
// reused
TokenStream tokenStream;
IndexOptions indexOptions;
public PerField(FieldInfo fieldInfo, boolean invert) {
this.fieldInfo = fieldInfo;
similarity = docState.similarity;
@ -582,11 +588,18 @@ final class DefaultIndexingChain extends DocConsumer {
IndexableFieldType fieldType = field.fieldType();
IndexOptions indexOptions = fieldType.indexOptions();
fieldInfo.setIndexOptions(indexOptions);
if (fieldType.omitNorms()) {
fieldInfo.setOmitsNorms();
}
final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
// only bother checking offsets if something will consume them.
// TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
final boolean checkOffsets = indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
/*
* To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream

View File

@ -108,10 +108,6 @@ public final class FieldInfo {
return true;
}
void update(IndexableFieldType ft) {
update(false, ft.omitNorms(), false, ft.indexOptions());
}
// should only be called by FieldInfos#addOrUpdate
void update(boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) {
if (indexOptions == null) {
@ -144,7 +140,7 @@ public final class FieldInfo {
}
void setDocValuesType(DocValuesType type) {
if (docValuesType != DocValuesType.NONE && docValuesType != type) {
if (docValuesType != DocValuesType.NONE && type != DocValuesType.NONE && docValuesType != type) {
throw new IllegalArgumentException("cannot change DocValues type from " + docValuesType + " to " + type + " for field \"" + name + "\"");
}
docValuesType = type;
@ -156,6 +152,23 @@ public final class FieldInfo {
return indexOptions;
}
/** Record the {@link IndexOptions} to use with this field. */
public void setIndexOptions(IndexOptions newIndexOptions) {
if (indexOptions != newIndexOptions) {
if (indexOptions == IndexOptions.NONE) {
indexOptions = newIndexOptions;
} else if (newIndexOptions != IndexOptions.NONE) {
// downgrade
indexOptions = indexOptions.compareTo(newIndexOptions) < 0 ? indexOptions : newIndexOptions;
}
}
if (indexOptions == IndexOptions.NONE || indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// cannot store payloads if we don't store positions:
storePayloads = false;
}
}
/**
* Returns {@link DocValuesType} of the docValues; this is
* {@code DocValuesType.NONE} if the field has no docvalues.
@ -197,6 +210,14 @@ public final class FieldInfo {
return omitNorms;
}
/** Omit norms for this field. */
public void setOmitsNorms() {
if (indexOptions == IndexOptions.NONE) {
throw new IllegalStateException("cannot omit norms: this field is not indexed");
}
omitNorms = true;
}
/**
* Returns true if this field actually has any norms.
*/

View File

@ -277,18 +277,23 @@ public class FieldInfos implements Iterable<FieldInfo> {
}
}
/** NOTE: this method does not carry over termVector
* the indexer chain must set these fields when they
* succeed in consuming the document */
public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) {
// TODO: really, indexer shouldn't even call this
// method (it's only called from DocFieldProcessor);
// rather, each component in the chain should update
// what it "owns". EG fieldType.indexOptions() should
// be updated by maybe FreqProxTermsWriterPerField:
return addOrUpdateInternal(name, -1, false,
fieldType.omitNorms(), false,
fieldType.indexOptions(), fieldType.docValueType());
/** Create a new field, or return existing one. */
public FieldInfo getOrAdd(String name) {
FieldInfo fi = fieldInfo(name);
if (fi == null) {
// This field wasn't yet added to this in-RAM
// segment's FieldInfo, so now we get a global
// number for this field. If the field was seen
// before then we'll get the same name and number,
// else we'll allocate a new one:
final int fieldNumber = globalFieldNumbers.addOrGet(name, -1, DocValuesType.NONE);
fi = new FieldInfo(name, fieldNumber, false, false, false, IndexOptions.NONE, DocValuesType.NONE, -1, null);
assert !byName.containsKey(fi.name);
globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, DocValuesType.NONE);
byName.put(fi.name, fi);
}
return fi;
}
private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber,
@ -317,7 +322,7 @@ public class FieldInfos implements Iterable<FieldInfo> {
boolean updateGlobal = fi.getDocValuesType() == DocValuesType.NONE;
if (updateGlobal) {
// Must also update docValuesType map so it's
// aware of this field's DocValueType. This will throw IllegalArgumentException if
// aware of this field's DocValuesType. This will throw IllegalArgumentException if
// an illegal type change was attempted.
globalFieldNumbers.setDocValuesType(fi.number, name, docValues);
}

View File

@ -1496,7 +1496,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
DocValuesUpdate[] dvUpdates = new DocValuesUpdate[updates.length];
for (int i = 0; i < updates.length; i++) {
final Field f = updates[i];
final DocValuesType dvType = f.fieldType().docValueType();
final DocValuesType dvType = f.fieldType().docValuesType();
if (dvType == null) {
throw new NullPointerException("DocValuesType cannot be null (field: \"" + f.name() + "\")");
}

View File

@ -95,5 +95,5 @@ public interface IndexableFieldType {
* DocValues {@link DocValuesType}: how the field's value will be indexed
* into docValues.
*/
public DocValuesType docValueType();
public DocValuesType docValuesType();
}

View File

@ -496,11 +496,13 @@ class ReadersAndUpdates {
}
// create new fields or update existing ones to have NumericDV type
for (String f : dvUpdates.numericDVUpdates.keySet()) {
builder.addOrUpdate(f, NumericDocValuesField.TYPE);
FieldInfo fieldInfo = builder.getOrAdd(f);
fieldInfo.setDocValuesType(DocValuesType.NUMERIC);
}
// create new fields or update existing ones to have BinaryDV type
for (String f : dvUpdates.binaryDVUpdates.keySet()) {
builder.addOrUpdate(f, BinaryDocValuesField.TYPE);
FieldInfo fieldInfo = builder.getOrAdd(f);
fieldInfo.setDocValuesType(DocValuesType.BINARY);
}
fieldInfos = builder.finish();

View File

@ -86,7 +86,7 @@ abstract class TermsHash {
}
}
abstract TermsHashPerField addField(FieldInvertState fieldInvertState, final FieldInfo fieldInfo);
abstract TermsHashPerField addField(FieldInvertState fieldInvertState, FieldInfo fieldInfo);
void finishDocument() throws IOException {
if (nextTermsHash != null) {

View File

@ -39,7 +39,7 @@ public class TestFieldType extends LuceneTestCase {
assertFalse(ft3.equals(ft));
FieldType ft4 = new FieldType();
ft4.setDocValueType(DocValuesType.BINARY);
ft4.setDocValuesType(DocValuesType.BINARY);
assertFalse(ft4.equals(ft));
FieldType ft5 = new FieldType();

View File

@ -833,7 +833,7 @@ public class TestBinaryDocValuesUpdates extends LuceneTestCase {
public void testUpdateBinaryDVFieldWithSameNameAsPostingField() throws Exception {
// this used to fail because FieldInfos.Builder neglected to update
// globalFieldMaps.docValueTypes map
// globalFieldMaps.docValuesTypes map
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -88,35 +88,12 @@ public class TestCodecs extends LuceneTestCase {
this.omitTF = omitTF;
this.storePayloads = storePayloads;
// TODO: change this test to use all three
fieldInfo = fieldInfos.addOrUpdate(name, new IndexableFieldType() {
@Override
public boolean stored() { return false; }
@Override
public boolean tokenized() { return false; }
@Override
public boolean storeTermVectors() { return false; }
@Override
public boolean storeTermVectorOffsets() { return false; }
@Override
public boolean storeTermVectorPositions() { return false; }
@Override
public boolean storeTermVectorPayloads() { return false; }
@Override
public boolean omitNorms() { return false; }
@Override
public IndexOptions indexOptions() { return omitTF ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; }
@Override
public DocValuesType docValueType() { return DocValuesType.NONE; }
});
fieldInfo = fieldInfos.getOrAdd(name);
if (omitTF) {
fieldInfo.setIndexOptions(IndexOptions.DOCS);
} else {
fieldInfo.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
}
if (storePayloads) {
fieldInfo.setStorePayloads();
}

View File

@ -28,6 +28,7 @@ import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
@ -899,4 +900,32 @@ public class TestDocValuesIndexing extends LuceneTestCase {
dir.close();
}
// LUCENE-6049
public void testExcIndexingDocBeforeDocValues() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setDocValuesType(DocValuesType.SORTED);
ft.freeze();
Field field = new Field("test", "value", ft);
field.setTokenStream(new TokenStream() {
@Override
public boolean incrementToken() {
throw new RuntimeException("no");
}
});
doc.add(field);
try {
w.addDocument(doc);
fail("did not hit exception");
} catch (RuntimeException re) {
// expected
}
w.addDocument(new Document());
w.close();
dir.close();
}
}

View File

@ -50,7 +50,13 @@ public class TestFieldsReader extends LuceneTestCase {
fieldInfos = new FieldInfos.Builder();
DocHelper.setupDoc(testDoc);
for (IndexableField field : testDoc.getFields()) {
fieldInfos.addOrUpdate(field.name(), field.fieldType());
FieldInfo fieldInfo = fieldInfos.getOrAdd(field.name());
IndexableFieldType ift = field.fieldType();
fieldInfo.setIndexOptions(ift.indexOptions());
if (ift.omitNorms()) {
fieldInfo.setOmitsNorms();
}
fieldInfo.setDocValuesType(ift.docValuesType());
}
dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))

View File

@ -86,7 +86,7 @@ public class TestIndexableField extends LuceneTestCase {
}
@Override
public DocValuesType docValueType() {
public DocValuesType docValuesType() {
return DocValuesType.NONE;
}
};

View File

@ -826,7 +826,7 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
@Test
public void testUpdateNumericDVFieldWithSameNameAsPostingField() throws Exception {
// this used to fail because FieldInfos.Builder neglected to update
// globalFieldMaps.docValueTypes map
// globalFieldMaps.docValuesTypes map
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -106,7 +106,7 @@ public class BBoxStrategy extends SpatialStrategy {
FieldType fieldType = new FieldType(DoubleField.TYPE_NOT_STORED);
fieldType.setNumericPrecisionStep(8);//Solr's default
fieldType.setDocValueType(DocValuesType.NUMERIC);
fieldType.setDocValuesType(DocValuesType.NUMERIC);
setFieldType(fieldType);
}

View File

@ -112,7 +112,7 @@ public class TestBBoxStrategy extends RandomSpatialOpStrategyTestCase {
if (random().nextBoolean()) {
BBoxStrategy bboxStrategy = (BBoxStrategy) strategy;
FieldType fieldType = new FieldType(bboxStrategy.getFieldType());
fieldType.setDocValueType(DocValuesType.NONE);
fieldType.setDocValuesType(DocValuesType.NONE);
bboxStrategy.setFieldType(fieldType);
}
for (SpatialOperation operation : SpatialOperation.values()) {

View File

@ -49,7 +49,8 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
Codec codec = getCodec();
SegmentInfo segmentInfo = newSegmentInfo(dir, "_123");
FieldInfos.Builder builder = new FieldInfos.Builder();
FieldInfo fi = builder.addOrUpdate("field", TextField.TYPE_STORED);
FieldInfo fi = builder.getOrAdd("field");
fi.setIndexOptions(TextField.TYPE_STORED.indexOptions());
addAttributes(fi);
FieldInfos infos = builder.finish();
codec.fieldInfosFormat().write(dir, segmentInfo, "", infos, IOContext.DEFAULT);
@ -81,7 +82,15 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
FieldInfos.Builder builder = new FieldInfos.Builder();
for (String field : fieldNames) {
IndexableFieldType fieldType = randomFieldType(random());
FieldInfo fi = builder.addOrUpdate(field, fieldType);
FieldInfo fi = builder.getOrAdd(field);
IndexOptions indexOptions = fieldType.indexOptions();
if (indexOptions != IndexOptions.NONE) {
fi.setIndexOptions(indexOptions);
if (fieldType.omitNorms()) {
fi.setOmitsNorms();
}
}
fi.setDocValuesType(fieldType.docValuesType());
if (fieldType.indexOptions() != IndexOptions.NONE && fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
if (random().nextBoolean()) {
fi.setStorePayloads();
@ -118,7 +127,7 @@ public abstract class BaseFieldInfoFormatTestCase extends BaseIndexFileFormatTes
if (r.nextBoolean()) {
DocValuesType values[] = getDocValuesTypes();
type.setDocValueType(values[r.nextInt(values.length)]);
type.setDocValuesType(values[r.nextInt(values.length)]);
}
return type;

View File

@ -887,7 +887,7 @@ public final class TestUtil {
for(IndexableField f : doc1.getFields()) {
final Field field1 = (Field) f;
final Field field2;
final DocValuesType dvType = field1.fieldType().docValueType();
final DocValuesType dvType = field1.fieldType().docValuesType();
final NumericType numType = field1.fieldType().numericType();
if (dvType != DocValuesType.NONE) {
switch(dvType) {

View File

@ -180,7 +180,7 @@ public class LukeRequestHandler extends RequestHandlerBase
flags.append( (f != null && f.fieldType().indexOptions() != IndexOptions.NONE) ? FieldFlag.INDEXED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().tokenized()) ? FieldFlag.TOKENIZED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().stored()) ? FieldFlag.STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().docValueType() != DocValuesType.NONE) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (f != null && f.fieldType().docValuesType() != DocValuesType.NONE) ? FieldFlag.DOC_VALUES.getAbbreviation() : "-" );
flags.append( (false) ? FieldFlag.MULTI_VALUED.getAbbreviation() : '-' ); // SchemaField Specific
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );

View File

@ -103,7 +103,7 @@ public class BBoxField extends AbstractSpatialFieldType<BBoxStrategy> implements
//and annoyingly this field isn't going to have a docValues format because Solr uses a separate Field for that
if (field.hasDocValues()) {
luceneType = new org.apache.lucene.document.FieldType(luceneType);
luceneType.setDocValueType(DocValuesType.NUMERIC);
luceneType.setDocValuesType(DocValuesType.NUMERIC);
}
strategy.setFieldType(luceneType);
return strategy;

View File

@ -303,7 +303,7 @@ public abstract class FieldType extends FieldProperties {
*/
public List<StorableField> createFields(SchemaField field, Object value, float boost) {
StorableField f = createField( field, value, boost);
if (field.hasDocValues() && f.fieldType().docValueType() == null) {
if (field.hasDocValues() && f.fieldType().docValuesType() == null) {
// field types that support doc values should either override createField
// to return a field with doc values or extend createFields if this can't
// be done in a single field instance (see StrField for example)