LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.

This commit is contained in:
Jim Ferenczi 2018-08-01 18:28:51 +02:00
parent 1133bf98a5
commit 679b4aa71d
5 changed files with 100 additions and 4 deletions

View File

@ -199,6 +199,9 @@ Bug Fixes:
* LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack * LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack
overflows by enforcing a maximum term length. (Adrien Grand) overflows by enforcing a maximum term length. (Adrien Grand)
* LUCENE-8441: IndexWriter now checks doc value type for index sort fields
and fails the document if they are not compatible. (Jim Ferenczi, Mike McCandless)
Changes in Runtime Behavior: Changes in Runtime Behavior:
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

View File

@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
@ -524,6 +526,48 @@ final class DefaultIndexingChain extends DocConsumer {
fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue()); fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
} }
private void validateIndexSortDVType(Sort indexSort, String fieldName, DocValuesType dvType) {
for (SortField sortField : indexSort.getSort()) {
if (sortField.getField().equals(fieldName)) {
switch (dvType) {
case NUMERIC:
if (sortField.getType().equals(SortField.Type.INT) == false &&
sortField.getType().equals(SortField.Type.LONG) == false &&
sortField.getType().equals(SortField.Type.FLOAT) == false &&
sortField.getType().equals(SortField.Type.DOUBLE) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case BINARY:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
case SORTED:
if (sortField.getType().equals(SortField.Type.STRING) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_NUMERIC:
if (sortField instanceof SortedNumericSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_SET:
if (sortField instanceof SortedSetSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
default:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
}
}
}
/** Called from processDocument to index one field's doc value */ /** Called from processDocument to index one field's doc value */
private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException { private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException {
@ -531,7 +575,12 @@ final class DefaultIndexingChain extends DocConsumer {
// This is the first time we are seeing this field indexed with doc values, so we // This is the first time we are seeing this field indexed with doc values, so we
// now record the DV type so that any future attempt to (illegally) change // now record the DV type so that any future attempt to (illegally) change
// the DV type of this field, will throw an IllegalArgExc: // the DV type of this field, will throw an IllegalArgExc:
if (docWriter.getSegmentInfo().getIndexSort() != null) {
final Sort indexSort = docWriter.getSegmentInfo().getIndexSort();
validateIndexSortDVType(indexSort, fp.fieldInfo.name, dvType);
}
fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType); fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType);
} }
fp.fieldInfo.setDocValuesType(dvType); fp.fieldInfo.setDocValuesType(dvType);

View File

@ -83,8 +83,10 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
try { try {
super.abort(); super.abort();
} finally { } finally {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory, if (tmpDirectory != null) {
tmpDirectory.getTemporaryFiles().values()); IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
}
} }
} }

View File

@ -83,8 +83,10 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
try { try {
super.abort(); super.abort();
} finally { } finally {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory, if (tmpDirectory != null) {
tmpDirectory.getTemporaryFiles().values()); IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
}
} }
} }

View File

@ -84,6 +84,7 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.junit.internal.matchers.StringContains.containsString;
public class TestIndexSorting extends LuceneTestCase { public class TestIndexSorting extends LuceneTestCase {
static class AssertingNeedsIndexSortCodec extends FilterCodec { static class AssertingNeedsIndexSortCodec extends FilterCodec {
@ -2472,4 +2473,43 @@ public class TestIndexSorting extends LuceneTestCase {
IOUtils.close(r, w, dir); IOUtils.close(r, w, dir);
} }
public void testWrongSortFieldType() throws Exception {
Directory dir = newDirectory();
List<Field> dvs = new ArrayList<>();
dvs.add(new SortedDocValuesField("field", new BytesRef("")));
dvs.add(new SortedSetDocValuesField("field", new BytesRef("")));
dvs.add(new NumericDocValuesField("field", 42));
dvs.add(new SortedNumericDocValuesField("field", 42));
List<SortField> sortFields = new ArrayList<>();
sortFields.add(new SortField("field", SortField.Type.STRING));
sortFields.add(new SortedSetSortField("field", false));
sortFields.add(new SortField("field", SortField.Type.INT));
sortFields.add(new SortedNumericSortField("field", SortField.Type.INT));
for (int i = 0; i < sortFields.size(); i++) {
for (int j = 0; j < dvs.size(); j++) {
if (i == j) {
continue;
}
Sort indexSort = new Sort(sortFields.get(i));
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(dvs.get(j));
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("invalid doc value type"));
doc.clear();
doc.add(dvs.get(i));
w.addDocument(doc);
doc.add(dvs.get(j));
exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("cannot change DocValues type"));
w.rollback();
IOUtils.close(w);
}
}
IOUtils.close(dir);
}
} }