LUCENE-8441: IndexWriter now checks doc value type of index sort fields and fails the document if they are not compatible.

This commit is contained in:
Jim Ferenczi 2018-08-01 18:28:51 +02:00
parent 1133bf98a5
commit 679b4aa71d
5 changed files with 100 additions and 4 deletions

View File

@ -199,6 +199,9 @@ Bug Fixes:
* LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack
overflows by enforcing a maximum term length. (Adrien Grand)
* LUCENE-8441: IndexWriter now checks doc value type for index sort fields
and fails the document if they are not compatible. (Jim Ferenczi, Mike McCandless)
Changes in Runtime Behavior:
* LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

View File

@ -39,6 +39,8 @@ import org.apache.lucene.document.FieldType;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
@ -524,6 +526,48 @@ final class DefaultIndexingChain extends DocConsumer {
fp.pointValuesWriter.addPackedValue(docState.docID, field.binaryValue());
}
private void validateIndexSortDVType(Sort indexSort, String fieldName, DocValuesType dvType) {
for (SortField sortField : indexSort.getSort()) {
if (sortField.getField().equals(fieldName)) {
switch (dvType) {
case NUMERIC:
if (sortField.getType().equals(SortField.Type.INT) == false &&
sortField.getType().equals(SortField.Type.LONG) == false &&
sortField.getType().equals(SortField.Type.FLOAT) == false &&
sortField.getType().equals(SortField.Type.DOUBLE) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case BINARY:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
case SORTED:
if (sortField.getType().equals(SortField.Type.STRING) == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_NUMERIC:
if (sortField instanceof SortedNumericSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
case SORTED_SET:
if (sortField instanceof SortedSetSortField == false) {
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
default:
throw new IllegalArgumentException("invalid doc value type:" + dvType + " for sortField:" + sortField);
}
break;
}
}
}
/** Called from processDocument to index one field's doc value */
private void indexDocValue(PerField fp, DocValuesType dvType, IndexableField field) throws IOException {
@ -531,7 +575,12 @@ final class DefaultIndexingChain extends DocConsumer {
// This is the first time we are seeing this field indexed with doc values, so we
// now record the DV type so that any future attempt to (illegally) change
// the DV type of this field, will throw an IllegalArgExc:
if (docWriter.getSegmentInfo().getIndexSort() != null) {
final Sort indexSort = docWriter.getSegmentInfo().getIndexSort();
validateIndexSortDVType(indexSort, fp.fieldInfo.name, dvType);
}
fieldInfos.globalFieldNumbers.setDocValuesType(fp.fieldInfo.number, fp.fieldInfo.name, dvType);
}
fp.fieldInfo.setDocValuesType(dvType);

View File

@ -83,8 +83,10 @@ final class SortingStoredFieldsConsumer extends StoredFieldsConsumer {
try {
super.abort();
} finally {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
if (tmpDirectory != null) {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
}
}
}

View File

@ -83,8 +83,10 @@ final class SortingTermVectorsConsumer extends TermVectorsConsumer {
try {
super.abort();
} finally {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
if (tmpDirectory != null) {
IOUtils.deleteFilesIgnoringExceptions(tmpDirectory,
tmpDirectory.getTemporaryFiles().values());
}
}
}

View File

@ -84,6 +84,7 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.junit.internal.matchers.StringContains.containsString;
public class TestIndexSorting extends LuceneTestCase {
static class AssertingNeedsIndexSortCodec extends FilterCodec {
@ -2472,4 +2473,43 @@ public class TestIndexSorting extends LuceneTestCase {
IOUtils.close(r, w, dir);
}
public void testWrongSortFieldType() throws Exception {
Directory dir = newDirectory();
List<Field> dvs = new ArrayList<>();
dvs.add(new SortedDocValuesField("field", new BytesRef("")));
dvs.add(new SortedSetDocValuesField("field", new BytesRef("")));
dvs.add(new NumericDocValuesField("field", 42));
dvs.add(new SortedNumericDocValuesField("field", 42));
List<SortField> sortFields = new ArrayList<>();
sortFields.add(new SortField("field", SortField.Type.STRING));
sortFields.add(new SortedSetSortField("field", false));
sortFields.add(new SortField("field", SortField.Type.INT));
sortFields.add(new SortedNumericSortField("field", SortField.Type.INT));
for (int i = 0; i < sortFields.size(); i++) {
for (int j = 0; j < dvs.size(); j++) {
if (i == j) {
continue;
}
Sort indexSort = new Sort(sortFields.get(i));
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(dvs.get(j));
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("invalid doc value type"));
doc.clear();
doc.add(dvs.get(i));
w.addDocument(doc);
doc.add(dvs.get(j));
exc = expectThrows(IllegalArgumentException.class, () -> w.addDocument(doc));
assertThat(exc.getMessage(), containsString("cannot change DocValues type"));
w.rollback();
IOUtils.close(w);
}
}
IOUtils.close(dir);
}
}