diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b9deb7e1768..da6e3d29508 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -90,6 +90,10 @@ Bug Fixes that does not extend ReflectiveAccessException in Java 9. (Uwe Schindler) +* LUCENE-7581: Lucene now prevents updating a doc values field that is used + in the index sort, since this would lead to corruption. (Jim + Ferenczi via Mike McCandless) + Improvements * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 98687855231..3ee87b18304 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1619,6 +1619,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) { throw new IllegalArgumentException("can only update existing numeric-docvalues fields!"); } + if (config.getIndexSortFields().contains(field)) { + throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + field + ", sort=" + config.getIndexSort()); + } try { long seqNo = docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value)); if (seqNo < 0) { @@ -1713,6 +1716,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { if (!globalFieldNumberMap.contains(f.name(), dvType)) { throw new IllegalArgumentException("can only update existing docvalues fields! field=" + f.name() + ", type=" + dvType); } + if (config.getIndexSortFields().contains(f.name())) { + throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort()); + } switch (dvType) { case NUMERIC: dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue()); diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java index 4f642eed52a..ce4f0a8e5c3 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java @@ -18,7 +18,9 @@ package org.apache.lucene.index; import java.io.PrintStream; +import java.util.Arrays; import java.util.EnumSet; +import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; @@ -474,6 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig { } } this.indexSort = sort; + this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet()); return this; } diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java index cec70c099aa..d9e1bc7bebb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java +++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java @@ -17,6 +17,9 @@ package org.apache.lucene.index; +import java.util.Collections; +import java.util.Set; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; @@ -98,6 +101,9 @@ public class LiveIndexWriterConfig { /** The sort order to use to write merged segments. */ protected Sort indexSort = null; + /** The field names involved in the index sort */ + protected Set indexSortFields = Collections.emptySet(); + // used by IndexWriterConfig LiveIndexWriterConfig(Analyzer analyzer) { this.analyzer = analyzer; @@ -457,6 +463,13 @@ public class LiveIndexWriterConfig { return indexSort; } + /** + * Returns the field names involved in the index sort + */ + public Set getIndexSortFields() { + return indexSortFields; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java index 5ebf8f481d1..08a85ef3e24 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java @@ -1700,6 +1700,29 @@ public class TestIndexSorting extends LuceneTestCase { dir.close(); } + + // docvalues fields involved in the index sort cannot be updated + public void testBadDVUpdate() throws Exception { + Directory dir = newDirectory(); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random())); + Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG)); + iwc.setIndexSort(indexSort); + IndexWriter w = new IndexWriter(dir, iwc); + Document doc = new Document(); + doc.add(new StringField("id", new BytesRef("0"), Store.NO)); + doc.add(new NumericDocValuesField("foo", random().nextInt())); + w.addDocument(doc); + w.commit(); + IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, + () -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1))); + assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort="); + exc = expectThrows(IllegalArgumentException.class, + () -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1)); + assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort="); + w.close(); + dir.close(); + } + static class DVUpdateRunnable implements Runnable { private final int numDocs; @@ -1727,7 +1750,7 @@ public class TestIndexSorting extends LuceneTestCase { final long value = random.nextInt(20); synchronized (values) { - w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value)); + w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("bar", value)); values.put(id, value); } @@ -1762,7 +1785,8 @@ public class TestIndexSorting extends LuceneTestCase { for (int i = 0; i < numDocs; ++i) { Document doc = new Document(); doc.add(new StringField("id", Integer.toString(i), Store.NO)); - doc.add(new NumericDocValuesField("foo", -1)); + doc.add(new NumericDocValuesField("foo", random().nextInt())); + doc.add(new NumericDocValuesField("bar", -1)); w.addDocument(doc); values.put(i, -1L); } @@ -1786,7 +1810,7 @@ public class TestIndexSorting extends LuceneTestCase { for (int i = 0; i < numDocs; ++i) { final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); assertEquals(1, topDocs.totalHits); - NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "foo"); + NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "bar"); int hitDoc = topDocs.scoreDocs[0].doc; assertEquals(hitDoc, dvs.advance(hitDoc)); assertEquals(values.get(i).longValue(), dvs.longValue());