LUCENE-7581: don't allow updating a doc values field if it's used in the index sort

This commit is contained in:
Mike McCandless 2016-12-09 18:05:13 -05:00
parent 22d04a7c11
commit 4efbde4e76
5 changed files with 53 additions and 3 deletions

View File

@ -90,6 +90,10 @@ Bug Fixes
that does not extend ReflectiveAccessException in Java 9. that does not extend ReflectiveAccessException in Java 9.
(Uwe Schindler) (Uwe Schindler)
* LUCENE-7581: Lucene now prevents updating a doc values field that is used
in the index sort, since this would lead to corruption. (Jim
Ferenczi via Mike McCandless)
Improvements Improvements
* LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery, * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

View File

@ -1619,6 +1619,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) { if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) {
throw new IllegalArgumentException("can only update existing numeric-docvalues fields!"); throw new IllegalArgumentException("can only update existing numeric-docvalues fields!");
} }
if (config.getIndexSortFields().contains(field)) {
throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + field + ", sort=" + config.getIndexSort());
}
try { try {
long seqNo = docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value)); long seqNo = docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value));
if (seqNo < 0) { if (seqNo < 0) {
@ -1713,6 +1716,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
if (!globalFieldNumberMap.contains(f.name(), dvType)) { if (!globalFieldNumberMap.contains(f.name(), dvType)) {
throw new IllegalArgumentException("can only update existing docvalues fields! field=" + f.name() + ", type=" + dvType); throw new IllegalArgumentException("can only update existing docvalues fields! field=" + f.name() + ", type=" + dvType);
} }
if (config.getIndexSortFields().contains(f.name())) {
throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort());
}
switch (dvType) { switch (dvType) {
case NUMERIC: case NUMERIC:
dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue()); dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue());

View File

@ -18,7 +18,9 @@ package org.apache.lucene.index;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Arrays;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
@ -474,6 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
} }
} }
this.indexSort = sort; this.indexSort = sort;
this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet());
return this; return this;
} }

View File

@ -17,6 +17,9 @@
package org.apache.lucene.index; package org.apache.lucene.index;
import java.util.Collections;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
@ -98,6 +101,9 @@ public class LiveIndexWriterConfig {
/** The sort order to use to write merged segments. */ /** The sort order to use to write merged segments. */
protected Sort indexSort = null; protected Sort indexSort = null;
/** The field names involved in the index sort */
protected Set<String> indexSortFields = Collections.emptySet();
// used by IndexWriterConfig // used by IndexWriterConfig
LiveIndexWriterConfig(Analyzer analyzer) { LiveIndexWriterConfig(Analyzer analyzer) {
this.analyzer = analyzer; this.analyzer = analyzer;
@ -457,6 +463,13 @@ public class LiveIndexWriterConfig {
return indexSort; return indexSort;
} }
/**
* Returns the field names involved in the index sort
*/
public Set<String> getIndexSortFields() {
return indexSortFields;
}
@Override @Override
public String toString() { public String toString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();

View File

@ -1700,6 +1700,29 @@ public class TestIndexSorting extends LuceneTestCase {
dir.close(); dir.close();
} }
// docvalues fields involved in the index sort cannot be updated
public void testBadDVUpdate() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
iwc.setIndexSort(indexSort);
IndexWriter w = new IndexWriter(dir, iwc);
Document doc = new Document();
doc.add(new StringField("id", new BytesRef("0"), Store.NO));
doc.add(new NumericDocValuesField("foo", random().nextInt()));
w.addDocument(doc);
w.commit();
IllegalArgumentException exc = expectThrows(IllegalArgumentException.class,
() -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1)));
assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
exc = expectThrows(IllegalArgumentException.class,
() -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1));
assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
w.close();
dir.close();
}
static class DVUpdateRunnable implements Runnable { static class DVUpdateRunnable implements Runnable {
private final int numDocs; private final int numDocs;
@ -1727,7 +1750,7 @@ public class TestIndexSorting extends LuceneTestCase {
final long value = random.nextInt(20); final long value = random.nextInt(20);
synchronized (values) { synchronized (values) {
w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value)); w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("bar", value));
values.put(id, value); values.put(id, value);
} }
@ -1762,7 +1785,8 @@ public class TestIndexSorting extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) { for (int i = 0; i < numDocs; ++i) {
Document doc = new Document(); Document doc = new Document();
doc.add(new StringField("id", Integer.toString(i), Store.NO)); doc.add(new StringField("id", Integer.toString(i), Store.NO));
doc.add(new NumericDocValuesField("foo", -1)); doc.add(new NumericDocValuesField("foo", random().nextInt()));
doc.add(new NumericDocValuesField("bar", -1));
w.addDocument(doc); w.addDocument(doc);
values.put(i, -1L); values.put(i, -1L);
} }
@ -1786,7 +1810,7 @@ public class TestIndexSorting extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) { for (int i = 0; i < numDocs; ++i) {
final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1); final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
assertEquals(1, topDocs.totalHits); assertEquals(1, topDocs.totalHits);
NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "foo"); NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "bar");
int hitDoc = topDocs.scoreDocs[0].doc; int hitDoc = topDocs.scoreDocs[0].doc;
assertEquals(hitDoc, dvs.advance(hitDoc)); assertEquals(hitDoc, dvs.advance(hitDoc));
assertEquals(values.get(i).longValue(), dvs.longValue()); assertEquals(values.get(i).longValue(), dvs.longValue());