From aaa64d7015998f28aaffac031c4032abf73bebd6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 6 Dec 2018 11:31:02 +0100 Subject: [PATCH] LUCENE-8594: DV updates are broken for updates on new field A segment written with Lucene70Codec fails if it tries to update a DV field that didn't exist in the index before it was upgraded to Lucene80Codec. We bake the DV format into the FieldInfo when it's used the first time and therefore never go to the codec if we need to update. Yet on a field that didn't exist before and was added during an indexing operation we have to consult the codec and get an exception. This change fixes this issue and adds the relevant bwc tests. --- .../lucene/codecs/lucene70/Lucene70Codec.java | 5 +- .../index/TestBackwardsCompatibility.java | 76 ++++++++++++++++++- 2 files changed, 77 insertions(+), 4 deletions(-) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java index 6841345dc69..0f397b16f55 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene70/Lucene70Codec.java @@ -55,7 +55,8 @@ public class Lucene70Codec extends Codec { private final SegmentInfoFormat segmentInfosFormat = new Lucene70SegmentInfoFormat(); private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); - + private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene70"); + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -66,7 +67,7 @@ public class Lucene70Codec extends Codec { private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { @Override public DocValuesFormat 
getDocValuesFormatForField(String field) { - throw new IllegalStateException("This codec should only be used for reading, not writing"); + return defaultDVFormat; } }; diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index bc7b6d8f9e0..6f287978605 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -1538,7 +1538,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertEquals(ndvcf.longValue(), ndvf.longValue()*2); } } - + private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOException { BinaryDocValues bdvf = r.getBinaryDocValues(f); BinaryDocValues bdvcf = r.getBinaryDocValues(cf); @@ -1548,7 +1548,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertEquals(getValue(bdvcf), getValue(bdvf)*2); } } - + private void verifyDocValues(Directory dir) throws IOException { DirectoryReader reader = DirectoryReader.open(dir); for (LeafReaderContext context : reader.leaves()) { @@ -1576,6 +1576,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L); updateBinary(writer, "1", "bdv1", "bdv1_c", 300L); updateBinary(writer, "1", "bdv2", "bdv2_c", 300L); + writer.commit(); verifyDocValues(dir); @@ -1587,6 +1588,77 @@ public class TestBackwardsCompatibility extends LuceneTestCase { writer.close(); dir.close(); } + + public void testSoftDeletes() throws Exception { + Path oldIndexDir = createTempDir("dvupdates"); + TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + verifyUsesDefaultCodec(dir, dvUpdatesIndex); + IndexWriterConfig conf = new IndexWriterConfig(new 
MockAnalyzer(random())).setSoftDeletesField("__soft_delete"); + IndexWriter writer = new IndexWriter(dir, conf); + int maxDoc = writer.maxDoc(); + writer.updateDocValues(new Term("id", "1"),new NumericDocValuesField("__soft_delete", 1)); + + if (random().nextBoolean()) { + writer.commit(); + } + writer.forceMerge(1); + writer.commit(); + assertEquals(maxDoc-1, writer.maxDoc()); + writer.close(); + dir.close(); + } + + public void testDocValuesUpdatesWithNewField() throws Exception { + Path oldIndexDir = createTempDir("dvupdates"); + TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir); + Directory dir = newFSDirectory(oldIndexDir); + verifyUsesDefaultCodec(dir, dvUpdatesIndex); + + // update fields and verify index + IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random())); + IndexWriter writer = new IndexWriter(dir, conf); + // introduce a new field that we later update + writer.addDocument(Arrays.asList(new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO), + new NumericDocValuesField("new_numeric", 1), + new BinaryDocValuesField("new_binary", toBytes(1)))); + writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1); + writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1)); + + writer.commit(); + Runnable assertDV = () -> { + boolean found = false; + try (DirectoryReader reader = DirectoryReader.open(dir)) { + for (LeafReaderContext ctx : reader.leaves()) { + LeafReader leafReader = ctx.reader(); + TermsEnum id = leafReader.terms("id").iterator(); + if (id.seekExact(new BytesRef("1"))) { + PostingsEnum postings = id.postings(null, PostingsEnum.NONE); + NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric"); + BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary"); + int doc; + while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + found = true; + assertTrue(binaryDocValues.advanceExact(doc)); + 
assertTrue(numericDocValues.advanceExact(doc)); + assertEquals(1, numericDocValues.longValue()); + assertEquals(toBytes(1), binaryDocValues.binaryValue()); + } + } + } + } catch (IOException e) { + throw new AssertionError(e); + } + assertTrue(found); + }; + assertDV.run(); + // merge all segments + writer.forceMerge(1); + writer.commit(); + assertDV.run(); + writer.close(); + dir.close(); + } // LUCENE-5907 public void testUpgradeWithNRTReader() throws Exception {