From 6867430140427bcc84755d80670cfa517eb8eecd Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 17 Dec 2024 09:39:49 +0000 Subject: [PATCH] Revert "Introduces IndexInput#updateReadAdvice to change the ReadAdvice while merging vectors (#13985)" This reverts commit 46204f6b53c5bcbba89cc6acdd27f6cbe283f027. --- .../lucene/codecs/KnnVectorsReader.java | 7 --- .../lucene/codecs/KnnVectorsWriter.java | 9 ---- .../lucene/codecs/hnsw/FlatVectorsReader.java | 11 ---- .../lucene99/Lucene99FlatVectorsReader.java | 19 ------- .../lucene99/Lucene99HnswVectorsReader.java | 21 +------- .../perfield/PerFieldKnnVectorsFormat.java | 21 -------- .../org/apache/lucene/store/IndexInput.java | 8 --- .../lucene/store/MemorySegmentIndexInput.java | 14 ------ .../asserting/AssertingKnnVectorsFormat.java | 50 ------------------- .../tests/store/BaseDirectoryTestCase.java | 38 +------------- .../tests/store/MockIndexInputWrapper.java | 10 +--- 11 files changed, 4 insertions(+), 204 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java index 54d070fa599..e054ebeb2bb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java @@ -123,11 +123,4 @@ public abstract class KnnVectorsReader implements Closeable { public KnnVectorsReader getMergeInstance() { return this; } - - /** - * Optional: reset or close merge resources used in the reader - * - *

The default implementation is empty - */ - public void finishMerge() throws IOException {} } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java index 4bd10215c25..50af32a7e16 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java @@ -109,18 +109,9 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable { } } } - finishMerge(mergeState); finish(); } - private void finishMerge(MergeState mergeState) throws IOException { - for (KnnVectorsReader reader : mergeState.knnVectorsReaders) { - if (reader != null) { - reader.finishMerge(); - } - } - } - /** Tracks state of one sub-reader that we are merging */ private static class FloatVectorValuesSub extends DocIDMerger.Sub { diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java index b03e42cfbb4..9d776567883 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java @@ -88,15 +88,4 @@ public abstract class FlatVectorsReader extends KnnVectorsReader implements Acco */ public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] target) throws IOException; - - /** - * Returns an instance optimized for merging. This instance may only be consumed in the thread - * that called {@link #getMergeInstance()}. - * - *

The default implementation returns {@code this} - */ - @Override - public FlatVectorsReader getMergeInstance() { - return this; - } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java index 52839000137..9b42ddd0f26 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java @@ -21,7 +21,6 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSi import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; import java.io.IOException; -import java.io.UncheckedIOException; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; @@ -171,17 +170,6 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader { CodecUtil.checksumEntireFile(vectorData); } - @Override - public FlatVectorsReader getMergeInstance() { - try { - // Update the read advice since vectors are guaranteed to be accessed sequentially for merge - this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL); - return this; - } catch (IOException exception) { - throw new UncheckedIOException(exception); - } - } - private FieldEntry getFieldEntry(String field, VectorEncoding expectedEncoding) { final FieldInfo info = fieldInfos.fieldInfo(field); final FieldEntry fieldEntry; @@ -262,13 +250,6 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader { target); } - @Override - public void finishMerge() throws IOException { - // This makes sure that the access pattern hint is reverted back since HNSW implementation - // needs it - this.vectorData.updateReadAdvice(ReadAdvice.RANDOM); - } - @Override public void close() throws IOException { IOUtils.close(vectorData); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java index ed6388b53cb..2a3088527f5 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java @@ -69,12 +69,11 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader private final FlatVectorsReader flatVectorsReader; private final FieldInfos fieldInfos; - private final IntObjectHashMap fields; + private final IntObjectHashMap fields = new IntObjectHashMap<>(); private final IndexInput vectorIndex; public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatVectorsReader) throws IOException { - this.fields = new IntObjectHashMap<>(); this.flatVectorsReader = flatVectorsReader; boolean success = false; this.fieldInfos = state.fieldInfos; @@ -114,24 +113,6 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader } } - private Lucene99HnswVectorsReader( - Lucene99HnswVectorsReader reader, FlatVectorsReader flatVectorsReader) { - this.flatVectorsReader = flatVectorsReader; - this.fieldInfos = reader.fieldInfos; - this.fields = reader.fields; - this.vectorIndex = reader.vectorIndex; - } - - @Override - public KnnVectorsReader getMergeInstance() { - return new Lucene99HnswVectorsReader(this, this.flatVectorsReader.getMergeInstance()); - } - - @Override - public void finishMerge() throws IOException { - flatVectorsReader.finishMerge(); - } - private static IndexInput openDataInput( SegmentReadState state, int versionMeta, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java index bc18b231e74..63bad6d48da 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java @@ -239,27 +239,6 @@ public abstract class PerFieldKnnVectorsFormat extends KnnVectorsFormat { } } - private FieldsReader(final FieldsReader fieldsReader) { - this.fieldInfos = fieldsReader.fieldInfos; - for (FieldInfo fi : this.fieldInfos) { - if (fi.hasVectorValues() && fieldsReader.fields.containsKey(fi.number)) { - this.fields.put(fi.number, fieldsReader.fields.get(fi.number).getMergeInstance()); - } - } - } - - @Override - public KnnVectorsReader getMergeInstance() { - return new FieldsReader(this); - } - - @Override - public void finishMerge() throws IOException { - for (ObjectCursor knnVectorReader : fields.values()) { - knnVectorReader.value.finishMerge(); - } - } - /** * Return the underlying VectorReader for the given field * diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java index b649ace8c4d..4a42ef3139d 100644 --- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java +++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java @@ -228,14 +228,6 @@ public abstract class IndexInput extends DataInput implements Closeable { */ public void prefetch(long offset, long length) throws IOException {} - /** - * Optional method: Give a hint to this input about the change in read access pattern. IndexInput - * implementations may take advantage of this hint to optimize reads from storage. - * - *

The default implementation is a no-op. - */ - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {} - /** * Returns a hint whether all the contents of this input are resident in physical memory. It's a * hint because the operating system may have paged out some of the data by the time this method diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index 2424b53645b..8e72fa22585 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -360,20 +360,6 @@ abstract class MemorySegmentIndexInput extends IndexInput }); } - @Override - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException { - if (NATIVE_ACCESS.isEmpty()) { - return; - } - final NativeAccess nativeAccess = NATIVE_ACCESS.get(); - - long offset = 0; - for (MemorySegment seg : segments) { - advise(offset, seg.byteSize(), segment -> nativeAccess.madvise(segment, readAdvice)); - offset += seg.byteSize(); - } - } - void advise(long offset, long length, IOConsumer advice) throws IOException { if (NATIVE_ACCESS.isEmpty()) { return; diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java index bd2911fc50a..21c62090a69 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java @@ -18,7 +18,6 @@ package org.apache.lucene.tests.codecs.asserting; import java.io.IOException; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -106,19 +105,11 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat { static class AssertingKnnVectorsReader extends KnnVectorsReader implements HnswGraphProvider { final KnnVectorsReader delegate; final FieldInfos fis; - final boolean mergeInstance; - AtomicInteger mergeInstanceCount = new AtomicInteger(); - AtomicInteger finishMergeCount = new AtomicInteger(); AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) { - this(delegate, fis, false); - } - - AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) { assert delegate != null; this.delegate = delegate; this.fis = fis; - this.mergeInstance = mergeInstance; } @Override @@ -157,7 +148,6 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat { @Override public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - assert !mergeInstance; FieldInfo fi = fis.fieldInfo(field); assert fi != null && fi.getVectorDimension() > 0 @@ -168,7 +158,6 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat { @Override public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - assert !mergeInstance; FieldInfo fi = fis.fieldInfo(field); assert fi != null && fi.getVectorDimension() > 0 @@ -176,49 +165,10 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat { delegate.search(field, target, knnCollector, acceptDocs); } - @Override - public KnnVectorsReader getMergeInstance() { - assert !mergeInstance; - var mergeVectorsReader = delegate.getMergeInstance(); - assert mergeVectorsReader != null; - mergeInstanceCount.incrementAndGet(); - - final var parent = this; - return new AssertingKnnVectorsReader( - mergeVectorsReader, AssertingKnnVectorsReader.this.fis, true) { - @Override - public KnnVectorsReader getMergeInstance() { - assert false; // merging from a merge instance it not allowed - return null; - } - - @Override - public void finishMerge() throws IOException { - assert mergeInstance; - delegate.finishMerge(); - parent.finishMergeCount.incrementAndGet(); - } - - @Override - public void close() { - assert false; // closing the merge instance it not allowed - } - }; - } - - @Override - public void finishMerge() throws IOException { - assert mergeInstance; - delegate.finishMerge(); - finishMergeCount.incrementAndGet(); - } - @Override public void close() throws IOException { - assert !mergeInstance; delegate.close(); delegate.close(); - assert finishMergeCount.get() <= 0 || mergeInstanceCount.get() == finishMergeCount.get(); } @Override diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java index 6defa5eb8c7..35ecbddb2b8 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java @@ -16,11 +16,10 @@ */ package org.apache.lucene.tests.store; -import static com.carrotsearch.randomizedtesting.generators.RandomPicks.randomFrom; - import com.carrotsearch.randomizedtesting.RandomizedTest; import com.carrotsearch.randomizedtesting.generators.RandomBytes; import com.carrotsearch.randomizedtesting.generators.RandomNumbers; +import com.carrotsearch.randomizedtesting.generators.RandomPicks; import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; @@ -57,7 +56,6 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.RandomAccessInput; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.tests.mockfile.ExtrasFS; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; @@ -644,7 +642,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { if (files.length > 0) { do { - String file = randomFrom(rnd, files); + String file = RandomPicks.randomFrom(rnd, files); try (IndexInput input = dir.openInput(file, newIOContext(random()))) { // Just open, nothing else. assert input != null; @@ -1559,38 +1557,6 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { doTestPrefetch(TestUtil.nextInt(random(), 1, 1024)); } - public void testUpdateReadAdvice() throws IOException { - try (Directory dir = getDirectory(createTempDir("testUpdateReadAdvice"))) { - final int totalLength = TestUtil.nextInt(random(), 16384, 65536); - byte[] arr = new byte[totalLength]; - random().nextBytes(arr); - try (IndexOutput out = dir.createOutput("temp.bin", IOContext.DEFAULT)) { - out.writeBytes(arr, arr.length); - } - - try (IndexInput orig = dir.openInput("temp.bin", IOContext.DEFAULT)) { - IndexInput in = random().nextBoolean() ? orig.clone() : orig; - // Read advice updated at start - in.updateReadAdvice(randomFrom(random(), ReadAdvice.values())); - for (int i = 0; i < totalLength; i++) { - int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1); - in.seek(offset); - assertEquals(arr[offset], in.readByte()); - } - - // Updating readAdvice in the middle - for (int i = 0; i < 10_000; ++i) { - int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1); - in.seek(offset); - assertEquals(arr[offset], in.readByte()); - if (random().nextBoolean()) { - in.updateReadAdvice(randomFrom(random(), ReadAdvice.values())); - } - } - } - } - } - private void doTestPrefetch(int startOffset) throws IOException { try (Directory dir = getDirectory(createTempDir())) { final int totalLength = startOffset + TestUtil.nextInt(random(), 16384, 65536); diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java index 1dc5456d64e..3ee61189fc9 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java @@ -41,9 +41,9 @@ public class MockIndexInputWrapper extends FilterIndexInput { // Which MockIndexInputWrapper we were cloned from, or null if we are not a clone: private final MockIndexInputWrapper parent; + private final ReadAdvice readAdvice; private final boolean confined; private final Thread thread; - private ReadAdvice readAdvice; /** Sole constructor */ public MockIndexInputWrapper( @@ -192,14 +192,6 @@ public class MockIndexInputWrapper extends FilterIndexInput { return in.isLoaded(); } - @Override - public void updateReadAdvice(ReadAdvice readAdvice) throws IOException { - ensureOpen(); - ensureAccessible(); - this.readAdvice = readAdvice; - in.updateReadAdvice(readAdvice); - } - @Override public long length() { ensureOpen();