From 257d256defc47c446493ea99b841f58c543673c0 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Mon, 23 Aug 2021 15:54:26 -0400 Subject: [PATCH] LUCENE-10054 Make HnswGraph hierarchical (#250) Currently HNSW has only a single layer. This is the first part to make it multi-layered. To keep changes small, this PR only adds multiple layers in the HnswGraph class. TODO for following PRs: - modify graph construction and search algorithm for a hierarchical graph. - modify Lucene90HnswVectorsWriter and Lucene90HnswVectorsReader to write and read multiple layers. --- .../lucene90/Lucene90HnswVectorsReader.java | 2 +- .../lucene90/Lucene90HnswVectorsWriter.java | 3 +- .../apache/lucene/index/KnnGraphValues.java | 7 ++- .../apache/lucene/util/hnsw/HnswGraph.java | 60 ++++++++++++------- .../lucene/util/hnsw/HnswGraphBuilder.java | 13 ++-- .../org/apache/lucene/index/TestKnnGraph.java | 4 +- .../lucene/util/hnsw/KnnGraphTester.java | 6 +- .../lucene/util/hnsw/TestHnswGraph.java | 18 +++--- 8 files changed, 67 insertions(+), 46 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java index 70e386d0b61..726bc4cb0ac 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java @@ -481,7 +481,7 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader { } @Override - public void seek(int targetOrd) throws IOException { + public void seek(int level, int targetOrd) throws IOException { // unsafe; no bounds checking dataIn.seek(entry.ordOffsets[targetOrd]); arcCount = dataIn.readInt(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsWriter.java index 
0c2832bf5cf..f82278a21bd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsWriter.java @@ -208,11 +208,12 @@ public final class Lucene90HnswVectorsWriter extends KnnVectorsWriter { hnswGraphBuilder.setInfoStream(segmentWriteState.infoStream); HnswGraph graph = hnswGraphBuilder.build(vectorValues.randomAccess()); + // TODO: implement storing of hierarchical graph; for now stores only 0th level for (int ord = 0; ord < count; ord++) { // write graph offsets[ord] = graphData.getFilePointer() - graphDataOffset; - NeighborArray neighbors = graph.getNeighbors(ord); + NeighborArray neighbors = graph.getNeighbors(0, ord); int size = neighbors.size(); // Destructively modify; it's ok we are discarding it after this diff --git a/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java b/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java index f8f175acd04..4ff1e8654f1 100644 --- a/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/KnnGraphValues.java @@ -35,17 +35,18 @@ public abstract class KnnGraphValues { * Move the pointer to exactly {@code target}, the id of a node in the graph. After this method * returns, call {@link #nextNeighbor()} to return successive (ordered) connected node ordinals. * + * @param level level of the graph * @param target must be a valid node in the graph, ie. ≥ 0 and < {@link * VectorValues#size()}. */ - public abstract void seek(int target) throws IOException; + public abstract void seek(int level, int target) throws IOException; /** Returns the number of nodes in the graph */ public abstract int size(); /** * Iterates over the neighbor list. It is illegal to call this method after it returns - * NO_MORE_DOCS without calling {@link #seek(int)}, which resets the iterator. 
+ * NO_MORE_DOCS without calling {@link #seek(int, int)}, which resets the iterator. * * @return a node ordinal in the graph, or NO_MORE_DOCS if the iteration is complete. */ @@ -61,7 +62,7 @@ public abstract class KnnGraphValues { } @Override - public void seek(int target) {} + public void seek(int level, int target) {} @Override public int size() { diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java index d1f0420d3cf..8f6a8f046a3 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java @@ -40,10 +40,10 @@ import org.apache.lucene.util.SparseFixedBitSet; *

Hyperparameters

* *