From 2f634b0d95ec24283999c61ec700d9e96a854633 Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Sun, 12 Dec 2021 15:31:18 +0900 Subject: [PATCH] LUCENE-10309: Minimum KnnVector codec support in Luke (#535) --- .../components/DocumentsPanelProvider.java | 29 +++++++++++++++++-- .../luke/models/documents/DocumentField.java | 18 ++++++++++++ .../models/documents/TestDocumentsImpl.java | 4 +++ 3 files changed, 48 insertions(+), 3 deletions(-) diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java index d7581f67cad..613cca415eb 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/DocumentsPanelProvider.java @@ -35,6 +35,7 @@ import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; import java.util.List; +import java.util.Locale; import java.util.Objects; import java.util.Optional; import javax.swing.BorderFactory; @@ -154,7 +155,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator { this.tableHeaderRenderer = new HelpHeaderRenderer( "About Flags", - "Format: IdfpoNPSB#txxVDtxxxxTx/x", + "Format: IdfpoNPSB#txxVDtxxxxTx/xKxxxx/xxx", createFlagsHelpDialog(), helpDialogFactory); @@ -173,7 +174,8 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator { "#txx - numeric stored values(type, precision)", "V - term vectors", "Dtxxxxx - doc values(type)", - "Tx/x - point values(num bytes/dimension)" + "Tx/x - point values(num bytes/dimension)", + "Kxxxx/xxx - knn vector values(dimension/similarity)" }; JList list = new JList<>(values); return new JScrollPane(list); @@ -1049,7 +1051,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator { enum Column implements TableColumnInfo { FIELD("Field", 0, String.class, 150), - FLAGS("Flags", 1, String.class, 200), + FLAGS("Flags", 1, String.class, 220), NORM("Norm", 2, Long.class, 80), VALUE("Value", 3, String.class, 500); @@ -1227,6 +1229,27 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator { sb.append("/"); sb.append(f.getPointDimensionCount()); } + // knn vector values + if (f.getVectorDimension() == 0) { + sb.append("---------"); + } else { + sb.append("K"); + sb.append(String.format(Locale.ENGLISH, "%04d", f.getVectorDimension())); + sb.append("/"); + switch (f.getVectorSimilarity()) { + case COSINE: + sb.append("cos"); + break; + case DOT_PRODUCT: + sb.append("dot"); + break; + case EUCLIDEAN: + sb.append("euc"); + break; + default: + sb.append("???"); + } + } return sb.toString(); } diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java index a27c8dba58f..460d1422649 100644 --- a/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java +++ b/lucene/luke/src/java/org/apache/lucene/luke/models/documents/DocumentField.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.BytesRef; /** Holder for a document field's information and data. */ @@ -54,6 +55,10 @@ public final class DocumentField { private int pointDimensionCount; private int pointNumBytes; + // knn vector values + private int vectorDimension; + private VectorSimilarityFunction vectorSimilarity; + static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException { return of(finfo, null, reader, docId); } @@ -84,6 +89,9 @@ public final class DocumentField { dfield.pointDimensionCount = finfo.getPointDimensionCount(); dfield.pointNumBytes = finfo.getPointNumBytes(); + dfield.vectorDimension = finfo.getVectorDimension(); + dfield.vectorSimilarity = finfo.getVectorSimilarityFunction(); + if (field != null) { dfield.isStored = field.fieldType().stored(); dfield.stringValue = field.stringValue(); @@ -148,6 +156,14 @@ public final class DocumentField { return pointNumBytes; } + public int getVectorDimension() { + return vectorDimension; + } + + public VectorSimilarityFunction getVectorSimilarity() { + return vectorSimilarity; + } + @Override public String toString() { return "DocumentField{" @@ -164,6 +180,8 @@ public final class DocumentField { + dvType + ", pointDimensionCount=" + pointDimensionCount + + ", vectorDimension=" + + vectorDimension + '}'; } diff --git a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java index ddddbef8fc0..8162b571ca6 100644 --- a/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java +++ b/lucene/luke/src/test/org/apache/lucene/luke/models/documents/TestDocumentsImpl.java @@ -68,6 +68,7 @@ public class TestDocumentsImpl extends DocumentsTestBase { assertEquals(DocValuesType.NONE, f1.getDvType()); assertEquals(0, f1.getPointDimensionCount()); assertEquals(0, f1.getPointNumBytes()); + assertEquals(0, f1.getVectorDimension()); DocumentField f2 = fields.get(1); assertEquals("author", f2.getName()); @@ -83,6 +84,7 @@ public class TestDocumentsImpl extends DocumentsTestBase { assertEquals(DocValuesType.NONE, f2.getDvType()); assertEquals(0, f2.getPointDimensionCount()); assertEquals(0, f2.getPointNumBytes()); + assertEquals(0, f2.getVectorDimension()); DocumentField f3 = fields.get(2); assertEquals("text", f3.getName()); @@ -98,6 +100,7 @@ public class TestDocumentsImpl extends DocumentsTestBase { assertEquals(DocValuesType.NONE, f3.getDvType()); assertEquals(0, f3.getPointDimensionCount()); assertEquals(0, f3.getPointNumBytes()); + assertEquals(0, f3.getVectorDimension()); DocumentField f4 = fields.get(3); assertEquals("subject", f4.getName()); @@ -113,6 +116,7 @@ public class TestDocumentsImpl extends DocumentsTestBase { assertEquals(DocValuesType.SORTED_SET, f4.getDvType()); assertEquals(0, f4.getPointDimensionCount()); assertEquals(0, f4.getPointNumBytes()); + assertEquals(0, f4.getVectorDimension()); DocumentField f5 = fields.get(4); assertEquals("downloads", f5.getName());