LUCENE-10309: Minimum KnnVector codec support in Luke (#535)

This commit is contained in:
Tomoko Uchida 2021-12-12 15:31:18 +09:00 committed by GitHub
parent e111182e12
commit 2f634b0d95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 3 deletions

View File

@ -35,6 +35,7 @@ import java.io.IOException;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Objects; import java.util.Objects;
import java.util.Optional; import java.util.Optional;
import javax.swing.BorderFactory; import javax.swing.BorderFactory;
@ -154,7 +155,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
this.tableHeaderRenderer = this.tableHeaderRenderer =
new HelpHeaderRenderer( new HelpHeaderRenderer(
"About Flags", "About Flags",
"Format: IdfpoNPSB#txxVDtxxxxTx/x", "Format: IdfpoNPSB#txxVDtxxxxTx/xKxxxx/xxx",
createFlagsHelpDialog(), createFlagsHelpDialog(),
helpDialogFactory); helpDialogFactory);
@ -173,7 +174,8 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
"#txx - numeric stored values(type, precision)", "#txx - numeric stored values(type, precision)",
"V - term vectors", "V - term vectors",
"Dtxxxxx - doc values(type)", "Dtxxxxx - doc values(type)",
"Tx/x - point values(num bytes/dimension)" "Tx/x - point values(num bytes/dimension)",
"Kxxxx/xxx - knn vector values(dimension/similarity)"
}; };
JList<String> list = new JList<>(values); JList<String> list = new JList<>(values);
return new JScrollPane(list); return new JScrollPane(list);
@ -1049,7 +1051,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
enum Column implements TableColumnInfo { enum Column implements TableColumnInfo {
FIELD("Field", 0, String.class, 150), FIELD("Field", 0, String.class, 150),
FLAGS("Flags", 1, String.class, 200), FLAGS("Flags", 1, String.class, 220),
NORM("Norm", 2, Long.class, 80), NORM("Norm", 2, Long.class, 80),
VALUE("Value", 3, String.class, 500); VALUE("Value", 3, String.class, 500);
@ -1227,6 +1229,27 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
sb.append("/"); sb.append("/");
sb.append(f.getPointDimensionCount()); sb.append(f.getPointDimensionCount());
} }
// knn vector values
// Appends a fixed-width (9 character) segment to the flags string: either
// "Kdddd/sss" (zero-padded dimension, three-letter similarity code) or nine
// dashes when the field reports a vector dimension of 0.
if (f.getVectorDimension() == 0) {
// Placeholder has the same length as "Kxxxx/xxx" so the flag columns stay aligned.
sb.append("---------");
} else {
sb.append("K");
// %04d zero-pads to at least four digits; Locale.ENGLISH keeps the digits
// independent of the JVM's default locale.
sb.append(String.format(Locale.ENGLISH, "%04d", f.getVectorDimension()));
sb.append("/");
// Three-letter abbreviation of the similarity function.
switch (f.getVectorSimilarity()) {
case COSINE:
sb.append("cos");
break;
case DOT_PRODUCT:
sb.append("dot");
break;
case EUCLIDEAN:
sb.append("euc");
break;
default:
// Any similarity constant not listed above is rendered as "???".
sb.append("???");
}
}
return sb.toString(); return sb.toString();
} }

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
/** Holder for a document field's information and data. */ /** Holder for a document field's information and data. */
@ -54,6 +55,10 @@ public final class DocumentField {
private int pointDimensionCount; private int pointDimensionCount;
private int pointNumBytes; private int pointNumBytes;
// knn vector values
private int vectorDimension;
private VectorSimilarityFunction vectorSimilarity;
static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException { static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException {
return of(finfo, null, reader, docId); return of(finfo, null, reader, docId);
} }
@ -84,6 +89,9 @@ public final class DocumentField {
dfield.pointDimensionCount = finfo.getPointDimensionCount(); dfield.pointDimensionCount = finfo.getPointDimensionCount();
dfield.pointNumBytes = finfo.getPointNumBytes(); dfield.pointNumBytes = finfo.getPointNumBytes();
dfield.vectorDimension = finfo.getVectorDimension();
dfield.vectorSimilarity = finfo.getVectorSimilarityFunction();
if (field != null) { if (field != null) {
dfield.isStored = field.fieldType().stored(); dfield.isStored = field.fieldType().stored();
dfield.stringValue = field.stringValue(); dfield.stringValue = field.stringValue();
@ -148,6 +156,14 @@ public final class DocumentField {
return pointNumBytes; return pointNumBytes;
} }
/**
 * Returns the knn vector dimension recorded for this field.
 *
 * <p>The value is copied from the field's {@code FieldInfo} when this {@code DocumentField} is
 * built.
 *
 * @return the stored vector dimension
 */
public int getVectorDimension() {
  return this.vectorDimension;
}
/**
 * Returns the similarity function recorded for this field's knn vectors.
 *
 * <p>The value is copied from the field's {@code FieldInfo} when this {@code DocumentField} is
 * built.
 *
 * @return the stored vector similarity function
 */
public VectorSimilarityFunction getVectorSimilarity() {
  return this.vectorSimilarity;
}
@Override @Override
public String toString() { public String toString() {
return "DocumentField{" return "DocumentField{"
@ -164,6 +180,8 @@ public final class DocumentField {
+ dvType + dvType
+ ", pointDimensionCount=" + ", pointDimensionCount="
+ pointDimensionCount + pointDimensionCount
+ ", vectorDimension="
+ vectorDimension
+ '}'; + '}';
} }

View File

@ -68,6 +68,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f1.getDvType()); assertEquals(DocValuesType.NONE, f1.getDvType());
assertEquals(0, f1.getPointDimensionCount()); assertEquals(0, f1.getPointDimensionCount());
assertEquals(0, f1.getPointNumBytes()); assertEquals(0, f1.getPointNumBytes());
assertEquals(0, f1.getVectorDimension());
DocumentField f2 = fields.get(1); DocumentField f2 = fields.get(1);
assertEquals("author", f2.getName()); assertEquals("author", f2.getName());
@ -83,6 +84,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f2.getDvType()); assertEquals(DocValuesType.NONE, f2.getDvType());
assertEquals(0, f2.getPointDimensionCount()); assertEquals(0, f2.getPointDimensionCount());
assertEquals(0, f2.getPointNumBytes()); assertEquals(0, f2.getPointNumBytes());
assertEquals(0, f2.getVectorDimension());
DocumentField f3 = fields.get(2); DocumentField f3 = fields.get(2);
assertEquals("text", f3.getName()); assertEquals("text", f3.getName());
@ -98,6 +100,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f3.getDvType()); assertEquals(DocValuesType.NONE, f3.getDvType());
assertEquals(0, f3.getPointDimensionCount()); assertEquals(0, f3.getPointDimensionCount());
assertEquals(0, f3.getPointNumBytes()); assertEquals(0, f3.getPointNumBytes());
assertEquals(0, f3.getVectorDimension());
DocumentField f4 = fields.get(3); DocumentField f4 = fields.get(3);
assertEquals("subject", f4.getName()); assertEquals("subject", f4.getName());
@ -113,6 +116,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.SORTED_SET, f4.getDvType()); assertEquals(DocValuesType.SORTED_SET, f4.getDvType());
assertEquals(0, f4.getPointDimensionCount()); assertEquals(0, f4.getPointDimensionCount());
assertEquals(0, f4.getPointNumBytes()); assertEquals(0, f4.getPointNumBytes());
assertEquals(0, f4.getVectorDimension());
DocumentField f5 = fields.get(4); DocumentField f5 = fields.get(4);
assertEquals("downloads", f5.getName()); assertEquals("downloads", f5.getName());