LUCENE-10309: Minimum KnnVector codec support in Luke (#535)

This commit is contained in:
Tomoko Uchida 2021-12-12 15:31:18 +09:00 committed by GitHub
parent e111182e12
commit 2f634b0d95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 3 deletions

View File

@ -35,6 +35,7 @@ import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import javax.swing.BorderFactory;
@ -154,7 +155,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
this.tableHeaderRenderer =
new HelpHeaderRenderer(
"About Flags",
"Format: IdfpoNPSB#txxVDtxxxxTx/x",
"Format: IdfpoNPSB#txxVDtxxxxTx/xKxxxx/xxx",
createFlagsHelpDialog(),
helpDialogFactory);
@ -173,7 +174,8 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
"#txx - numeric stored values(type, precision)",
"V - term vectors",
"Dtxxxxx - doc values(type)",
"Tx/x - point values(num bytes/dimension)"
"Tx/x - point values(num bytes/dimension)",
"Kxxxx/xxx - knn vector values(dimension/similarity)"
};
JList<String> list = new JList<>(values);
return new JScrollPane(list);
@ -1049,7 +1051,7 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
enum Column implements TableColumnInfo {
FIELD("Field", 0, String.class, 150),
FLAGS("Flags", 1, String.class, 200),
FLAGS("Flags", 1, String.class, 220),
NORM("Norm", 2, Long.class, 80),
VALUE("Value", 3, String.class, 500);
@ -1227,6 +1229,27 @@ public final class DocumentsPanelProvider implements DocumentsTabOperator {
sb.append("/");
sb.append(f.getPointDimensionCount());
}
// knn vector values
if (f.getVectorDimension() == 0) {
sb.append("---------");
} else {
sb.append("K");
sb.append(String.format(Locale.ENGLISH, "%04d", f.getVectorDimension()));
sb.append("/");
switch (f.getVectorSimilarity()) {
case COSINE:
sb.append("cos");
break;
case DOT_PRODUCT:
sb.append("dot");
break;
case EUCLIDEAN:
sb.append("euc");
break;
default:
sb.append("???");
}
}
return sb.toString();
}

View File

@ -26,6 +26,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
/** Holder for a document field's information and data. */
@ -54,6 +55,10 @@ public final class DocumentField {
private int pointDimensionCount;
private int pointNumBytes;
// knn vector values
private int vectorDimension;
private VectorSimilarityFunction vectorSimilarity;
/**
 * Returns a {@code DocumentField} for the given field info and document id when no field
 * instance is at hand. Delegates to the four-argument overload, passing {@code null} as its
 * second argument.
 *
 * @param finfo field info forwarded to the delegated overload
 * @param reader index reader forwarded to the delegated overload
 * @param docId document id forwarded to the delegated overload
 * @throws IOException propagated from the delegated overload
 */
static DocumentField of(FieldInfo finfo, IndexReader reader, int docId) throws IOException {
return of(finfo, null, reader, docId);
}
@ -84,6 +89,9 @@ public final class DocumentField {
dfield.pointDimensionCount = finfo.getPointDimensionCount();
dfield.pointNumBytes = finfo.getPointNumBytes();
dfield.vectorDimension = finfo.getVectorDimension();
dfield.vectorSimilarity = finfo.getVectorSimilarityFunction();
if (field != null) {
dfield.isStored = field.fieldType().stored();
dfield.stringValue = field.stringValue();
@ -148,6 +156,14 @@ public final class DocumentField {
return pointNumBytes;
}
/**
 * Returns the knn vector dimension held by this field.
 *
 * @return the stored vector dimension
 */
public int getVectorDimension() {
  return this.vectorDimension;
}
/**
 * Returns the knn vector similarity function held by this field.
 *
 * @return the stored similarity function
 */
public VectorSimilarityFunction getVectorSimilarity() {
  return this.vectorSimilarity;
}
@Override
public String toString() {
return "DocumentField{"
@ -164,6 +180,8 @@ public final class DocumentField {
+ dvType
+ ", pointDimensionCount="
+ pointDimensionCount
+ ", vectorDimension="
+ vectorDimension
+ '}';
}

View File

@ -68,6 +68,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f1.getDvType());
assertEquals(0, f1.getPointDimensionCount());
assertEquals(0, f1.getPointNumBytes());
assertEquals(0, f1.getVectorDimension());
DocumentField f2 = fields.get(1);
assertEquals("author", f2.getName());
@ -83,6 +84,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f2.getDvType());
assertEquals(0, f2.getPointDimensionCount());
assertEquals(0, f2.getPointNumBytes());
assertEquals(0, f2.getVectorDimension());
DocumentField f3 = fields.get(2);
assertEquals("text", f3.getName());
@ -98,6 +100,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.NONE, f3.getDvType());
assertEquals(0, f3.getPointDimensionCount());
assertEquals(0, f3.getPointNumBytes());
assertEquals(0, f3.getVectorDimension());
DocumentField f4 = fields.get(3);
assertEquals("subject", f4.getName());
@ -113,6 +116,7 @@ public class TestDocumentsImpl extends DocumentsTestBase {
assertEquals(DocValuesType.SORTED_SET, f4.getDvType());
assertEquals(0, f4.getPointDimensionCount());
assertEquals(0, f4.getPointNumBytes());
assertEquals(0, f4.getVectorDimension());
DocumentField f5 = fields.get(4);
assertEquals("downloads", f5.getName());