params = new ArrayList<>();
- params.add(new Object[] {Version.LUCENE_9_0_0, createPattern(INDEX_NAME, SUFFIX)});
+ params.add(new Object[] {Version.fromBits(9, 0, 0), createPattern(INDEX_NAME, SUFFIX)});
return params;
}
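The parameter change above swaps the `Version.LUCENE_9_0_0` constant for `Version.fromBits(9, 0, 0)`, constructing the version from its numeric components rather than a named constant. A minimal sketch of that factory (a real static method on `org.apache.lucene.util.Version`; the demo class is illustrative):

import org.apache.lucene.util.Version;

public class VersionBitsDemo {
  public static void main(String[] args) {
    // Build a Version from major/minor/bugfix parts instead of a named
    // constant; useful when the constant is unavailable on this branch.
    Version v = Version.fromBits(9, 0, 0);
    System.out.println(v); // prints "9.0.0"
  }
}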
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/dvupdates.10.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/dvupdates.10.0.0.zip
new file mode 100644
index 00000000000..db5d5260bcc
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/dvupdates.10.0.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/empty.10.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/empty.10.0.0.zip
new file mode 100644
index 00000000000..d906538645b
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/empty.10.0.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-cfs.zip
new file mode 100644
index 00000000000..73c79500c85
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-cfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-nocfs.zip
new file mode 100644
index 00000000000..d8b8216c639
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.10.0.0-nocfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.10.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.10.0.0.zip
new file mode 100644
index 00000000000..99a28f7631c
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int7_hnsw.10.0.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.10.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.10.0.zip
deleted file mode 100644
index 2799f04b65a..00000000000
Binary files a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.10.0.zip and /dev/null differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.0.zip
deleted file mode 100644
index 5fd94783427..00000000000
Binary files a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.0.zip and /dev/null differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.1.zip
deleted file mode 100644
index c4bb86b5f1b..00000000000
Binary files a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/int8_hnsw.9.11.1.zip and /dev/null differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/moreterms.10.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/moreterms.10.0.0.zip
new file mode 100644
index 00000000000..6ee086756cc
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/moreterms.10.0.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.10.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.10.0.0.zip
new file mode 100644
index 00000000000..e0896256896
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.10.0.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.12895.9.8.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.12895.9.8.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.12895.9.8.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.12895.9.8.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-cfs.zip
new file mode 100644
index 00000000000..bb3e4f01753
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-cfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-nocfs.zip
new file mode 100644
index 00000000000..a19fa717096
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.8.11.4-nocfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.0.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.0.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.0.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.0.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.0.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.0.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.0.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.0.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.1.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.1.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.1.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.1.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.1.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.1.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.1.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.1.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.10.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.10.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.10.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.10.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.10.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.1-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.1-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.1-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.1-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.1-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.1-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.11.1-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.11.1-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-cfs.zip
new file mode 100644
index 00000000000..6fc0118f222
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-cfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-nocfs.zip
new file mode 100644
index 00000000000..56b5c1325c8
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.12.0-nocfs.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.2.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.2.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.2.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.2.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.2.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.2.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.2.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.2.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.3.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.3.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.3.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.3.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.3.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.3.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.3.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.3.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.1-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.1-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.1-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.1-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.1-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.1-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.1-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.1-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.2-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.2-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.2-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.2-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.2-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.2-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.4.2-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.4.2-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.5.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.5.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.5.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.5.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.5.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.5.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.5.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.5.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.6.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.6.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.6.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.6.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.6.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.6.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.6.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.6.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.7.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.7.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.7.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.7.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.7.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.7.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.7.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.7.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.8.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.8.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.8.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.8.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.8.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.8.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.8.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.8.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.0-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.0-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.0-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.0-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.0-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.0-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.0-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.0-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.1-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.1-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.1-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.1-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.1-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.1-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.1-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.1-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.2-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.2-cfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.2-cfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.2-cfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.2-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.2-nocfs.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/index.9.9.2-nocfs.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.9.9.2-nocfs.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/dvupdates.9.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.dvupdates.9.0.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/dvupdates.9.0.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.dvupdates.9.0.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/empty.9.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.empty.9.0.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/empty.9.0.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.empty.9.0.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.10.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.10.0.zip
new file mode 100644
index 00000000000..0425b451fa0
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.10.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.0.zip
new file mode 100644
index 00000000000..9dd53d92a99
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.1.zip
new file mode 100644
index 00000000000..29aef1b909f
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.11.1.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.12.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.12.0.zip
new file mode 100644
index 00000000000..bfe07de8143
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.int7_hnsw.9.12.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/moreterms.9.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.moreterms.9.0.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/moreterms.9.0.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.moreterms.9.0.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.8.11.4.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.8.11.4.zip
new file mode 100644
index 00000000000..9736c6aca98
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.8.11.4.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.0.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.0.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.0.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.0.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.1.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.1.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.1.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.1.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.10.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.10.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.10.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.10.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.11.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.11.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.11.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.11.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.11.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.11.1.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.11.1.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.11.1.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.12.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.12.0.zip
new file mode 100644
index 00000000000..9ad1590e3e4
Binary files /dev/null and b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.12.0.zip differ
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.2.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.2.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.2.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.2.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.3.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.3.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.3.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.3.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.1.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.1.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.1.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.2.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.2.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.4.2.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.4.2.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.5.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.5.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.5.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.5.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.6.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.6.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.6.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.6.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.7.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.7.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.7.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.7.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.8.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.8.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.8.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.8.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.0.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.0.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.0.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.0.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.1.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.1.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.1.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.1.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.2.zip b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.2.zip
similarity index 100%
rename from lucene/backward-codecs/src/test/org/apache/lucene/backward_index/sorted.9.9.2.zip
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported.sorted.9.9.2.zip
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported_versions.txt b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported_versions.txt
index 8f298d3ae05..521f12c2804 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported_versions.txt
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/unsupported_versions.txt
@@ -119,4 +119,23 @@
8.11.0
8.11.1
8.11.2
-8.11.3
\ No newline at end of file
+8.11.3
+8.11.4
+9.0.0
+9.1.0
+9.2.0
+9.3.0
+9.4.0
+9.4.1
+9.4.2
+9.5.0
+9.6.0
+9.7.0
+9.8.0
+9.9.0
+9.9.1
+9.9.2
+9.10.0
+9.11.0
+9.11.1
+9.12.0
\ No newline at end of file
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
index 4572b6fadfe..7529186caca 100644
--- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/versions.txt
@@ -1,29 +1,3 @@
-8.0.0
-8.1.0
-8.1.1
-8.2.0
-8.3.0
-8.3.1
-8.4.0
-8.4.1
-8.5.0
-8.5.1
-8.5.2
-8.6.0
-8.6.1
-8.6.2
-8.6.3
-8.7.0
-8.8.0
-8.8.1
-8.8.2
-8.9.0
-8.10.0
-8.10.1
-8.11.0
-8.11.1
-8.11.2
-8.11.3
9.0.0
9.1.0
9.2.0
@@ -41,3 +15,5 @@
9.10.0
9.11.0
9.11.1
+9.12.0
+10.0.0
\ No newline at end of file
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java
index 0df0d7ecf50..48b95570694 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/GroupVIntBenchmark.java
@@ -186,7 +186,7 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
- byteBufferGVIntIn.readGroupVInts(values, size);
+ GroupVIntUtil.readGroupVInts(byteBufferGVIntIn, values, size);
bh.consume(values);
}
@@ -209,14 +209,14 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException {
byteArrayGVIntIn.rewind();
- byteArrayGVIntIn.readGroupVInts(values, size);
+ GroupVIntUtil.readGroupVInts(byteArrayGVIntIn, values, size);
bh.consume(values);
}
@Benchmark
public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
nioGVIntIn.seek(0);
- nioGVIntIn.readGroupVInts(values, size);
+ GroupVIntUtil.readGroupVInts(nioGVIntIn, values, size);
bh.consume(values);
}
@@ -230,7 +230,7 @@ public class GroupVIntBenchmark {
@Benchmark
public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException {
byteBuffersGVIntIn.seek(0);
- byteBuffersGVIntIn.readGroupVInts(values, size);
+ GroupVIntUtil.readGroupVInts(byteBuffersGVIntIn, values, size);
bh.consume(values);
}
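The benchmark hunks above all make the same mechanical change: group-varint decoding is no longer an instance method on the input but a static helper. A minimal sketch of the new call shape, assuming `in` is positioned at a group-varint encoded block:

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.GroupVIntUtil;

class GroupVIntDecode {
  // Decode `count` values from `in` into `values` via the static utility,
  // replacing the former DataInput.readGroupVInts instance method.
  static void decode(DataInput in, long[] values, int count) throws IOException {
    GroupVIntUtil.readGroupVInts(in, values, count);
  }
}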
diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java
index c4d3040f283..0a4da1f4886 100644
--- a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java
+++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/VectorScorerBenchmark.java
@@ -25,6 +25,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -32,7 +33,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.openjdk.jmh.annotations.*;
@@ -55,7 +55,7 @@ public class VectorScorerBenchmark {
Directory dir;
IndexInput in;
- RandomAccessVectorValues vectorValues;
+ KnnVectorValues vectorValues;
byte[] vec1, vec2;
RandomVectorScorer scorer;
@@ -95,7 +95,7 @@ public class VectorScorerBenchmark {
return scorer.score(1);
}
- static RandomAccessVectorValues vectorValues(
+ static KnnVectorValues vectorValues(
int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
return new OffHeapByteVectorValues.DenseOffHeapVectorValues(
dims, size, in.slice("test", 0, in.length()), dims, new ThrowingFlatVectorScorer(), sim);
@@ -105,23 +105,19 @@ public class VectorScorerBenchmark {
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues) {
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues) {
throw new UnsupportedOperationException();
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target) {
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target) {
throw new UnsupportedOperationException();
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target) {
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target) {
throw new UnsupportedOperationException();
}
}
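This benchmark now programs against `KnnVectorValues` everywhere `RandomAccessVectorValues` used to appear. A hedged sketch of the resulting call shape (the signatures match this diff; the similarity function choice and helper class are illustrative):

import java.io.IOException;
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.hnsw.RandomVectorScorer;

class ScoreByOrdinal {
  // Vector containers are addressed by ordinal; the scorer binds the query
  // up front and scores stored ordinals against it.
  static float score(KnnVectorValues values, byte[] query, int ord) throws IOException {
    RandomVectorScorer scorer =
        FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
            .getRandomVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, query);
    return scorer.score(ord);
  }
}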
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/FlatBitVectorsScorer.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/FlatBitVectorsScorer.java
index b8ff37c2654..8ffcc1c8d50 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/FlatBitVectorsScorer.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bitvectors/FlatBitVectorsScorer.java
@@ -19,10 +19,11 @@ package org.apache.lucene.codecs.bitvectors;
import java.io.IOException;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.VectorUtil;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
@@ -30,45 +31,39 @@ import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
public class FlatBitVectorsScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues)
throws IOException {
- assert vectorValues instanceof RandomAccessVectorValues.Bytes;
- if (vectorValues instanceof RandomAccessVectorValues.Bytes byteVectorValues) {
+ assert vectorValues instanceof ByteVectorValues;
+ if (vectorValues instanceof ByteVectorValues byteVectorValues) {
return new BitRandomVectorScorerSupplier(byteVectorValues);
}
- throw new IllegalArgumentException(
- "vectorValues must be an instance of RandomAccessVectorValues.Bytes");
+ throw new IllegalArgumentException("vectorValues must be an instance of ByteVectorValues");
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target)
throws IOException {
throw new IllegalArgumentException("bit vectors do not support float[] targets");
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target)
throws IOException {
- assert vectorValues instanceof RandomAccessVectorValues.Bytes;
- if (vectorValues instanceof RandomAccessVectorValues.Bytes byteVectorValues) {
+ assert vectorValues instanceof ByteVectorValues;
+ if (vectorValues instanceof ByteVectorValues byteVectorValues) {
return new BitRandomVectorScorer(byteVectorValues, target);
}
- throw new IllegalArgumentException(
- "vectorValues must be an instance of RandomAccessVectorValues.Bytes");
+ throw new IllegalArgumentException("vectorValues must be an instance of ByteVectorValues");
}
static class BitRandomVectorScorer implements RandomVectorScorer {
- private final RandomAccessVectorValues.Bytes vectorValues;
+ private final ByteVectorValues vectorValues;
private final int bitDimensions;
private final byte[] query;
- BitRandomVectorScorer(RandomAccessVectorValues.Bytes vectorValues, byte[] query) {
+ BitRandomVectorScorer(ByteVectorValues vectorValues, byte[] query) {
this.query = query;
this.bitDimensions = vectorValues.dimension() * Byte.SIZE;
this.vectorValues = vectorValues;
@@ -97,12 +92,11 @@ public class FlatBitVectorsScorer implements FlatVectorsScorer {
}
static class BitRandomVectorScorerSupplier implements RandomVectorScorerSupplier {
- protected final RandomAccessVectorValues.Bytes vectorValues;
- protected final RandomAccessVectorValues.Bytes vectorValues1;
- protected final RandomAccessVectorValues.Bytes vectorValues2;
+ protected final ByteVectorValues vectorValues;
+ protected final ByteVectorValues vectorValues1;
+ protected final ByteVectorValues vectorValues2;
- public BitRandomVectorScorerSupplier(RandomAccessVectorValues.Bytes vectorValues)
- throws IOException {
+ public BitRandomVectorScorerSupplier(ByteVectorValues vectorValues) throws IOException {
this.vectorValues = vectorValues;
this.vectorValues1 = vectorValues.copy();
this.vectorValues2 = vectorValues.copy();
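For context on what `BitRandomVectorScorer` does with its `bitDimensions` field: the score method is elided from this diff, but the standard Hamming-based similarity over bit vectors looks like the sketch below (hedged: the exact expression lives in the class body not shown here; `VectorUtil.xorBitCount` is a real utility):

import org.apache.lucene.util.VectorUtil;

class BitSimilaritySketch {
  // Fraction of bit positions on which query and stored vector agree,
  // mapped into [0, 1]; xorBitCount counts the differing bits.
  static float bitScore(byte[] query, byte[] stored) {
    int bitDimensions = query.length * Byte.SIZE;
    int hamming = VectorUtil.xorBitCount(query, stored);
    return (bitDimensions - hamming) / (float) bitDimensions;
  }
}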
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
index 1daa1761fd8..2a0472fa028 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
@@ -54,8 +54,9 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
*
* A choice of {@link BloomFilterFactory} can be passed to tailor Bloom Filter settings on a
* per-field basis. The default configuration is {@link DefaultBloomFilterFactory} which allocates a
- * ~8mb bitset and hashes values using {@link MurmurHash64}. This should be suitable for most
- * purposes.
+ * ~8mb bitset and hashes values using {@link
+ * org.apache.lucene.util.StringHelper#murmurhash3_x64_128(BytesRef)}. This should be suitable for
+ * most purposes.
*
* <p>The format of the blm file is as follows:
*
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java
index f1d2dee65c7..7d6fd1b64b5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java
@@ -24,6 +24,7 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
/**
* A class used to represent a set of many, potentially large, values (e.g. many long strings such
@@ -53,7 +54,6 @@ public class FuzzySet implements Accountable {
NO
};
- private HashFunction hashFunction;
private FixedBitSet filter;
private int bloomSize;
private final int hashCount;
@@ -138,7 +138,6 @@ public class FuzzySet implements Accountable {
super();
this.filter = filter;
this.bloomSize = bloomSize;
- this.hashFunction = MurmurHash64.INSTANCE;
this.hashCount = hashCount;
}
@@ -150,11 +149,12 @@ public class FuzzySet implements Accountable {
* @return NO or MAYBE
*/
public ContainsResult contains(BytesRef value) {
- long hash = hashFunction.hash(value);
- int msb = (int) (hash >>> Integer.SIZE);
- int lsb = (int) hash;
+ long[] hash = StringHelper.murmurhash3_x64_128(value);
+
+ long msb = hash[0];
+ long lsb = hash[1];
for (int i = 0; i < hashCount; i++) {
- int bloomPos = (lsb + i * msb);
+ int bloomPos = ((int) (lsb + i * msb)) & bloomSize;
if (!mayContainValue(bloomPos)) {
return ContainsResult.NO;
}
@@ -216,15 +216,14 @@ public class FuzzySet implements Accountable {
* is modulo n'd where n is the chosen size of the internal bitset.
*
* @param value the key value to be hashed
- * @throws IOException If there is a low-level I/O error
*/
- public void addValue(BytesRef value) throws IOException {
- long hash = hashFunction.hash(value);
- int msb = (int) (hash >>> Integer.SIZE);
- int lsb = (int) hash;
+ public void addValue(BytesRef value) {
+ long[] hash = StringHelper.murmurhash3_x64_128(value);
+ long msb = hash[0];
+ long lsb = hash[1];
for (int i = 0; i < hashCount; i++) {
// Bitmasking using bloomSize is effectively a modulo operation.
- int bloomPos = (lsb + i * msb) & bloomSize;
+ int bloomPos = ((int) (lsb + i * msb)) & bloomSize;
filter.set(bloomPos);
}
}
@@ -302,9 +301,7 @@ public class FuzzySet implements Accountable {
@Override
public String toString() {
return getClass().getSimpleName()
- + "(hash="
- + hashFunction
- + ", k="
+ + "(k="
+ hashCount
+ ", bits="
+ filter.cardinality()
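Both `contains` and `addValue` above now derive all `hashCount` probe positions from a single 128-bit MurmurHash3 call, using classic double hashing: position i is `lsb + i * msb`, masked by `bloomSize` (a power of two minus one, so the mask is a cheap modulo). A self-contained sketch of that scheme, using only calls visible in this diff:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

class BloomPositions {
  // Derive k bloom-filter bit positions from one 128-bit hash.
  // Assumes bloomSize == 2^n - 1, as in FuzzySet.
  static int[] positions(BytesRef value, int hashCount, int bloomSize) {
    long[] hash = StringHelper.murmurhash3_x64_128(value);
    long msb = hash[0];
    long lsb = hash[1];
    int[] out = new int[hashCount];
    for (int i = 0; i < hashCount; i++) {
      out[i] = ((int) (lsb + i * msb)) & bloomSize;
    }
    return out;
  }
}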
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/HashFunction.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/HashFunction.java
deleted file mode 100644
index eac514a7bb8..00000000000
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/HashFunction.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.bloom;
-
-import org.apache.lucene.util.BytesRef;
-
-/**
- * Base class for hashing functions that can be referred to by name. Subclasses are expected to
- * provide threadsafe implementations of the hash function on the range of bytes referenced in the
- * provided {@link BytesRef}
- *
- * @lucene.experimental
- */
-public abstract class HashFunction {
-
- /**
- * Hashes the contents of the referenced bytes
- *
- * @param bytes the data to be hashed
- * @return the hash of the bytes referenced by bytes.offset and length bytes.length
- */
- public abstract long hash(BytesRef bytes);
-}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/MurmurHash64.java b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/MurmurHash64.java
deleted file mode 100644
index 1d189773143..00000000000
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/MurmurHash64.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.bloom;
-
-import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.BytesRef;
-
-/**
- * This is a very fast, non-cryptographic hash suitable for general hash-based lookup. See
- * http://murmurhash.googlepages.com/ for more details.
- *
- * <p>The code from Apache Commons was adapted in the form here to work with BytesRefs with offsets
- * and lengths rather than raw byte arrays.
- */
-public class MurmurHash64 extends HashFunction {
- private static final long M64 = 0xc6a4a7935bd1e995L;
- private static final int R64 = 47;
- public static final HashFunction INSTANCE = new MurmurHash64();
-
- /**
- * Generates a 64-bit hash from byte array of the given length and seed.
- *
- * @param data The input byte array
- * @param seed The initial seed value
- * @param length The length of the array
- * @return The 64-bit hash of the given array
- */
- public static long hash64(byte[] data, int seed, int offset, int length) {
- long h = (seed & 0xffffffffL) ^ (length * M64);
-
- final int nblocks = length >> 3;
-
- // body
- for (int i = 0; i < nblocks; i++) {
-
- long k = (long) BitUtil.VH_LE_LONG.get(data, offset);
- k *= M64;
- k ^= k >>> R64;
- k *= M64;
-
- h ^= k;
- h *= M64;
-
- offset += Long.BYTES;
- }
-
- int remaining = length & 0x07;
- if (0 < remaining) {
- for (int i = 0; i < remaining; i++) {
- h ^= ((long) data[offset + i] & 0xff) << (Byte.SIZE * i);
- }
- h *= M64;
- }
-
- h ^= h >>> R64;
- h *= M64;
- h ^= h >>> R64;
-
- return h;
- }
-
- @Override
- public final long hash(BytesRef br) {
- return hash64(br.bytes, 0xe17a1465, br.offset, br.length);
- }
-
- @Override
- public String toString() {
- return getClass().getSimpleName();
- }
-}
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java
index bfb5888a56b..8cb48e36919 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextCompoundFormat.java
@@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.StringHelper;
@@ -52,10 +53,10 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
public SimpleTextCompoundFormat() {}
@Override
- public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
- throws IOException {
+ public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si) throws IOException {
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
- final IndexInput in = dir.openInput(dataFile, context);
+ final IndexInput in =
+ dir.openInput(dataFile, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL));
BytesRefBuilder scratch = new BytesRefBuilder();
@@ -135,7 +136,11 @@ public class SimpleTextCompoundFormat extends CompoundFormat {
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
int index = getIndex(name);
- return in.slice(name, startOffsets[index], endOffsets[index] - startOffsets[index]);
+ return in.slice(
+ name,
+ startOffsets[index],
+ endOffsets[index] - startOffsets[index],
+ context.readAdvice());
}
@Override
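Two related API shifts show up in this file: `getCompoundReader` no longer receives an `IOContext` (the reader fixes its own `ReadAdvice` when opening the data file), and per-file slices re-apply the advice carried by the caller's context. A hedged sketch of that flow, using only the calls visible in this diff:

import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

class CompoundSliceSketch {
  // Open the compound data file with fixed NORMAL advice, then hand out a
  // sub-file slice tagged with the advice from the caller's IOContext.
  static IndexInput openSlice(Directory dir, String dataFile, String name,
      long offset, long length, IOContext callerContext) throws IOException {
    IndexInput in = dir.openInput(dataFile, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL));
    return in.slice(name, offset, length, callerContext.readAdvice());
  }
}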
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
index faba629715b..0a8c4836321 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
@@ -192,8 +192,8 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
}
FieldInfo info = readState.fieldInfos.fieldInfo(field);
VectorSimilarityFunction vectorSimilarity = info.getVectorSimilarityFunction();
- int doc;
- while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int ord = 0; ord < values.size(); ord++) {
+ int doc = values.ordToDoc(ord);
if (acceptDocs != null && acceptDocs.get(doc) == false) {
continue;
}
@@ -202,7 +202,7 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
break;
}
- float[] vector = values.vectorValue();
+ float[] vector = values.vectorValue(ord);
float score = vectorSimilarity.compare(vector, target);
knnCollector.collect(doc, score);
knnCollector.incVisitedCount(1);
@@ -223,8 +223,8 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
FieldInfo info = readState.fieldInfos.fieldInfo(field);
VectorSimilarityFunction vectorSimilarity = info.getVectorSimilarityFunction();
- int doc;
- while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ for (int ord = 0; ord < values.size(); ord++) {
+ int doc = values.ordToDoc(ord);
if (acceptDocs != null && acceptDocs.get(doc) == false) {
continue;
}
@@ -233,7 +233,7 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
break;
}
- byte[] vector = values.vectorValue();
+ byte[] vector = values.vectorValue(ord);
float score = vectorSimilarity.compare(vector, target);
knnCollector.collect(doc, score);
knnCollector.incVisitedCount(1);
@@ -327,35 +327,18 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
}
@Override
- public float[] vectorValue() {
- return values[curOrd];
+ public float[] vectorValue(int ord) {
+ return values[ord];
}
@Override
- public int docID() {
- if (curOrd == -1) {
- return -1;
- } else if (curOrd >= entry.size()) {
- // when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
- // immediately afterward should also return NO_MORE_DOCS
- // this is needed for TestSimpleTextKnnVectorsFormat.testAdvance test case
- return NO_MORE_DOCS;
- }
-
- return entry.ordToDoc[curOrd];
+ public int ordToDoc(int ord) {
+ return entry.ordToDoc[ord];
}
@Override
- public int nextDoc() throws IOException {
- if (++curOrd < entry.size()) {
- return docID();
- }
- return NO_MORE_DOCS;
- }
-
- @Override
- public int advance(int target) throws IOException {
- return slowAdvance(target);
+ public DocIndexIterator iterator() {
+ return createSparseIterator();
}
@Override
@@ -365,17 +348,19 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
}
SimpleTextFloatVectorValues simpleTextFloatVectorValues =
new SimpleTextFloatVectorValues(this);
+ DocIndexIterator iterator = simpleTextFloatVectorValues.iterator();
return new VectorScorer() {
@Override
public float score() throws IOException {
+ int ord = iterator.index();
return entry
.similarityFunction()
- .compare(simpleTextFloatVectorValues.vectorValue(), target);
+ .compare(simpleTextFloatVectorValues.vectorValue(ord), target);
}
@Override
public DocIdSetIterator iterator() {
- return simpleTextFloatVectorValues;
+ return iterator;
}
};
}
@@ -397,6 +382,11 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
value[i] = Float.parseFloat(floatStrings[i]);
}
}
+
+ @Override
+ public SimpleTextFloatVectorValues copy() {
+ return this;
+ }
}
private static class SimpleTextByteVectorValues extends ByteVectorValues {
@@ -439,36 +429,19 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
}
@Override
- public byte[] vectorValue() {
- binaryValue.bytes = values[curOrd];
+ public byte[] vectorValue(int ord) {
+ binaryValue.bytes = values[ord];
return binaryValue.bytes;
}
@Override
- public int docID() {
- if (curOrd == -1) {
- return -1;
- } else if (curOrd >= entry.size()) {
- // when call to advance / nextDoc below already returns NO_MORE_DOCS, calling docID
- // immediately afterward should also return NO_MORE_DOCS
- // this is needed for TestSimpleTextKnnVectorsFormat.testAdvance test case
- return NO_MORE_DOCS;
- }
-
- return entry.ordToDoc[curOrd];
+ public int ordToDoc(int ord) {
+ return entry.ordToDoc[ord];
}
@Override
- public int nextDoc() throws IOException {
- if (++curOrd < entry.size()) {
- return docID();
- }
- return NO_MORE_DOCS;
- }
-
- @Override
- public int advance(int target) throws IOException {
- return slowAdvance(target);
+ public DocIndexIterator iterator() {
+ return createSparseIterator();
}
@Override
@@ -478,16 +451,19 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
}
SimpleTextByteVectorValues simpleTextByteVectorValues = new SimpleTextByteVectorValues(this);
return new VectorScorer() {
+ DocIndexIterator it = simpleTextByteVectorValues.iterator();
+
@Override
public float score() throws IOException {
+ int ord = it.index();
return entry
.similarityFunction()
- .compare(simpleTextByteVectorValues.vectorValue(), target);
+ .compare(simpleTextByteVectorValues.vectorValue(ord), target);
}
@Override
public DocIdSetIterator iterator() {
- return simpleTextByteVectorValues;
+ return it;
}
};
}
@@ -509,6 +485,11 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
value[i] = (byte) Float.parseFloat(floatStrings[i]);
}
}
+
+ @Override
+ public SimpleTextByteVectorValues copy() {
+ return this;
+ }
}
private int readInt(IndexInput in, BytesRef field) throws IOException {
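The reader changes above follow the new `KnnVectorValues` contract: values become random-access by ordinal (`vectorValue(ord)`, `ordToDoc(ord)`), and doc-id traversal moves to a separate `DocIndexIterator` whose `index()` reports the current ordinal. A sketch of the traversal pattern, mirroring the writer loop later in this patch:

import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;

class VisitVectors {
  // Walk all (docId, vector) pairs: the iterator advances doc ids while
  // index() yields the ordinal used for random-access value lookup.
  static void visitAll(FloatVectorValues values) throws IOException {
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      float[] vector = values.vectorValue(it.index());
      // consume (doc, vector) here
      assert vector.length == values.dimension();
    }
  }
}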
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
index a7a76ac1bb9..eaf4b657755 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsWriter.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
@@ -77,19 +78,18 @@ public class SimpleTextKnnVectorsWriter extends BufferingKnnVectorsWriter {
throws IOException {
long vectorDataOffset = vectorData.getFilePointer();
List<Integer> docIds = new ArrayList<>();
- for (int docV = floatVectorValues.nextDoc();
- docV != NO_MORE_DOCS;
- docV = floatVectorValues.nextDoc()) {
- writeFloatVectorValue(floatVectorValues);
- docIds.add(docV);
+ KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
+ for (int docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) {
+ writeFloatVectorValue(floatVectorValues, iter.index());
+ docIds.add(docId);
}
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, docIds);
}
- private void writeFloatVectorValue(FloatVectorValues vectors) throws IOException {
+ private void writeFloatVectorValue(FloatVectorValues vectors, int ord) throws IOException {
// write vector value
- float[] value = vectors.vectorValue();
+ float[] value = vectors.vectorValue(ord);
assert value.length == vectors.dimension();
write(vectorData, Arrays.toString(value));
newline(vectorData);
@@ -100,19 +100,18 @@ public class SimpleTextKnnVectorsWriter extends BufferingKnnVectorsWriter {
throws IOException {
long vectorDataOffset = vectorData.getFilePointer();
List<Integer> docIds = new ArrayList<>();
- for (int docV = byteVectorValues.nextDoc();
- docV != NO_MORE_DOCS;
- docV = byteVectorValues.nextDoc()) {
- writeByteVectorValue(byteVectorValues);
+ KnnVectorValues.DocIndexIterator it = byteVectorValues.iterator();
+ for (int docV = it.nextDoc(); docV != NO_MORE_DOCS; docV = it.nextDoc()) {
+ writeByteVectorValue(byteVectorValues, it.index());
docIds.add(docV);
}
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;
writeMeta(fieldInfo, vectorDataOffset, vectorDataLength, docIds);
}
- private void writeByteVectorValue(ByteVectorValues vectors) throws IOException {
+ private void writeByteVectorValue(ByteVectorValues vectors, int ord) throws IOException {
// write vector value
- byte[] value = vectors.vectorValue();
+ byte[] value = vectors.vectorValue(ord);
assert value.length == vectors.dimension();
write(vectorData, Arrays.toString(value));
newline(vectorData);
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java
index ab20ee67c8c..a0ea5833e2e 100644
--- a/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java
+++ b/lucene/codecs/src/test/org/apache/lucene/codecs/bitvectors/TestHnswBitVectorsFormat.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnByteVectorField;
@@ -42,7 +42,7 @@ import org.apache.lucene.tests.index.BaseIndexFileFormatTestCase;
public class TestHnswBitVectorsFormat extends BaseIndexFileFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene912Codec() {
+ return new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new HnswBitVectorsFormat();
diff --git a/lucene/core/src/generated/checksums/generateForDeltaUtil.json b/lucene/core/src/generated/checksums/generateForDeltaUtil.json
index 6546e25c4be..26ebc1198d9 100644
--- a/lucene/core/src/generated/checksums/generateForDeltaUtil.json
+++ b/lucene/core/src/generated/checksums/generateForDeltaUtil.json
@@ -1,4 +1,4 @@
{
- "lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "5115b12ac31537ce31d73c0a279df92060749a3a",
- "lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "db6154406e68b80d2c90116b5d0bfa9ba220762a"
+ "lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java": "f561578ccb6a95364bb62c5ed86b38ff0b4a009d",
+ "lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py": "eea1a71be9da8a13fdd979354dc4a8c6edf21be1"
}
\ No newline at end of file
diff --git a/lucene/core/src/java/module-info.java b/lucene/core/src/java/module-info.java
index a0f0bad01eb..6fd1767aa34 100644
--- a/lucene/core/src/java/module-info.java
+++ b/lucene/core/src/java/module-info.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
/** Lucene Core. */
@SuppressWarnings("module") // the test framework is compiled after the core...
@@ -34,6 +34,7 @@ module org.apache.lucene.core {
exports org.apache.lucene.codecs.lucene95;
exports org.apache.lucene.codecs.lucene99;
exports org.apache.lucene.codecs.lucene912;
+ exports org.apache.lucene.codecs.lucene100;
exports org.apache.lucene.codecs.perfield;
exports org.apache.lucene.codecs;
exports org.apache.lucene.document;
@@ -72,7 +73,7 @@ module org.apache.lucene.core {
provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
provides org.apache.lucene.codecs.Codec with
- Lucene912Codec;
+ Lucene100Codec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
index 8a9b4816571..96b0f75a259 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BufferingKnnVectorsWriter.java
@@ -20,14 +20,16 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.function.Supplier;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Sorter;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.index.SortingCodecReader;
+import org.apache.lucene.index.SortingCodecReader.SortingValuesIterator;
+import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -80,24 +82,26 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
case FLOAT32:
BufferedFloatVectorValues bufferedFloatVectorValues =
new BufferedFloatVectorValues(
- fieldData.docsWithField,
(List<float[]>) fieldData.vectors,
- fieldData.fieldInfo.getVectorDimension());
+ fieldData.fieldInfo.getVectorDimension(),
+ fieldData.docsWithField);
FloatVectorValues floatVectorValues =
sortMap != null
- ? new SortingFloatVectorValues(bufferedFloatVectorValues, sortMap)
+ ? new SortingFloatVectorValues(
+ bufferedFloatVectorValues, fieldData.docsWithField, sortMap)
: bufferedFloatVectorValues;
writeField(fieldData.fieldInfo, floatVectorValues, maxDoc);
break;
case BYTE:
BufferedByteVectorValues bufferedByteVectorValues =
new BufferedByteVectorValues(
- fieldData.docsWithField,
(List<byte[]>) fieldData.vectors,
- fieldData.fieldInfo.getVectorDimension());
+ fieldData.fieldInfo.getVectorDimension(),
+ fieldData.docsWithField);
ByteVectorValues byteVectorValues =
sortMap != null
- ? new SortingByteVectorValues(bufferedByteVectorValues, sortMap)
+ ? new SortingByteVectorValues(
+ bufferedByteVectorValues, fieldData.docsWithField, sortMap)
: bufferedByteVectorValues;
writeField(fieldData.fieldInfo, byteVectorValues, maxDoc);
break;
@@ -107,125 +111,77 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
/** Sorting FloatVectorValues that iterate over documents in the order of the provided sortMap */
private static class SortingFloatVectorValues extends FloatVectorValues {
- private final BufferedFloatVectorValues randomAccess;
- private final int[] docIdOffsets;
- private int docId = -1;
+ private final BufferedFloatVectorValues delegate;
+ private final Supplier<SortingValuesIterator> iteratorSupplier;
- SortingFloatVectorValues(BufferedFloatVectorValues delegate, Sorter.DocMap sortMap)
+ SortingFloatVectorValues(
+ BufferedFloatVectorValues delegate, DocsWithFieldSet docsWithField, Sorter.DocMap sortMap)
throws IOException {
- this.randomAccess = delegate.copy();
- this.docIdOffsets = new int[sortMap.size()];
-
- int offset = 1; // 0 means no vector for this (field, document)
- int docID;
- while ((docID = delegate.nextDoc()) != NO_MORE_DOCS) {
- int newDocID = sortMap.oldToNew(docID);
- docIdOffsets[newDocID] = offset++;
- }
+ this.delegate = delegate.copy();
+ iteratorSupplier = SortingCodecReader.iteratorSupplier(delegate, sortMap);
}
@Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- while (docId < docIdOffsets.length - 1) {
- ++docId;
- if (docIdOffsets[docId] != 0) {
- return docId;
- }
- }
- docId = NO_MORE_DOCS;
- return docId;
- }
-
- @Override
- public float[] vectorValue() throws IOException {
- return randomAccess.vectorValue(docIdOffsets[docId] - 1);
+ public float[] vectorValue(int ord) throws IOException {
+ return delegate.vectorValue(ord);
}
@Override
public int dimension() {
- return randomAccess.dimension();
+ return delegate.dimension();
}
@Override
public int size() {
- return randomAccess.size();
+ return delegate.size();
}
@Override
- public int advance(int target) throws IOException {
+ public SortingFloatVectorValues copy() {
throw new UnsupportedOperationException();
}
@Override
- public VectorScorer scorer(float[] target) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return iteratorSupplier.get();
}
}
- /** Sorting FloatVectorValues that iterate over documents in the order of the provided sortMap */
+ /** Sorting ByteVectorValues that iterate over documents in the order of the provided sortMap */
private static class SortingByteVectorValues extends ByteVectorValues {
- private final BufferedByteVectorValues randomAccess;
- private final int[] docIdOffsets;
- private int docId = -1;
+ private final BufferedByteVectorValues delegate;
+ private final Supplier<SortingValuesIterator> iteratorSupplier;
- SortingByteVectorValues(BufferedByteVectorValues delegate, Sorter.DocMap sortMap)
+ SortingByteVectorValues(
+ BufferedByteVectorValues delegate, DocsWithFieldSet docsWithField, Sorter.DocMap sortMap)
throws IOException {
- this.randomAccess = delegate.copy();
- this.docIdOffsets = new int[sortMap.size()];
-
- int offset = 1; // 0 means no vector for this (field, document)
- int docID;
- while ((docID = delegate.nextDoc()) != NO_MORE_DOCS) {
- int newDocID = sortMap.oldToNew(docID);
- docIdOffsets[newDocID] = offset++;
- }
+ this.delegate = delegate;
+ iteratorSupplier = SortingCodecReader.iteratorSupplier(delegate, sortMap);
}
@Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- while (docId < docIdOffsets.length - 1) {
- ++docId;
- if (docIdOffsets[docId] != 0) {
- return docId;
- }
- }
- docId = NO_MORE_DOCS;
- return docId;
- }
-
- @Override
- public byte[] vectorValue() throws IOException {
- return randomAccess.vectorValue(docIdOffsets[docId] - 1);
+ public byte[] vectorValue(int ord) throws IOException {
+ return delegate.vectorValue(ord);
}
@Override
public int dimension() {
- return randomAccess.dimension();
+ return delegate.dimension();
}
@Override
public int size() {
- return randomAccess.size();
+ return delegate.size();
}
@Override
- public int advance(int target) throws IOException {
+ public SortingByteVectorValues copy() {
throw new UnsupportedOperationException();
}
@Override
- public VectorScorer scorer(byte[] target) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return iteratorSupplier.get();
}
}
@@ -296,7 +252,9 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
@Override
public final long ramBytesUsed() {
- if (vectors.size() == 0) return 0;
+ if (vectors.isEmpty()) {
+ return 0;
+ }
return docsWithField.ramBytesUsed()
+ vectors.size()
* (long)
@@ -307,25 +265,18 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
}
private static class BufferedFloatVectorValues extends FloatVectorValues {
- final DocsWithFieldSet docsWithField;
-
// These are always the vectors of a VectorValuesWriter, which are copied when added to it
final List<float[]> vectors;
final int dimension;
+ private final DocIdSet docsWithField;
+ private final DocIndexIterator iterator;
- DocIdSetIterator docsWithFieldIter;
- int ord = -1;
-
- BufferedFloatVectorValues(
- DocsWithFieldSet docsWithField, List<float[]> vectors, int dimension) {
- this.docsWithField = docsWithField;
+ BufferedFloatVectorValues(List<float[]> vectors, int dimension, DocIdSet docsWithField)
+ throws IOException {
this.vectors = vectors;
this.dimension = dimension;
- docsWithFieldIter = docsWithField.iterator();
- }
-
- public BufferedFloatVectorValues copy() {
- return new BufferedFloatVectorValues(docsWithField, vectors, dimension);
+ this.docsWithField = docsWithField;
+ this.iterator = fromDISI(docsWithField.iterator());
}
@Override
@@ -339,58 +290,39 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
}
@Override
- public float[] vectorValue() {
- return vectors.get(ord);
+ public int ordToDoc(int ord) {
+ return ord;
}
- float[] vectorValue(int targetOrd) {
+ @Override
+ public float[] vectorValue(int targetOrd) {
return vectors.get(targetOrd);
}
@Override
- public int docID() {
- return docsWithFieldIter.docID();
+ public DocIndexIterator iterator() {
+ return iterator;
}
@Override
- public int nextDoc() throws IOException {
- int docID = docsWithFieldIter.nextDoc();
- if (docID != NO_MORE_DOCS) {
- ++ord;
- }
- return docID;
- }
-
- @Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public VectorScorer scorer(float[] target) {
- throw new UnsupportedOperationException();
+ public BufferedFloatVectorValues copy() throws IOException {
+ return new BufferedFloatVectorValues(vectors, dimension, docsWithField);
}
}
private static class BufferedByteVectorValues extends ByteVectorValues {
- final DocsWithFieldSet docsWithField;
-
// These are always the vectors of a VectorValuesWriter, which are copied when added to it
final List<byte[]> vectors;
final int dimension;
+ private final DocIdSet docsWithField;
+ private final DocIndexIterator iterator;
- DocIdSetIterator docsWithFieldIter;
- int ord = -1;
-
- BufferedByteVectorValues(DocsWithFieldSet docsWithField, List<byte[]> vectors, int dimension) {
- this.docsWithField = docsWithField;
+ BufferedByteVectorValues(List<byte[]> vectors, int dimension, DocIdSet docsWithField)
+ throws IOException {
this.vectors = vectors;
this.dimension = dimension;
- docsWithFieldIter = docsWithField.iterator();
- }
-
- public BufferedByteVectorValues copy() {
- return new BufferedByteVectorValues(docsWithField, vectors, dimension);
+ this.docsWithField = docsWithField;
+ iterator = fromDISI(docsWithField.iterator());
}
@Override
@@ -404,36 +336,18 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
}
@Override
- public byte[] vectorValue() {
- return vectors.get(ord);
- }
-
- byte[] vectorValue(int targetOrd) {
+ public byte[] vectorValue(int targetOrd) {
return vectors.get(targetOrd);
}
@Override
- public int docID() {
- return docsWithFieldIter.docID();
+ public DocIndexIterator iterator() {
+ return iterator;
}
@Override
- public int nextDoc() throws IOException {
- int docID = docsWithFieldIter.nextDoc();
- if (docID != NO_MORE_DOCS) {
- ++ord;
- }
- return docID;
- }
-
- @Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public VectorScorer scorer(byte[] target) {
- throw new UnsupportedOperationException();
+ public BufferedByteVectorValues copy() throws IOException {
+ return new BufferedByteVectorValues(vectors, dimension, docsWithField);
}
}
}
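
The Supplier indirection above replaces the docIdOffsets remapping that each sorting view previously built for itself; SortingCodecReader.iteratorSupplier now encapsulates that work and hands out a fresh iterator per iterator() call. For reference, a sketch equivalent to the deleted remapping logic (SortedOrdMapSketch is an illustrative name, not part of the patch):

import java.io.IOException;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.search.DocIdSetIterator;

final class SortedOrdMapSketch {
  // For each sort-remapped doc id, record 1 + the ordinal of its vector; 0 means no vector.
  static int[] buildOffsets(Sorter.DocMap sortMap, DocIdSetIterator unsortedDocsWithField)
      throws IOException {
    int[] docIdOffsets = new int[sortMap.size()];
    int offset = 1; // 0 means no vector for this (field, document)
    for (int doc = unsortedDocsWithField.nextDoc();
        doc != DocIdSetIterator.NO_MORE_DOCS;
        doc = unsortedDocsWithField.nextDoc()) {
      docIdOffsets[sortMap.oldToNew(doc)] = offset++;
    }
    return docIdOffsets;
  }
}

A fresh iterator is required on every iterator() call because each one is stateful, which is why the sorted views hold a Supplier rather than a single iterator instance.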
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index e5a5dac8ff5..ff5a5bb21c0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -55,7 +55,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
return LOADER;
}
- static Codec defaultCodec = LOADER.lookup("Lucene912");
+ static Codec defaultCodec = LOADER.lookup("Lucene100");
}
private final String name;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
index 371e192887b..6a7e75f267e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
@@ -35,8 +35,8 @@ public abstract class CompoundFormat {
// we can add 'producer' classes.
/** Returns a Directory view (read-only) for the compound files in this segment */
- public abstract CompoundDirectory getCompoundReader(
- Directory dir, SegmentInfo si, IOContext context) throws IOException;
+ public abstract CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si)
+ throws IOException;
/**
* Packs the provided segment's files into a compound format. All files referenced by the provided
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index cbb906788e5..08c08ec5075 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -613,7 +613,7 @@ public abstract class DocValuesConsumer implements Closeable {
if (docValuesProducer != null) {
FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) {
- values = docValuesProducer.getSorted(fieldInfo);
+ values = docValuesProducer.getSorted(readerFieldInfo);
}
}
if (values == null) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
index 3b185fd13a0..50af32a7e16 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
@@ -30,6 +30,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.index.VectorEncoding;
@@ -55,28 +56,26 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
@SuppressWarnings("unchecked")
public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException {
switch (fieldInfo.getVectorEncoding()) {
- case BYTE:
+ case BYTE -> {
KnnFieldVectorsWriter<byte[]> byteWriter =
(KnnFieldVectorsWriter<byte[]>) addField(fieldInfo);
ByteVectorValues mergedBytes =
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
- for (int doc = mergedBytes.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = mergedBytes.nextDoc()) {
- byteWriter.addValue(doc, mergedBytes.vectorValue());
+ KnnVectorValues.DocIndexIterator iter = mergedBytes.iterator();
+ for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
+ byteWriter.addValue(doc, mergedBytes.vectorValue(iter.index()));
}
- break;
- case FLOAT32:
+ }
+ case FLOAT32 -> {
KnnFieldVectorsWriter<float[]> floatWriter =
(KnnFieldVectorsWriter<float[]>) addField(fieldInfo);
FloatVectorValues mergedFloats =
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
- for (int doc = mergedFloats.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = mergedFloats.nextDoc()) {
- floatWriter.addValue(doc, mergedFloats.vectorValue());
+ KnnVectorValues.DocIndexIterator iter = mergedFloats.iterator();
+ for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
+ floatWriter.addValue(doc, mergedFloats.vectorValue(iter.index()));
}
- break;
+ }
}
}
@@ -117,32 +116,44 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
private static class FloatVectorValuesSub extends DocIDMerger.Sub {
final FloatVectorValues values;
+ final KnnVectorValues.DocIndexIterator iterator;
FloatVectorValuesSub(MergeState.DocMap docMap, FloatVectorValues values) {
super(docMap);
this.values = values;
- assert values.docID() == -1;
+ this.iterator = values.iterator();
+ assert iterator.docID() == -1;
}
@Override
public int nextDoc() throws IOException {
- return values.nextDoc();
+ return iterator.nextDoc();
+ }
+
+ public int index() {
+ return iterator.index();
}
}
private static class ByteVectorValuesSub extends DocIDMerger.Sub {
final ByteVectorValues values;
+ final KnnVectorValues.DocIndexIterator iterator;
ByteVectorValuesSub(MergeState.DocMap docMap, ByteVectorValues values) {
super(docMap);
this.values = values;
- assert values.docID() == -1;
+ iterator = values.iterator();
+ assert iterator.docID() == -1;
}
@Override
public int nextDoc() throws IOException {
- return values.nextDoc();
+ return iterator.nextDoc();
+ }
+
+ int index() {
+ return iterator.index();
}
}
@@ -287,7 +298,8 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
private final List<FloatVectorValuesSub> subs;
private final DocIDMerger<FloatVectorValuesSub> docIdMerger;
private final int size;
- private int docId;
+ private int docId = -1;
+ private int lastOrd = -1;
FloatVectorValuesSub current;
private MergedFloat32VectorValues(List<FloatVectorValuesSub> subs, MergeState mergeState)
@@ -299,33 +311,59 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
totalSize += sub.values.size();
}
size = totalSize;
- docId = -1;
}
@Override
- public int docID() {
- return docId;
+ public DocIndexIterator iterator() {
+ return new DocIndexIterator() {
+ private int index = -1;
+
+ @Override
+ public int docID() {
+ return docId;
+ }
+
+ @Override
+ public int index() {
+ return index;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIdMerger.next();
+ if (current == null) {
+ docId = NO_MORE_DOCS;
+ index = NO_MORE_DOCS;
+ } else {
+ docId = current.mappedDocID;
+ ++lastOrd;
+ ++index;
+ }
+ return docId;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return size;
+ }
+ };
}
@Override
- public int nextDoc() throws IOException {
- current = docIdMerger.next();
- if (current == null) {
- docId = NO_MORE_DOCS;
- } else {
- docId = current.mappedDocID;
+ public float[] vectorValue(int ord) throws IOException {
+ if (ord != lastOrd) {
+ throw new IllegalStateException(
+ "only supports forward iteration with a single iterator: ord="
+ + ord
+ + ", lastOrd="
+ + lastOrd);
}
- return docId;
- }
-
- @Override
- public float[] vectorValue() throws IOException {
- return current.values.vectorValue();
- }
-
- @Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
+ return current.values.vectorValue(current.index());
}
@Override
@@ -338,10 +376,20 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
return subs.get(0).values.dimension();
}
+ @Override
+ public int ordToDoc(int ord) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public VectorScorer scorer(float[] target) {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public FloatVectorValues copy() {
+ throw new UnsupportedOperationException();
+ }
}
static class MergedByteVectorValues extends ByteVectorValues {
@@ -349,7 +397,8 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
private final DocIDMerger<ByteVectorValuesSub> docIdMerger;
private final int size;
- private int docId;
+ private int lastOrd = -1;
+ private int docId = -1;
ByteVectorValuesSub current;
private MergedByteVectorValues(List<ByteVectorValuesSub> subs, MergeState mergeState)
@@ -361,33 +410,57 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
totalSize += sub.values.size();
}
size = totalSize;
- docId = -1;
}
@Override
- public byte[] vectorValue() throws IOException {
- return current.values.vectorValue();
- }
-
- @Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- current = docIdMerger.next();
- if (current == null) {
- docId = NO_MORE_DOCS;
+ public byte[] vectorValue(int ord) throws IOException {
+ if (ord != lastOrd + 1) {
+ throw new IllegalStateException(
+ "only supports forward iteration: ord=" + ord + ", lastOrd=" + lastOrd);
} else {
- docId = current.mappedDocID;
+ lastOrd = ord;
}
- return docId;
+ return current.values.vectorValue(current.index());
}
@Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return new DocIndexIterator() {
+ private int index = -1;
+
+ @Override
+ public int docID() {
+ return docId;
+ }
+
+ @Override
+ public int index() {
+ return index;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIdMerger.next();
+ if (current == null) {
+ docId = NO_MORE_DOCS;
+ index = NO_MORE_DOCS;
+ } else {
+ docId = current.mappedDocID;
+ ++index;
+ }
+ return docId;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return size;
+ }
+ };
}
@Override
@@ -400,10 +473,20 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
return subs.get(0).values.dimension();
}
+ @Override
+ public int ordToDoc(int ord) {
+ throw new UnsupportedOperationException();
+ }
+
@Override
public VectorScorer scorer(byte[] target) {
throw new UnsupportedOperationException();
}
+
+ @Override
+ public ByteVectorValues copy() {
+ throw new UnsupportedOperationException();
+ }
}
}
}
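
The merged views above are deliberately single-pass: vectorValue(ord) only accepts the ordinal the (single) iterator is currently positioned on, because the underlying subs are consumed strictly forward during a merge. A hedged sketch of a correct consumer (MergedConsumerSketch/drain are illustrative names):

import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;

final class MergedConsumerSketch {
  static void drain(FloatVectorValues mergedFloats) throws IOException {
    KnnVectorValues.DocIndexIterator it = mergedFloats.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      float[] vector = mergedFloats.vectorValue(it.index()); // current ordinal only
      // ... write vector ...
    }
    // Requesting an earlier ordinal after advancing (e.g. vectorValue(0))
    // throws IllegalStateException per the checks above.
  }
}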
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/DefaultFlatVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/DefaultFlatVectorScorer.java
index 1274e1c789e..3e506037969 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/DefaultFlatVectorScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/DefaultFlatVectorScorer.java
@@ -18,8 +18,10 @@
package org.apache.lucene.codecs.hnsw;
import java.io.IOException;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
@@ -34,24 +36,26 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues)
throws IOException {
- if (vectorValues instanceof RandomAccessVectorValues.Floats floatVectorValues) {
- return new FloatScoringSupplier(floatVectorValues, similarityFunction);
- } else if (vectorValues instanceof RandomAccessVectorValues.Bytes byteVectorValues) {
- return new ByteScoringSupplier(byteVectorValues, similarityFunction);
+ switch (vectorValues.getEncoding()) {
+ case FLOAT32 -> {
+ return new FloatScoringSupplier((FloatVectorValues) vectorValues, similarityFunction);
+ }
+ case BYTE -> {
+ return new ByteScoringSupplier((ByteVectorValues) vectorValues, similarityFunction);
+ }
}
throw new IllegalArgumentException(
- "vectorValues must be an instance of RandomAccessVectorValues.Floats or RandomAccessVectorValues.Bytes");
+ "vectorValues must be an instance of FloatVectorValues or ByteVectorValues, got a "
+ + vectorValues.getClass().getName());
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target)
throws IOException {
- assert vectorValues instanceof RandomAccessVectorValues.Floats;
+ assert vectorValues instanceof FloatVectorValues;
if (target.length != vectorValues.dimension()) {
throw new IllegalArgumentException(
"vector query dimension: "
@@ -59,17 +63,14 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
+ " differs from field dimension: "
+ vectorValues.dimension());
}
- return new FloatVectorScorer(
- (RandomAccessVectorValues.Floats) vectorValues, target, similarityFunction);
+ return new FloatVectorScorer((FloatVectorValues) vectorValues, target, similarityFunction);
}
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target)
throws IOException {
- assert vectorValues instanceof RandomAccessVectorValues.Bytes;
+ assert vectorValues instanceof ByteVectorValues;
if (target.length != vectorValues.dimension()) {
throw new IllegalArgumentException(
"vector query dimension: "
@@ -77,8 +78,7 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
+ " differs from field dimension: "
+ vectorValues.dimension());
}
- return new ByteVectorScorer(
- (RandomAccessVectorValues.Bytes) vectorValues, target, similarityFunction);
+ return new ByteVectorScorer((ByteVectorValues) vectorValues, target, similarityFunction);
}
@Override
@@ -88,14 +88,13 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
/** RandomVectorScorerSupplier for bytes vector */
private static final class ByteScoringSupplier implements RandomVectorScorerSupplier {
- private final RandomAccessVectorValues.Bytes vectors;
- private final RandomAccessVectorValues.Bytes vectors1;
- private final RandomAccessVectorValues.Bytes vectors2;
+ private final ByteVectorValues vectors;
+ private final ByteVectorValues vectors1;
+ private final ByteVectorValues vectors2;
private final VectorSimilarityFunction similarityFunction;
private ByteScoringSupplier(
- RandomAccessVectorValues.Bytes vectors, VectorSimilarityFunction similarityFunction)
- throws IOException {
+ ByteVectorValues vectors, VectorSimilarityFunction similarityFunction) throws IOException {
this.vectors = vectors;
vectors1 = vectors.copy();
vectors2 = vectors.copy();
@@ -125,14 +124,13 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
/** RandomVectorScorerSupplier for Float vector */
private static final class FloatScoringSupplier implements RandomVectorScorerSupplier {
- private final RandomAccessVectorValues.Floats vectors;
- private final RandomAccessVectorValues.Floats vectors1;
- private final RandomAccessVectorValues.Floats vectors2;
+ private final FloatVectorValues vectors;
+ private final FloatVectorValues vectors1;
+ private final FloatVectorValues vectors2;
private final VectorSimilarityFunction similarityFunction;
private FloatScoringSupplier(
- RandomAccessVectorValues.Floats vectors, VectorSimilarityFunction similarityFunction)
- throws IOException {
+ FloatVectorValues vectors, VectorSimilarityFunction similarityFunction) throws IOException {
this.vectors = vectors;
vectors1 = vectors.copy();
vectors2 = vectors.copy();
@@ -162,14 +160,12 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
/** A {@link RandomVectorScorer} for float vectors. */
private static class FloatVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {
- private final RandomAccessVectorValues.Floats values;
+ private final FloatVectorValues values;
private final float[] query;
private final VectorSimilarityFunction similarityFunction;
public FloatVectorScorer(
- RandomAccessVectorValues.Floats values,
- float[] query,
- VectorSimilarityFunction similarityFunction) {
+ FloatVectorValues values, float[] query, VectorSimilarityFunction similarityFunction) {
super(values);
this.values = values;
this.query = query;
@@ -184,14 +180,12 @@ public class DefaultFlatVectorScorer implements FlatVectorsScorer {
/** A {@link RandomVectorScorer} for byte vectors. */
private static class ByteVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer {
- private final RandomAccessVectorValues.Bytes values;
+ private final ByteVectorValues values;
private final byte[] query;
private final VectorSimilarityFunction similarityFunction;
public ByteVectorScorer(
- RandomAccessVectorValues.Bytes values,
- byte[] query,
- VectorSimilarityFunction similarityFunction) {
+ ByteVectorValues values, byte[] query, VectorSimilarityFunction similarityFunction) {
super(values);
this.values = values;
this.query = query;
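
With RandomAccessVectorValues gone, plain FloatVectorValues/ByteVectorValues can be handed straight to the scorer. A hedged usage sketch under the new signatures (the COSINE choice and the FlatScorerSketch/scoreOrd names are illustrative):

import java.io.IOException;
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.hnsw.RandomVectorScorer;

final class FlatScorerSketch {
  // Score one stored vector (by ordinal) against a query; query.length must
  // match vectors.dimension() or getRandomVectorScorer throws.
  static float scoreOrd(FloatVectorValues vectors, float[] query, int ord) throws IOException {
    RandomVectorScorer scorer =
        new DefaultFlatVectorScorer()
            .getRandomVectorScorer(VectorSimilarityFunction.COSINE, vectors, query);
    return scorer.score(ord);
  }
}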
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsScorer.java
index 17430c24f27..6ed170731de 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsScorer.java
@@ -18,8 +18,8 @@
package org.apache.lucene.codecs.hnsw;
import java.io.IOException;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
@@ -40,7 +40,19 @@ public interface FlatVectorsScorer {
* @throws IOException if an I/O error occurs
*/
RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues) throws IOException;
+
+ /**
+ * Returns a {@link RandomVectorScorer} for the given set of vectors and target vector.
+ *
+ * @param similarityFunction the similarity function to use
+ * @param vectorValues the vector values to score
+ * @param target the target vector
+ * @return a {@link RandomVectorScorer} for the given field and target vector.
+ * @throws IOException if an I/O error occurs when reading from the index.
+ */
+ RandomVectorScorer getRandomVectorScorer(
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target)
throws IOException;
/**
@@ -53,23 +65,6 @@ public interface FlatVectorsScorer {
* @throws IOException if an I/O error occurs when reading from the index.
*/
RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target)
- throws IOException;
-
- /**
- * Returns a {@link RandomVectorScorer} for the given set of vectors and target vector.
- *
- * @param similarityFunction the similarity function to use
- * @param vectorValues the vector values to score
- * @param target the target vector
- * @return a {@link RandomVectorScorer} for the given field and target vector.
- * @throws IOException if an I/O error occurs when reading from the index.
- */
- RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target)
throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/ScalarQuantizedVectorScorer.java
index 4b73e1f7a4a..ceb826aa3a1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/hnsw/ScalarQuantizedVectorScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/hnsw/ScalarQuantizedVectorScorer.java
@@ -18,13 +18,13 @@
package org.apache.lucene.codecs.hnsw;
import java.io.IOException;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.VectorUtil;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
import org.apache.lucene.util.quantization.ScalarQuantizer;
@@ -60,9 +60,9 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues)
throws IOException {
- if (vectorValues instanceof RandomAccessQuantizedByteVectorValues quantizedByteVectorValues) {
+ if (vectorValues instanceof QuantizedByteVectorValues quantizedByteVectorValues) {
return new ScalarQuantizedRandomVectorScorerSupplier(
similarityFunction,
quantizedByteVectorValues.getScalarQuantizer(),
@@ -74,11 +74,9 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target)
throws IOException {
- if (vectorValues instanceof RandomAccessQuantizedByteVectorValues quantizedByteVectorValues) {
+ if (vectorValues instanceof QuantizedByteVectorValues quantizedByteVectorValues) {
ScalarQuantizer scalarQuantizer = quantizedByteVectorValues.getScalarQuantizer();
byte[] targetBytes = new byte[target.length];
float offsetCorrection =
@@ -104,9 +102,7 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target)
throws IOException {
return nonQuantizedDelegate.getRandomVectorScorer(similarityFunction, vectorValues, target);
}
@@ -124,14 +120,14 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
public static class ScalarQuantizedRandomVectorScorerSupplier
implements RandomVectorScorerSupplier {
- private final RandomAccessQuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values;
private final ScalarQuantizedVectorSimilarity similarity;
private final VectorSimilarityFunction vectorSimilarityFunction;
public ScalarQuantizedRandomVectorScorerSupplier(
VectorSimilarityFunction similarityFunction,
ScalarQuantizer scalarQuantizer,
- RandomAccessQuantizedByteVectorValues values) {
+ QuantizedByteVectorValues values) {
this.similarity =
ScalarQuantizedVectorSimilarity.fromVectorSimilarity(
similarityFunction,
@@ -144,7 +140,7 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
private ScalarQuantizedRandomVectorScorerSupplier(
ScalarQuantizedVectorSimilarity similarity,
VectorSimilarityFunction vectorSimilarityFunction,
- RandomAccessQuantizedByteVectorValues values) {
+ QuantizedByteVectorValues values) {
this.similarity = similarity;
this.values = values;
this.vectorSimilarityFunction = vectorSimilarityFunction;
@@ -152,7 +148,7 @@ public class ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorer scorer(int ord) throws IOException {
- final RandomAccessQuantizedByteVectorValues vectorsCopy = values.copy();
+ final QuantizedByteVectorValues vectorsCopy = values.copy();
final byte[] queryVector = values.vectorValue(ord);
final float queryOffset = values.getScoreCorrectionConstant(ord);
return new RandomVectorScorer.AbstractRandomVectorScorer(vectorsCopy) {
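
For context on the query path above: the float query is quantized once up front, and the returned offset correction is folded back into each comparison. A sketch, assuming a ScalarQuantizer#quantize(float[], byte[], VectorSimilarityFunction) signature consistent with the (truncated) hunk above; QuantizeQuerySketch is an illustrative name:

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.quantization.ScalarQuantizer;

final class QuantizeQuerySketch {
  static byte[] quantizeQuery(
      ScalarQuantizer quantizer, float[] target, VectorSimilarityFunction sim) {
    byte[] targetBytes = new byte[target.length];
    // The correction constant must be handed to ScalarQuantizedVectorSimilarity
    // together with targetBytes (not shown here).
    float offsetCorrection = quantizer.quantize(target, targetBytes, sim);
    return targetBytes;
  }
}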
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene100/Lucene100Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene100/Lucene100Codec.java
new file mode 100644
index 00000000000..97dc23bc07b
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene100/Lucene100Codec.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.lucene100;
+
+import java.util.Objects;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CompoundFormat;
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.FieldInfosFormat;
+import org.apache.lucene.codecs.FilterCodec;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.LiveDocsFormat;
+import org.apache.lucene.codecs.NormsFormat;
+import org.apache.lucene.codecs.PointsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.SegmentInfoFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.TermVectorsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90NormsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90PointsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat;
+import org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat;
+import org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat;
+import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+
+/**
+ * Implements the Lucene 10.0 index format
+ *
+ * <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
+ *
+ * @see org.apache.lucene.codecs.lucene100 package documentation for file format details.
+ * @lucene.experimental
+ */
+public class Lucene100Codec extends Codec {
+
+ /** Configuration option for the codec. */
+ public enum Mode {
+ /** Trade compression ratio for retrieval speed. */
+ BEST_SPEED(Lucene90StoredFieldsFormat.Mode.BEST_SPEED),
+ /** Trade retrieval speed for compression ratio. */
+ BEST_COMPRESSION(Lucene90StoredFieldsFormat.Mode.BEST_COMPRESSION);
+
+ private final Lucene90StoredFieldsFormat.Mode storedMode;
+
+ private Mode(Lucene90StoredFieldsFormat.Mode storedMode) {
+ this.storedMode = Objects.requireNonNull(storedMode);
+ }
+ }
+
+ private final TermVectorsFormat vectorsFormat = new Lucene90TermVectorsFormat();
+ private final FieldInfosFormat fieldInfosFormat = new Lucene94FieldInfosFormat();
+ private final SegmentInfoFormat segmentInfosFormat = new Lucene99SegmentInfoFormat();
+ private final LiveDocsFormat liveDocsFormat = new Lucene90LiveDocsFormat();
+ private final CompoundFormat compoundFormat = new Lucene90CompoundFormat();
+ private final NormsFormat normsFormat = new Lucene90NormsFormat();
+
+ private final PostingsFormat defaultPostingsFormat;
+ private final PostingsFormat postingsFormat =
+ new PerFieldPostingsFormat() {
+ @Override
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return Lucene100Codec.this.getPostingsFormatForField(field);
+ }
+ };
+
+ private final DocValuesFormat defaultDVFormat;
+ private final DocValuesFormat docValuesFormat =
+ new PerFieldDocValuesFormat() {
+ @Override
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return Lucene100Codec.this.getDocValuesFormatForField(field);
+ }
+ };
+
+ private final KnnVectorsFormat defaultKnnVectorsFormat;
+ private final KnnVectorsFormat knnVectorsFormat =
+ new PerFieldKnnVectorsFormat() {
+ @Override
+ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+ return Lucene100Codec.this.getKnnVectorsFormatForField(field);
+ }
+ };
+
+ private final StoredFieldsFormat storedFieldsFormat;
+
+ /** Instantiates a new codec. */
+ public Lucene100Codec() {
+ this(Mode.BEST_SPEED);
+ }
+
+ /**
+ * Instantiates a new codec, specifying the stored fields compression mode to use.
+ *
+ * @param mode stored fields compression mode to use for newly flushed/merged segments.
+ */
+ public Lucene100Codec(Mode mode) {
+ super("Lucene100");
+ this.storedFieldsFormat =
+ new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
+ this.defaultPostingsFormat = new Lucene912PostingsFormat();
+ this.defaultDVFormat = new Lucene90DocValuesFormat();
+ this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+ }
+
+ @Override
+ public final StoredFieldsFormat storedFieldsFormat() {
+ return storedFieldsFormat;
+ }
+
+ @Override
+ public final TermVectorsFormat termVectorsFormat() {
+ return vectorsFormat;
+ }
+
+ @Override
+ public final PostingsFormat postingsFormat() {
+ return postingsFormat;
+ }
+
+ @Override
+ public final FieldInfosFormat fieldInfosFormat() {
+ return fieldInfosFormat;
+ }
+
+ @Override
+ public final SegmentInfoFormat segmentInfoFormat() {
+ return segmentInfosFormat;
+ }
+
+ @Override
+ public final LiveDocsFormat liveDocsFormat() {
+ return liveDocsFormat;
+ }
+
+ @Override
+ public final CompoundFormat compoundFormat() {
+ return compoundFormat;
+ }
+
+ @Override
+ public final PointsFormat pointsFormat() {
+ return new Lucene90PointsFormat();
+ }
+
+ @Override
+ public final KnnVectorsFormat knnVectorsFormat() {
+ return knnVectorsFormat;
+ }
+
+ /**
+ * Returns the postings format that should be used for writing new segments of <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene912".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public PostingsFormat getPostingsFormatForField(String field) {
+ return defaultPostingsFormat;
+ }
+
+ /**
+ * Returns the docvalues format that should be used for writing new segments of <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene90".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public DocValuesFormat getDocValuesFormatForField(String field) {
+ return defaultDVFormat;
+ }
+
+ /**
+ * Returns the vectors format that should be used for writing new segments of <code>field</code>.
+ *
+ * <p>The default implementation always returns "Lucene99HnswVectorsFormat".
+ *
+ * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+ * future versions of Lucene are only guaranteed to be able to read the default implementation.
+ */
+ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+ return defaultKnnVectorsFormat;
+ }
+
+ @Override
+ public final DocValuesFormat docValuesFormat() {
+ return docValuesFormat;
+ }
+
+ @Override
+ public final NormsFormat normsFormat() {
+ return normsFormat;
+ }
+}
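
As the TestHnswBitVectorsFormat change earlier in this patch shows, per-field formats are customized by overriding the codec's get*ForField hooks. A hedged sketch of routing one family of fields to a different vectors format (the suffix rule and the PerFieldVectorCodec name are illustrative; Lucene99HnswScalarQuantizedVectorsFormat ships with Lucene):

import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;

final class PerFieldVectorCodec extends Lucene100Codec {
  @Override
  public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
    if (field.endsWith("_quantized")) { // illustrative routing rule
      return new Lucene99HnswScalarQuantizedVectorsFormat();
    }
    return super.getKnnVectorsFormatForField(field); // default HNSW format
  }
}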
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene100/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene100/package-info.java
new file mode 100644
index 00000000000..64189bfa9d1
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene100/package-info.java
@@ -0,0 +1,433 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Lucene 10.0 file format.
+ *
+ *
Apache Lucene - Index File Formats
+ *
+ *
+ *
+ * Introduction
+ *
+ *
+ *
+ *
This document defines the index file formats used in this version of Lucene. If you are using
+ * a different version of Lucene, please consult the copy of docs/
that was distributed
+ * with the version you are using.
+ *
+ *
This document attempts to provide a high-level definition of the Apache Lucene file formats.
+ *
+ *
+ * Definitions
+ *
+ *
+ *
+ *
The fundamental concepts in Lucene are index, document, field and term.
+ *
+ *
An index contains a sequence of documents.
+ *
+ *
+ * A document is a sequence of fields.
+ * A field is a named sequence of terms.
+ * A term is a sequence of bytes.
+ *
+ *
+ *
The same sequence of bytes in two different fields is considered a different term. Thus terms
+ * are represented as a pair: the string naming the field, and the bytes within the field.
+ *
+ *
Inverted Indexing
+ *
+ *
Lucene's index stores terms and statistics about those terms in order to make term-based
+ * search more efficient. Lucene's terms index falls into the family of indexes known as an
+ * inverted index. This is because it can list, for a term, the documents that contain it.
+ * This is the inverse of the natural relationship, in which documents list terms.
+ *
+ *
Types of Fields
+ *
+ *
In Lucene, fields may be stored , in which case their text is stored in the index
+ * literally, in a non-inverted manner. Fields that are inverted are called indexed . A field
+ * may be both stored and indexed.
+ *
+ *
The text of a field may be tokenized into terms to be indexed, or the text of a field
+ * may be used literally as a term to be indexed. Most fields are tokenized, but sometimes it is
+ * useful for certain identifier fields to be indexed literally.
+ *
+ *
See the {@link org.apache.lucene.document.Field Field} java docs for more information on
+ * Fields.
+ *
+ *
Segments
+ *
+ *
Lucene indexes may be composed of multiple sub-indexes, or segments . Each segment is a
+ * fully independent index, which could be searched separately. Indexes evolve by:
+ *
+ *
+ * Creating new segments for newly added documents.
+ * Merging existing segments.
+ *
+ *
+ *
Searches may involve multiple segments and/or multiple indexes, each index potentially
+ * composed of a set of segments.
+ *
+ *
Document Numbers
+ *
+ *
Internally, Lucene refers to documents by an integer document number . The first
+ * document added to an index is numbered zero, and each subsequent document added gets a number one
+ * greater than the previous.
+ *
+ *
Note that a document's number may change, so caution should be taken when storing these
+ * numbers outside of Lucene. In particular, numbers may change in the following situations:
+ *
+ *
+ *
+ * The numbers stored in each segment are unique only within the segment, and must be
+ * converted before they can be used in a larger context. The standard technique is to
+ * allocate each segment a range of values, based on the range of numbers used in that
+ * segment. To convert a document number from a segment to an external value, the segment's
+ * base document number is added. To convert an external value back to a
+ * segment-specific value, the segment is identified by the range that the external value is
+ * in, and the segment's base value is subtracted. For example two five document segments
+ * might be combined, so that the first segment has a base value of zero, and the second of
+ * five. Document three from the second segment would have an external value of eight.
+ *
+ * When documents are deleted, gaps are created in the numbering. These are eventually
+ * removed as the index evolves through merging. Deleted documents are dropped when segments
+ * are merged. A freshly-merged segment thus has no gaps in its numbering.
+ *
+ *
+ *
+ *
+ * Index Structure Overview
+ *
+ *
+ *
+ *
Each segment index maintains the following:
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment info}. This
+ * contains metadata about a segment, such as the number of documents, what files it uses, and
+ * information about how the segment is sorted
+ * {@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Field names}. This
+ * contains metadata about the set of named fields used in the index.
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Stored Field values}.
+ * This contains, for each document, a list of attribute-value pairs, where the attributes are
+ * field names. These are used to store auxiliary information about the document, such as its
+ * title, url, or an identifier to access a database. The set of stored fields are what is
+ * returned for each hit when searching. This is keyed by document number.
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term dictionary}. A
+ * dictionary containing all of the terms used in all of the indexed fields of all of the
+ * documents. The dictionary also contains the number of documents which contain the term, and
+ * pointers to the term's frequency and proximity data.
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Frequency data}. For
+ * each term in the dictionary, the numbers of all the documents that contain that term, and
+ * the frequency of the term in that document, unless frequencies are omitted ({@link
+ * org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Proximity data}. For
+ * each term in the dictionary, the positions that the term occurs in each document. Note that
+ * this will not exist if all fields in all documents omit position data.
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
+ * each field in each document, a value is stored that is multiplied into the score for hits
+ * on that field.
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vectors}. For each
+ * field in each document, the term vector (sometimes called document vector) may be stored. A
+ * term vector consists of term text and term frequency. To add Term Vectors to your index see
+ * the {@link org.apache.lucene.document.Field Field} constructors
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-document values}. Like
+ * stored values, these are also keyed by document number, but are generally intended to be
+ * loaded into main memory for fast access. Whereas stored values are generally intended for
+ * summary results from searches, per-document values are useful for things like scoring
+ * factors.
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
+ * optional file indicating which documents are live.
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}. Optional pair
+ * of files, recording dimensionally indexed fields, to enable fast numeric range filtering
+ * and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
+ * intersection (2D, 3D).
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values}. The
+ * vector format stores numeric vectors in a format optimized for random access and
+ * computation, supporting high-dimensional nearest-neighbor search.
+ *
+ *
+ *
+ * Details on each of these are provided in their linked pages.
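
As a rough, hedged illustration of how these structures map onto a single document, the sketch below uses standard field types from `org.apache.lucene.document` (names and values are made up) to show one document that touches most of them when indexed:

```java
import org.apache.lucene.document.*;

Document doc = new Document();
// Indexed text: feeds the term dictionary, frequencies, positions, and norms.
doc.add(new TextField("title", "lucene file formats", Field.Store.YES));
// Stored only: lands in the stored fields files.
doc.add(new StoredField("url", "https://example.invalid/doc"));
// Doc values: columnar per-document storage for sorting/faceting.
doc.add(new NumericDocValuesField("rank", 42));
// Point: dimensional value for fast range filtering.
doc.add(new LongPoint("timestamp", 1_700_000_000L));
// Vector: stored in the HNSW vector files for nearest-neighbor search.
doc.add(new KnnFloatVectorField("embedding", new float[] {0.1f, 0.2f, 0.3f}));
```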
+ *
+ * File Naming
+ *
+ *
+ *
+ *
+ * All files belonging to a segment have the same name with varying extensions. The extensions
+ * correspond to the different file formats described below. When using the Compound File format
+ * (default for small segments) these files (except for the Segment info file, the Lock file, and
+ * the Deleted documents file) are collapsed into a single .cfs file (see below for details).
+ *
+ *
+ * Typically, all segments in an index are stored in a single directory, although this is not
+ * required.
+ *
+ *
+ * File names are never re-used. That is, when any file is saved to the Directory it is given a
+ * never before used filename. This is achieved using a simple generations approach. For example,
+ * the first segments file is segments_1, then segments_2, etc. The generation is a sequential long
+ * integer represented in alpha-numeric (base 36) form.
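
A sketch of that naming scheme (the helper is illustrative, but the base-36 rendering matches Java's `Long.toString(gen, Character.MAX_RADIX)`):

```java
static String segmentsFileName(long generation) {
  // Generation is rendered in base 36 ("alpha-numeric"), so generations
  // 1, 2, ..., 35, 36 yield segments_1, segments_2, ..., segments_z, segments_10.
  return "segments_" + Long.toString(generation, Character.MAX_RADIX);
}
```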
+ *
+ * Summary of File Extensions
+ *
+ *
+ *
+ *
+ * The following table summarizes the names and extensions of the files in Lucene:
+ *
+ *
+ * lucene filenames by extension
+ *
+ * Name
+ * Extension
+ * Brief Description
+ *
+ *
+ * {@link org.apache.lucene.index.SegmentInfos Segments File}
+ * segments_N
+ * Stores information about a commit point
+ *
+ *
+ * Lock File
+ * write.lock
+ * The Write lock prevents multiple IndexWriters from writing to the same
+ * index.
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment Info}
+ * .si
+ * Stores metadata about a segment
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat Compound File}
+ * .cfs, .cfe
+ * An optional "virtual" file consisting of all the other index files for
+ * systems that frequently run out of file handles.
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Fields}
+ * .fnm
+ * Stores information about the fields
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Index}
+ * .fdx
+ * Contains pointers to field data
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Data}
+ * .fdt
+ * The stored fields for documents
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Dictionary}
+ * .tim
+ * The term dictionary, stores term info
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Index}
+ * .tip
+ * The index into the Term Dictionary
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Frequencies}
+ * .doc
+ * Contains the list of docs which contain each term along with frequency
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Positions}
+ * .pos
+ * Stores position information about where a term occurs in the index
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Payloads}
+ * .pay
+ * Stores additional per-position metadata information such as character offsets and user payloads
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Norms}
+ * .nvd, .nvm
+ * Encodes length and boost factors for docs and fields
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-Document Values}
+ * .dvd, .dvm
+ * Encodes additional scoring factors or other per-document information.
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Index}
+ * .tvx
+ * Stores offset into the document data file
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Data}
+ * .tvd
+ * Contains term vector data.
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents}
+ * .liv
+ * Info about what documents are live
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}
+ * .kdd, .kdi, .kdm
+ * Holds indexed points
+ *
+ *
+ * {@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values}
+ * .vec, .vem, .veq, .vex
+ * Holds indexed vectors; .vec files contain the raw vector data,
+ * .vem the vector metadata, .veq the quantized vector data, and .vex the
+ * HNSW graph data.
+ *
+ *
+ *
+ *
+ *
+ * Lock File
+ *
+ * The write lock, which is stored in the index directory by default, is named "write.lock". If the
+ * lock directory is different from the index directory then the write lock will be named
+ * "XXXX-write.lock" where XXXX is a unique prefix derived from the full path to the index
+ * directory. When this file is present, a writer is currently modifying the index (adding or
+ * removing documents). This lock file ensures that only one writer is modifying the index at a
+ * time.
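
A small sketch of the behavior this implies (paths are illustrative; requires the analysis-common module for StandardAnalyzer): while one IndexWriter holds write.lock, opening a second writer on the same directory fails.

```java
import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;

public class WriteLockDemo {
  public static void main(String[] args) throws Exception {
    try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/demo-index"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      // `dir` now contains write.lock, held by `writer`.
      try {
        new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
      } catch (LockObtainFailedException expected) {
        // Only one IndexWriter may modify the index at a time.
      }
    }
  }
}
```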
+ *
+ * History
+ *
+ * Compatibility notes are provided in this document, describing how file formats have changed
+ * from prior versions:
+ *
+ *
+ * In version 2.1, the file format was changed to allow lock-less commits (ie, no more commit
+ * lock). The change is fully backwards compatible: you can open a pre-2.1 index for searching
+ * or adding/deleting of docs. When the new segments file is saved (committed), it will be
+ * written in the new file format (meaning no specific "upgrade" process is needed). But note
+ * that once a commit has occurred, pre-2.1 Lucene will not be able to read the index.
+ * In version 2.3, the file format was changed to allow segments to share a single set of doc
+ * store (vectors & stored fields) files. This allows for faster indexing in certain
+ * cases. The change is fully backwards compatible (in the same way as the lock-less commits
+ * change in 2.1).
+ * In version 2.4, Strings are now written as a true UTF-8 byte sequence, not Java's modified
+ * UTF-8. See LUCENE-510 for
+ * details.
+ * In version 2.9, an optional opaque Map<String,String> CommitUserData may be passed to
+ * IndexWriter's commit methods (and later retrieved), which is recorded in the segments_N
+ * file. See LUCENE-1382 for
+ * details. Also, diagnostics were added to each segment written recording details about why
+ * it was written (due to flush, merge; which OS/JRE was used; etc.). See issue LUCENE-1654 for details.
+ * In version 3.0, compressed fields are no longer written to the index (they can still be
+ * read, but on merge the new segment will write them, uncompressed). See issue LUCENE-1960 for details.
+ * In version 3.1, segments record the code version that created them. See LUCENE-2720 for details.
+ * Additionally, segments track explicitly whether or not they have term vectors. See LUCENE-2811 for details.
+ * In version 3.2, numeric fields are written natively to the stored fields file;
+ * previously they were stored in text format only.
+ * In version 3.4, fields can omit position data while still indexing term frequencies.
+ * In version 4.0, the format of the inverted index became extensible via the {@link
+ * org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage ({@code DocValues})
+ * was introduced. Normalization factors need no longer be a single byte, they can be any
+ * {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. Terms need not be
+ * unicode strings, they can be any byte sequence. Term offsets can optionally be indexed into
+ * the postings lists. Payloads can be stored in the term vectors.
+ * In version 4.1, the format of the postings list changed to use either FOR compression or
+ * variable-byte encoding, depending upon the frequency of the term. Terms appearing only once
+ * were changed to inline directly into the term dictionary. Stored fields are compressed by
+ * default.
+ * In version 4.2, term vectors are compressed by default. DocValues has a new multi-valued
+ * type (SortedSet), that can be used for faceting/grouping/joining on multi-valued fields.
+ * In version 4.5, DocValues were extended to explicitly represent missing values.
+ * In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
+ * allow updating NumericDocValues fields.
+ * In version 4.8, checksum footers were added to the end of each index file for improved data
+ * integrity. Specifically, the last 8 bytes of every index file contain the zlib-crc32
+ * checksum of the file.
+ * In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) that is
+ * suitable for faceting/sorting/analytics.
+ * In version 5.4, DocValues have been improved to store more information on disk: addresses
+ * for binary fields and ord indexes for multi-valued fields.
+ * In version 6.0, Points were added, for multi-dimensional range/distance search.
+ * In version 6.2, a new segment info format was introduced that reads/writes the index sort,
+ * to support index sorting.
+ * In version 7.0, DocValues have been improved to better support sparse doc values thanks to
+ * an iterator API.
+ * In version 8.0, postings have been enhanced to record, for each block of doc ids, the (term
+ * freq, normalization factor) pairs that may trigger the maximum score of the block. This
+ * information is recorded alongside skip data in order to be able to skip blocks of doc ids
+ * if they may not produce high enough scores. Additionally, doc values and norms have been
+ * extended with jump-tables to make access O(1) instead of O(n), where n is the number of
+ * elements to skip when advancing in the data.
+ * In version 8.4, postings, positions, offsets and payload lengths have moved to a more
+ * performant encoding that is vectorized.
+ * In version 8.6, index sort serialization is delegated to the sorts themselves, to allow
+ * user-defined sorts to be used.
+ * In version 8.6, points fields split the index tree and leaf data into separate files, to
+ * allow for different access patterns to the different data structures.
+ * In version 8.7, stored fields compression became adaptive to better handle documents with
+ * smaller stored fields.
+ * In version 9.0, vector-valued fields were added.
+ * In version 9.1, vector-valued fields were modified to add a graph hierarchy.
+ * In version 9.2, docs of vector-valued fields were moved from .vem to .vec and encoded by
+ * IndexDISI. ordToDoc mappings were added to .vem.
+ * In version 9.5, HNSW graph connections were changed to be delta-encoded with vints.
+ * Additionally, metadata file size improvements were made by delta-encoding nodes by graph
+ * layer and not writing the node ids for the zeroth layer.
+ * In version 9.9, vector scalar quantization support was added, allowing the HNSW vector
+ * format to use int8-quantized vectors for float32 vector search.
+ * In version 9.12, skip data was refactored to have only two levels: every 128 docs and every
+ * 4,096 docs, and to be inlined in postings lists. This resulted in a speedup for queries that
+ * need skipping, especially conjunctions.
+ *
+ *
+ *
+ *
+ * Limitations
+ *
+ *
+ *
+ *
+ * Lucene uses a Java int to refer to document numbers, and the index file format uses an
+ * Int32 on-disk to store document numbers. This is a limitation of both the index file
+ * format and the current implementation. Eventually these should be replaced with either
+ * UInt64 values, or better yet, {@link org.apache.lucene.store.DataOutput#writeVInt
+ * VInt} values which have no limit.
+ */
+package org.apache.lucene.codecs.lucene100;
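
As a point of reference for the VInt format mentioned in the Limitations section, here is a minimal sketch of the encoding (7 data bits per byte, high bit set as a continuation flag), which mirrors what `DataOutput#writeVInt` does:

```java
// Encode a non-negative int as a variable-length byte sequence:
// 7 data bits per byte, high bit set on every byte except the last.
static int writeVInt(byte[] out, int offset, int value) {
  while ((value & ~0x7F) != 0) {
    out[offset++] = (byte) ((value & 0x7F) | 0x80);
    value >>>= 7;
  }
  out[offset++] = (byte) value;
  return offset; // new write position; e.g. 300 encodes as 0xAC 0x02
}
```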
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
index a2b2c84e12a..dbd56125fcd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs.lucene90;
import java.io.DataInput;
import java.io.IOException;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
@@ -439,6 +440,40 @@ public final class IndexedDISI extends DocIdSetIterator {
// ALL variables
int gap;
+ /**
+ * Returns an iterator that delegates to the IndexedDISI. Advancing this iterator will advance the
+ * underlying IndexedDISI, and vice-versa.
+ */
+ public static KnnVectorValues.DocIndexIterator asDocIndexIterator(IndexedDISI disi) {
+ // TODO: can we replace this with fromDISI?
+ return new KnnVectorValues.DocIndexIterator() {
+ @Override
+ public int docID() {
+ return disi.docID();
+ }
+
+ @Override
+ public int index() {
+ return disi.index();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return disi.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return disi.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return disi.cost();
+ }
+ };
+ }
+
@Override
public int docID() {
return doc;
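
A hedged usage sketch for the adapter added above (`disi` is assumed to be an IndexedDISI positioned over a field's documents):

```java
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;

// Expose the IndexedDISI through the KnnVectorValues iterator abstraction.
// Both views share state: advancing one advances the other.
KnnVectorValues.DocIndexIterator it = IndexedDISI.asDocIndexIterator(disi);
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
  int ord = it.index(); // dense ordinal of the current document within the field
  // ... look up the vector for `ord` ...
}
```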
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java
index fd9ec5f9c28..80b98e0a4c5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundFormat.java
@@ -82,9 +82,8 @@ public final class Lucene90CompoundFormat extends CompoundFormat {
public Lucene90CompoundFormat() {}
@Override
- public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context)
- throws IOException {
- return new Lucene90CompoundReader(dir, si, context);
+ public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si) throws IOException {
+ return new Lucene90CompoundReader(dir, si);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java
index ee9c9ae40fa..8f6211bc959 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90CompoundReader.java
@@ -30,6 +30,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.IOUtils;
@@ -56,8 +57,7 @@ final class Lucene90CompoundReader extends CompoundDirectory {
/** Create a new CompoundFileDirectory. */
// TODO: we should just pre-strip "entries" and append segment name up-front like simpletext?
// this need not be a "general purpose" directory anymore (it only writes index files)
- public Lucene90CompoundReader(Directory directory, SegmentInfo si, IOContext context)
- throws IOException {
+ public Lucene90CompoundReader(Directory directory, SegmentInfo si) throws IOException {
this.directory = directory;
this.segmentName = si.name;
String dataFileName =
@@ -75,7 +75,7 @@ final class Lucene90CompoundReader extends CompoundDirectory {
.orElseGet(() -> CodecUtil.indexHeaderLength(Lucene90CompoundFormat.DATA_CODEC, ""))
+ CodecUtil.footerLength();
- handle = directory.openInput(dataFileName, context);
+ handle = directory.openInput(dataFileName, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL));
try {
CodecUtil.checkIndexHeader(
handle, Lucene90CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
@@ -169,7 +169,7 @@ final class Lucene90CompoundReader extends CompoundDirectory {
+ entries.keySet()
+ ")");
}
- return handle.slice(name, entry.offset, entry.length);
+ return handle.slice(name, entry.offset, entry.length, context.readAdvice());
}
/** Returns an array of strings, one for each file in the directory. */
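
The net effect of this change is that read advice moves from open time to slice time: the .cfs data file is opened once with neutral advice, and each inner-file slice carries the advice of the caller's IOContext. A minimal sketch of the pattern, with illustrative names and offsets:

```java
import java.io.IOException;
import org.apache.lucene.store.*;

/** Sketch only: the offset/length would come from the .cfe entry table. */
static IndexInput openInnerFile(
    Directory dir, String cfsName, String innerName, long offset, long length, IOContext ctx)
    throws IOException {
  // The compound data file is opened once with neutral (NORMAL) advice...
  IndexInput handle = dir.openInput(cfsName, IOContext.DEFAULT.withReadAdvice(ReadAdvice.NORMAL));
  // ...and each inner-file slice carries the read advice of the caller's context,
  // so e.g. a terms index can ask for RANDOM while a merge reads SEQUENTIAL.
  return handle.slice(innerName, offset, length, ctx.readAdvice());
}
```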
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index fb8d578acdf..da027a35f17 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -21,6 +21,8 @@ import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_IND
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BaseTermsEnum;
@@ -41,7 +43,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
-import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
@@ -58,12 +59,12 @@ import org.apache.lucene.util.packed.DirectReader;
/** reader for {@link Lucene90DocValuesFormat} */
final class Lucene90DocValuesProducer extends DocValuesProducer {
- private final IntObjectHashMap<NumericEntry> numerics;
- private final IntObjectHashMap<BinaryEntry> binaries;
- private final IntObjectHashMap<SortedEntry> sorted;
- private final IntObjectHashMap<SortedSetEntry> sortedSets;
- private final IntObjectHashMap<SortedNumericEntry> sortedNumerics;
- private final IntObjectHashMap<DocValuesSkipperEntry> skippers;
+ private final Map<String, NumericEntry> numerics;
+ private final Map<String, BinaryEntry> binaries;
+ private final Map<String, SortedEntry> sorted;
+ private final Map<String, SortedSetEntry> sortedSets;
+ private final Map<String, SortedNumericEntry> sortedNumerics;
+ private final Map<String, DocValuesSkipperEntry> skippers;
private final IndexInput data;
private final int maxDoc;
private int version = -1;
@@ -80,12 +81,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
String metaName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
this.maxDoc = state.segmentInfo.maxDoc();
- numerics = new IntObjectHashMap<>();
- binaries = new IntObjectHashMap<>();
- sorted = new IntObjectHashMap<>();
- sortedSets = new IntObjectHashMap<>();
- sortedNumerics = new IntObjectHashMap<>();
- skippers = new IntObjectHashMap<>();
+ numerics = new HashMap<>();
+ binaries = new HashMap<>();
+ sorted = new HashMap<>();
+ sortedSets = new HashMap<>();
+ sortedNumerics = new HashMap<>();
+ skippers = new HashMap<>();
merging = false;
// read in the entries from the metadata file.
@@ -148,12 +149,12 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
// Used for cloning
private Lucene90DocValuesProducer(
- IntObjectHashMap<NumericEntry> numerics,
- IntObjectHashMap<BinaryEntry> binaries,
- IntObjectHashMap<SortedEntry> sorted,
- IntObjectHashMap<SortedSetEntry> sortedSets,
- IntObjectHashMap<SortedNumericEntry> sortedNumerics,
- IntObjectHashMap<DocValuesSkipperEntry> skippers,
+ Map<String, NumericEntry> numerics,
+ Map<String, BinaryEntry> binaries,
+ Map<String, SortedEntry> sorted,
+ Map<String, SortedSetEntry> sortedSets,
+ Map<String, SortedNumericEntry> sortedNumerics,
+ Map<String, DocValuesSkipperEntry> skippers,
IndexInput data,
int maxDoc,
int version,
@@ -193,18 +194,18 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
byte type = meta.readByte();
if (info.docValuesSkipIndexType() != DocValuesSkipIndexType.NONE) {
- skippers.put(info.number, readDocValueSkipperMeta(meta));
+ skippers.put(info.name, readDocValueSkipperMeta(meta));
}
if (type == Lucene90DocValuesFormat.NUMERIC) {
- numerics.put(info.number, readNumeric(meta));
+ numerics.put(info.name, readNumeric(meta));
} else if (type == Lucene90DocValuesFormat.BINARY) {
- binaries.put(info.number, readBinary(meta));
+ binaries.put(info.name, readBinary(meta));
} else if (type == Lucene90DocValuesFormat.SORTED) {
- sorted.put(info.number, readSorted(meta));
+ sorted.put(info.name, readSorted(meta));
} else if (type == Lucene90DocValuesFormat.SORTED_SET) {
- sortedSets.put(info.number, readSortedSet(meta));
+ sortedSets.put(info.name, readSortedSet(meta));
} else if (type == Lucene90DocValuesFormat.SORTED_NUMERIC) {
- sortedNumerics.put(info.number, readSortedNumeric(meta));
+ sortedNumerics.put(info.name, readSortedNumeric(meta));
} else {
throw new CorruptIndexException("invalid type: " + type, meta);
}
@@ -429,7 +430,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
- NumericEntry entry = numerics.get(field.number);
+ NumericEntry entry = numerics.get(field.name);
return getNumeric(entry);
}
@@ -785,13 +786,13 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
- BinaryEntry entry = binaries.get(field.number);
+ BinaryEntry entry = binaries.get(field.name);
if (entry.docsWithFieldOffset == -2) {
return DocValues.emptyBinary();
}
- final IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength);
+ final RandomAccessInput bytesSlice = data.randomAccessSlice(entry.dataOffset, entry.dataLength);
// Prefetch the first page of data. Following pages are expected to get prefetched through
// read-ahead.
if (bytesSlice.length() > 0) {
@@ -808,8 +809,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public BytesRef binaryValue() throws IOException {
- bytesSlice.seek((long) doc * length);
- bytesSlice.readBytes(bytes.bytes, 0, length);
+ bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
return bytes;
}
};
@@ -831,8 +831,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
public BytesRef binaryValue() throws IOException {
long startOffset = addresses.get(doc);
bytes.length = (int) (addresses.get(doc + 1L) - startOffset);
- bytesSlice.seek(startOffset);
- bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
+ bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
return bytes;
}
};
@@ -855,8 +854,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public BytesRef binaryValue() throws IOException {
- bytesSlice.seek((long) disi.index() * length);
- bytesSlice.readBytes(bytes.bytes, 0, length);
+ bytesSlice.readBytes((long) disi.index() * length, bytes.bytes, 0, length);
return bytes;
}
};
@@ -879,8 +877,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
final int index = disi.index();
long startOffset = addresses.get(index);
bytes.length = (int) (addresses.get(index + 1L) - startOffset);
- bytesSlice.seek(startOffset);
- bytesSlice.readBytes(bytes.bytes, 0, bytes.length);
+ bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
return bytes;
}
};
@@ -890,7 +887,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
- SortedEntry entry = sorted.get(field.number);
+ SortedEntry entry = sorted.get(field.name);
return getSorted(entry);
}
@@ -1124,7 +1121,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
final IndexInput bytes;
final long blockMask;
final LongValues indexAddresses;
- final IndexInput indexBytes;
+ final RandomAccessInput indexBytes;
final BytesRef term;
long ord = -1;
@@ -1146,7 +1143,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
indexAddresses =
DirectMonotonicReader.getInstance(
entry.termsIndexAddressesMeta, indexAddressesSlice, merging);
- indexBytes = data.slice("terms-index", entry.termsIndexOffset, entry.termsIndexLength);
+ indexBytes = data.randomAccessSlice(entry.termsIndexOffset, entry.termsIndexLength);
term = new BytesRef(entry.maxTermLength);
// add the max term length for the dictionary
@@ -1204,8 +1201,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
assert index >= 0 && index <= (entry.termsDictSize - 1) >>> entry.termsDictIndexShift;
final long start = indexAddresses.get(index);
term.length = (int) (indexAddresses.get(index + 1) - start);
- indexBytes.seek(start);
- indexBytes.readBytes(term.bytes, 0, term.length);
+ indexBytes.readBytes(start, term.bytes, 0, term.length);
return term;
}
@@ -1367,7 +1363,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
- SortedNumericEntry entry = sortedNumerics.get(field.number);
+ SortedNumericEntry entry = sortedNumerics.get(field.name);
return getSortedNumeric(entry);
}
@@ -1512,7 +1508,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
- SortedSetEntry entry = sortedSets.get(field.number);
+ SortedSetEntry entry = sortedSets.get(field.name);
if (entry.singleValueEntry != null) {
return DocValues.singleton(getSorted(entry.singleValueEntry));
}
@@ -1786,7 +1782,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
- final DocValuesSkipperEntry entry = skippers.get(field.number);
+ final DocValuesSkipperEntry entry = skippers.get(field.name);
final IndexInput input = data.slice("doc value skipper", entry.offset, entry.length);
// Prefetch the first page of data. Following pages are expected to get prefetched through
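
Several hunks above replace seek-then-read with positional reads. A short sketch contrasting the two styles (illustrative arguments; the positional `RandomAccessInput#readBytes` overload is the one used in the diff):

```java
import java.io.IOException;
import org.apache.lucene.store.*;

static void readRecord(IndexInput data, long sliceOffset, long sliceLength,
                       int doc, int recordLen, byte[] buf) throws IOException {
  // Before: a stateful slice; every read must first mutate the file pointer.
  IndexInput in = data.slice("fixed-binary", sliceOffset, sliceLength);
  in.seek((long) doc * recordLen);
  in.readBytes(buf, 0, recordLen);

  // After: a stateless positional read; no pointer to seek, which plays
  // better with prefetching and concurrent readers.
  RandomAccessInput ra = data.randomAccessSlice(sliceOffset, sliceLength);
  ra.readBytes((long) doc * recordLen, buf, 0, recordLen);
}
```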
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
index d3f256cbf00..82910e23ab9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
@@ -33,9 +33,9 @@ import org.apache.lucene.util.bkd.BKDReader;
/** Reads point values previously written with {@link Lucene90PointsWriter} */
public class Lucene90PointsReader extends PointsReader {
- final IndexInput indexIn, dataIn;
- final SegmentReadState readState;
- final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
+ private final IndexInput indexIn, dataIn;
+ private final SegmentReadState readState;
+ private final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
/** Sole constructor */
public Lucene90PointsReader(SegmentReadState readState) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java
index e50d6a0fdb5..45a946e8ac4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java
@@ -253,7 +253,7 @@ public class Lucene90PointsWriter extends PointsWriter {
FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
if (readerFieldInfo != null && readerFieldInfo.getPointDimensionCount() > 0) {
- PointValues aPointValues = reader90.readers.get(readerFieldInfo.number);
+ PointValues aPointValues = reader90.getValues(readerFieldInfo.name);
if (aPointValues != null) {
pointValues.add(aPointValues);
docMaps.add(mergeState.docMaps[i]);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormat.java
index ce0310d6396..9e367a3d9d8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90StoredFieldsFormat.java
@@ -49,9 +49,9 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
*
*
* // the default: for high performance
- * indexWriterConfig.setCodec(new Lucene912Codec(Mode.BEST_SPEED));
+ * indexWriterConfig.setCodec(new Lucene100Codec(Mode.BEST_SPEED));
* // instead for higher compression (but slower):
- * // indexWriterConfig.setCodec(new Lucene912Codec(Mode.BEST_COMPRESSION));
+ * // indexWriterConfig.setCodec(new Lucene100Codec(Mode.BEST_COMPRESSION));
*
*
* File formats
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java
index 9988c45bdf7..85d23a489fe 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/SegmentTermsEnumFrame.java
@@ -598,7 +598,7 @@ final class SegmentTermsEnumFrame {
startBytePos = suffixesReader.getPosition();
suffixesReader.skipBytes(suffixLength);
- // Loop over bytes in the suffix, comparing to the target
+ // Compare suffix and target.
final int cmp =
Arrays.compareUnsigned(
suffixBytes,
@@ -686,7 +686,7 @@ final class SegmentTermsEnumFrame {
nextEnt = mid + 1;
startBytePos = mid * suffixLength;
- // Binary search bytes in the suffix, comparing to the target.
+ // Compare suffix and target.
cmp =
Arrays.compareUnsigned(
suffixBytes,
@@ -792,6 +792,7 @@ final class SegmentTermsEnumFrame {
lastSubFP = fp - subCode;
}
+ // Compare suffix and target.
final int cmp =
Arrays.compareUnsigned(
suffixBytes,
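
For context on the comparisons these comments describe: `Arrays.compareUnsigned` orders byte ranges unsigned-lexicographically, which is what the term dictionary needs since terms are arbitrary byte sequences. A tiny illustration:

```java
import java.util.Arrays;

byte[] suffix = {(byte) 0x80}; // 128 unsigned, but -128 as a signed byte
byte[] target = {(byte) 0x7F}; // 127
// A signed comparison would order suffix before target; the unsigned
// comparison orders it after, which is what the block-tree lookup relies on.
int cmp = Arrays.compareUnsigned(suffix, 0, suffix.length, target, 0, target.length);
assert cmp > 0;
```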
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java
index 25601388a0f..f13b3cde69c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/ForDeltaUtil.java
@@ -23,7 +23,6 @@ import static org.apache.lucene.codecs.lucene912.ForUtil.*;
import java.io.IOException;
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.packed.PackedInts;
/**
@@ -282,11 +281,6 @@ public final class ForDeltaUtil {
}
}
- void skip(IndexInput in) throws IOException {
- final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
- in.skipBytes(numBytes(bitsPerValue));
- }
-
/** Delta-decode 128 integers into {@code longs}. */
void decodeAndPrefixSum(int bitsPerValue, PostingDecodingUtil pdu, long base, long[] longs)
throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java
index 9c65078cfa9..bdb4dc4db08 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsReader.java
@@ -47,7 +47,6 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.internal.vectorization.PostingDecodingUtil;
import org.apache.lucene.internal.vectorization.VectorizationProvider;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
@@ -56,7 +55,6 @@ import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IOUtils;
/**
@@ -67,6 +65,12 @@ import org.apache.lucene.util.IOUtils;
public final class Lucene912PostingsReader extends PostingsReaderBase {
static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
+ // Dummy impacts, composed of the maximum possible term frequency and the lowest possible
+ // (unsigned) norm value. This is typically used on tail blocks, which don't actually record
+ // impacts as the storage overhead would not be worth any query evaluation speedup, since there
+ // are fewer than 128 docs left to evaluate anyway.
+ private static final List<Impact> DUMMY_IMPACTS =
+ Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
private final IndexInput docIn;
private final IndexInput posIn;
@@ -77,8 +81,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private final int maxNumImpactsAtLevel1;
private final int maxImpactNumBytesAtLevel1;
- private final int version;
-
/** Sole constructor. */
public Lucene912PostingsReader(SegmentReadState state) throws IOException {
String metaName =
@@ -87,6 +89,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
final long expectedDocFileLength, expectedPosFileLength, expectedPayFileLength;
ChecksumIndexInput metaIn = null;
boolean success = false;
+ int version;
try {
metaIn = state.directory.openChecksumInput(metaName);
version =
@@ -236,13 +239,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
DataInput in, FieldInfo fieldInfo, BlockTermState _termState, boolean absolute)
throws IOException {
final IntBlockTermState termState = (IntBlockTermState) _termState;
- final boolean fieldHasPositions =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- final boolean fieldHasOffsets =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0;
- final boolean fieldHasPayloads = fieldInfo.hasPayloads();
-
if (absolute) {
termState.docStartFP = 0;
termState.posStartFP = 0;
@@ -263,9 +259,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
termState.singletonDocID += BitUtil.zigZagDecode(l >>> 1);
}
- if (fieldHasPositions) {
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) {
termState.posStartFP += in.readVLong();
- if (fieldHasOffsets || fieldHasPayloads) {
+ if (fieldInfo
+ .getIndexOptions()
+ .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
+ >= 0
+ || fieldInfo.hasPayloads()) {
termState.payStartFP += in.readVLong();
}
if (termState.totalTermFreq > BLOCK_SIZE) {
@@ -280,156 +280,115 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
public PostingsEnum postings(
FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags)
throws IOException {
-
- boolean indexHasPositions =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
-
- if (indexHasPositions == false
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0
|| PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false) {
- BlockDocsEnum docsEnum;
- if (reuse instanceof BlockDocsEnum) {
- docsEnum = (BlockDocsEnum) reuse;
- if (!docsEnum.canReuse(docIn, fieldInfo)) {
- docsEnum = new BlockDocsEnum(fieldInfo);
- }
- } else {
- docsEnum = new BlockDocsEnum(fieldInfo);
- }
- return docsEnum.reset((IntBlockTermState) termState, flags);
+ return (reuse instanceof BlockDocsEnum blockDocsEnum
+ && blockDocsEnum.canReuse(docIn, fieldInfo)
+ ? blockDocsEnum
+ : new BlockDocsEnum(fieldInfo))
+ .reset((IntBlockTermState) termState, flags);
} else {
- EverythingEnum everythingEnum;
- if (reuse instanceof EverythingEnum) {
- everythingEnum = (EverythingEnum) reuse;
- if (!everythingEnum.canReuse(docIn, fieldInfo)) {
- everythingEnum = new EverythingEnum(fieldInfo);
- }
- } else {
- everythingEnum = new EverythingEnum(fieldInfo);
- }
- return everythingEnum.reset((IntBlockTermState) termState, flags);
+ return (reuse instanceof EverythingEnum everythingEnum
+ && everythingEnum.canReuse(docIn, fieldInfo)
+ ? everythingEnum
+ : new EverythingEnum(fieldInfo))
+ .reset((IntBlockTermState) termState, flags);
}
}
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags)
throws IOException {
- final boolean indexHasFreqs =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
+ final IndexOptions options = fieldInfo.getIndexOptions();
final boolean indexHasPositions =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- if (state.docFreq >= BLOCK_SIZE
- && indexHasFreqs
- && (indexHasPositions == false
- || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
- return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
- }
+ if (state.docFreq >= BLOCK_SIZE) {
+ if (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0
+ && (indexHasPositions == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS) == false)) {
+ return new BlockImpactsDocsEnum(indexHasPositions, (IntBlockTermState) state);
+ }
- final boolean indexHasOffsets =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0;
- final boolean indexHasPayloads = fieldInfo.hasPayloads();
-
- if (state.docFreq >= BLOCK_SIZE
- && indexHasPositions
- && (indexHasOffsets == false
- || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
- && (indexHasPayloads == false
- || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
- return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
+ if (indexHasPositions
+ && (options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0
+ || PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS) == false)
+ && (fieldInfo.hasPayloads() == false
+ || PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS) == false)) {
+ return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
+ }
}
return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
}
- final class BlockDocsEnum extends PostingsEnum {
+ private static long sumOverRange(long[] arr, int start, int end) {
+ long res = 0L;
+ for (int i = start; i < end; i++) {
+ res += arr[i];
+ }
+ return res;
+ }
- final ForUtil forUtil = new ForUtil();
- final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
- final PForUtil pforUtil = new PForUtil(forUtil);
+ private abstract class AbstractPostingsEnum extends PostingsEnum {
- private final long[] docBuffer = new long[BLOCK_SIZE + 1];
- private final long[] freqBuffer = new long[BLOCK_SIZE];
+ protected ForDeltaUtil forDeltaUtil;
+ protected PForUtil pforUtil;
- private int docBufferUpto;
+ protected final long[] docBuffer = new long[BLOCK_SIZE + 1];
+ protected final boolean indexHasFreq;
- final IndexInput startDocIn;
-
- IndexInput docIn;
- PostingDecodingUtil docInUtil;
- final boolean indexHasFreq;
- final boolean indexHasPos;
- final boolean indexHasOffsetsOrPayloads;
-
- private int docFreq; // number of docs in this posting list
- private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
- private int docCountUpto; // number of docs in or before the current block
- private int doc; // doc we last read
- private long prevDocID; // last doc ID of the previous block
+ protected int doc; // doc we last read
// level 0 skip data
- private int level0LastDocID;
+ protected int level0LastDocID;
+
// level 1 skip data
- private int level1LastDocID;
- private long level1DocEndFP;
- private int level1DocCountUpto;
+ protected int level1LastDocID;
+ protected long level1DocEndFP;
+ protected int level1DocCountUpto;
- private boolean needsFreq; // true if the caller actually needs frequencies
- private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
- private long freqFP;
+ protected int docFreq; // number of docs in this posting list
+ protected long
+ totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
- public BlockDocsEnum(FieldInfo fieldInfo) throws IOException {
- this.startDocIn = Lucene912PostingsReader.this.docIn;
- this.docIn = null;
+ protected int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
+
+ protected int docCountUpto; // number of docs in or before the current block
+ protected long prevDocID; // last doc ID of the previous block
+
+ protected int docBufferUpto;
+
+ protected IndexInput docIn;
+ protected PostingDecodingUtil docInUtil;
+
+ protected AbstractPostingsEnum(FieldInfo fieldInfo) {
indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- indexHasPos =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- indexHasOffsetsOrPayloads =
- fieldInfo
- .getIndexOptions()
- .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0
- || fieldInfo.hasPayloads();
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
// advance()
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
}
- public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
- return docIn == startDocIn
- && indexHasFreq
- == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0)
- && indexHasPos
- == (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
- >= 0)
- && indexHasOffsetsOrPayloads
- == (fieldInfo
- .getIndexOptions()
- .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0
- || fieldInfo.hasPayloads());
+ @Override
+ public int docID() {
+ return doc;
}
- public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
+ protected void resetIndexInput(IntBlockTermState termState) throws IOException {
docFreq = termState.docFreq;
- totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
singletonDocID = termState.singletonDocID;
if (docFreq > 1) {
if (docIn == null) {
// lazy init
- docIn = startDocIn.clone();
+ docIn = Lucene912PostingsReader.this.docIn.clone();
docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
}
prefetchPostings(docIn, termState);
}
+ }
+ protected PostingsEnum resetIdsAndLevelParams(IntBlockTermState termState) throws IOException {
doc = -1;
- this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
- if (indexHasFreq == false || needsFreq == false) {
- // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
- // not fill more than `docFreq` entries.
- Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
- }
prevDocID = -1;
docCountUpto = 0;
level0LastDocID = -1;
@@ -444,9 +403,44 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
level1DocCountUpto = 0;
docBufferUpto = BLOCK_SIZE;
- freqFP = -1;
return this;
}
+ }
+
+ final class BlockDocsEnum extends AbstractPostingsEnum {
+
+ private final long[] freqBuffer = new long[BLOCK_SIZE];
+
+ private boolean needsFreq; // true if the caller actually needs frequencies
+ private long freqFP;
+
+ public BlockDocsEnum(FieldInfo fieldInfo) {
+ super(fieldInfo);
+ }
+
+ public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
+ final IndexOptions options = fieldInfo.getIndexOptions();
+ return docIn == Lucene912PostingsReader.this.docIn
+ && indexHasFreq == (options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0);
+ }
+
+ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
+ resetIndexInput(termState);
+ if (pforUtil == null && docFreq >= BLOCK_SIZE) {
+ pforUtil = new PForUtil(new ForUtil());
+ forDeltaUtil = new ForDeltaUtil();
+ }
+ totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
+
+ this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
+ if (indexHasFreq == false || needsFreq == false) {
+ // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
+ // not fill more than `docFreq` entries.
+ Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
+ }
+ freqFP = -1;
+ return resetIdsAndLevelParams(termState);
+ }
@Override
public int freq() throws IOException {
@@ -460,30 +454,25 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
@Override
- public int nextPosition() throws IOException {
+ public int nextPosition() {
return -1;
}
@Override
- public int startOffset() throws IOException {
+ public int startOffset() {
return -1;
}
@Override
- public int endOffset() throws IOException {
+ public int endOffset() {
return -1;
}
@Override
- public BytesRef getPayload() throws IOException {
+ public BytesRef getPayload() {
return null;
}
- @Override
- public int docID() {
- return doc;
- }
-
private void refillFullBlock() throws IOException {
assert docFreq - docCountUpto >= BLOCK_SIZE;
@@ -493,7 +482,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (needsFreq) {
freqFP = docIn.getFilePointer();
}
- pforUtil.skip(docIn);
+ PForUtil.skip(docIn);
}
docCountUpto += BLOCK_SIZE;
prevDocID = docBuffer[BLOCK_SIZE - 1];
@@ -531,7 +520,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level1DocCountUpto += LEVEL1_NUM_DOCS;
if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
- level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level1LastDocID = NO_MORE_DOCS;
break;
}
@@ -567,7 +556,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docIn.skipBytes(readVLong15(docIn));
docCountUpto += BLOCK_SIZE;
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
break;
}
}
@@ -584,7 +573,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
refillFullBlock();
level0LastDocID = (int) docBuffer[BLOCK_SIZE - 1];
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
refillRemainder();
}
}
@@ -627,13 +616,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
- final class EverythingEnum extends PostingsEnum {
+ final class EverythingEnum extends AbstractPostingsEnum {
- final ForUtil forUtil = new ForUtil();
- final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
- final PForUtil pforUtil = new PForUtil(forUtil);
-
- private final long[] docBuffer = new long[BLOCK_SIZE + 1];
private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
@@ -649,30 +633,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private int startOffset;
private int endOffset;
- private int docBufferUpto;
private int posBufferUpto;
- final IndexInput startDocIn;
-
- IndexInput docIn;
- PostingDecodingUtil docInUtil;
final IndexInput posIn;
final PostingDecodingUtil posInUtil;
final IndexInput payIn;
final PostingDecodingUtil payInUtil;
final BytesRef payload;
- final boolean indexHasFreq;
- final boolean indexHasPos;
final boolean indexHasOffsets;
final boolean indexHasPayloads;
final boolean indexHasOffsetsOrPayloads;
- private int docFreq; // number of docs in this posting list
- private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
- private int docCountUpto; // number of docs in or before the current block
- private int doc; // doc we last read
- private long prevDocID; // last doc ID of the previous block
private int freq; // freq we last read
private int position; // current position
@@ -680,28 +652,16 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
// skip these to "catch up":
private long posPendingCount;
- // Where this term's postings start in the .pos file:
- private long posTermStartFP;
-
- // Where this term's payloads/offsets start in the .pay
- // file:
- private long payTermStartFP;
-
// File pointer where the last (vInt encoded) pos delta
// block is. We need this to know whether to bulk
// decode vs vInt decode the block:
private long lastPosBlockFP;
- // level 0 skip data
- private int level0LastDocID;
private long level0PosEndFP;
private int level0BlockPosUpto;
private long level0PayEndFP;
private int level0BlockPayUpto;
- // level 1 skip data
- private int level1LastDocID;
- private long level1DocEndFP;
- private int level1DocCountUpto;
+
private long level1PosEndFP;
private int level1BlockPosUpto;
private long level1PayEndFP;
@@ -710,14 +670,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
private boolean needsOffsets; // true if we actually need offsets
private boolean needsPayloads; // true if we actually need payloads
- private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
-
public EverythingEnum(FieldInfo fieldInfo) throws IOException {
- this.startDocIn = Lucene912PostingsReader.this.docIn;
- this.docIn = null;
- indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- indexHasPos =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ super(fieldInfo);
indexHasOffsets =
fieldInfo
.getIndexOptions()
@@ -754,14 +708,10 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
payloadBytes = null;
payload = null;
}
-
- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
- // advance()
- docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
}
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
- return docIn == startDocIn
+ return docIn == Lucene912PostingsReader.this.docIn
&& indexHasOffsets
== (fieldInfo
.getIndexOptions()
@@ -771,19 +721,19 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
- docFreq = termState.docFreq;
- posTermStartFP = termState.posStartFP;
- payTermStartFP = termState.payStartFP;
- totalTermFreq = termState.totalTermFreq;
- singletonDocID = termState.singletonDocID;
- if (docFreq > 1) {
- if (docIn == null) {
- // lazy init
- docIn = startDocIn.clone();
- docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
- }
- prefetchPostings(docIn, termState);
+ resetIndexInput(termState);
+ if (forDeltaUtil == null && docFreq >= BLOCK_SIZE) {
+ forDeltaUtil = new ForDeltaUtil();
}
+ totalTermFreq = termState.totalTermFreq;
+ if (pforUtil == null && totalTermFreq >= BLOCK_SIZE) {
+ pforUtil = new PForUtil(new ForUtil());
+ }
+ // Where this term's postings start in the .pos file:
+ final long posTermStartFP = termState.posStartFP;
+ // Where this term's payloads/offsets start in the .pay
+ // file:
+ final long payTermStartFP = termState.payStartFP;
posIn.seek(posTermStartFP);
if (indexHasOffsetsOrPayloads) {
payIn.seek(payTermStartFP);
@@ -805,39 +755,20 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
- doc = -1;
- prevDocID = -1;
- docCountUpto = 0;
- level0LastDocID = -1;
- if (docFreq < LEVEL1_NUM_DOCS) {
- level1LastDocID = NO_MORE_DOCS;
- if (docFreq > 1) {
- docIn.seek(termState.docStartFP);
- }
- } else {
- level1LastDocID = -1;
- level1DocEndFP = termState.docStartFP;
- }
- level1DocCountUpto = 0;
level1BlockPosUpto = 0;
level1BlockPayUpto = 0;
level0BlockPosUpto = 0;
level0BlockPayUpto = 0;
- docBufferUpto = BLOCK_SIZE;
posBufferUpto = BLOCK_SIZE;
- return this;
+
+ return resetIdsAndLevelParams(termState);
}
@Override
- public int freq() throws IOException {
+ public int freq() {
return freq;
}
- @Override
- public int docID() {
- return doc;
- }
-
private void refillDocs() throws IOException {
final int left = docFreq - docCountUpto;
assert left >= 0;
@@ -878,7 +809,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level1DocCountUpto += LEVEL1_NUM_DOCS;
if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
- level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level1LastDocID = NO_MORE_DOCS;
break;
}
@@ -936,7 +867,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level0BlockPayUpto = docIn.readVInt();
}
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
}
refillDocs();
@@ -975,9 +906,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
posBufferUpto = BLOCK_SIZE;
} else {
- for (int i = docBufferUpto; i < BLOCK_SIZE; ++i) {
- posPendingCount += freqBuffer[i];
- }
+ posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
}
if (docFreq - docCountUpto >= BLOCK_SIZE) {
@@ -1003,7 +932,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docIn.seek(blockEndFP);
docCountUpto += BLOCK_SIZE;
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
break;
}
}
@@ -1023,9 +952,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
int next = findFirstGreater(docBuffer, target, docBufferUpto);
- for (int i = docBufferUpto; i <= next; ++i) {
- posPendingCount += freqBuffer[i];
- }
+ posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
this.freq = (int) freqBuffer[next];
this.docBufferUpto = next + 1;
position = 0;
@@ -1045,20 +972,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (toSkip < leftInBlock) {
int end = (int) (posBufferUpto + toSkip);
if (indexHasPayloads) {
- for (int i = posBufferUpto; i < end; ++i) {
- payloadByteUpto += payloadLengthBuffer[i];
- }
+ payloadByteUpto += sumOverRange(payloadLengthBuffer, posBufferUpto, end);
}
posBufferUpto = end;
} else {
toSkip -= leftInBlock;
while (toSkip >= BLOCK_SIZE) {
assert posIn.getFilePointer() != lastPosBlockFP;
- pforUtil.skip(posIn);
+ PForUtil.skip(posIn);
if (indexHasPayloads) {
// Skip payloadLength block:
- pforUtil.skip(payIn);
+ PForUtil.skip(payIn);
// Skip payloadBytes block:
int numBytes = payIn.readVInt();
@@ -1066,19 +991,16 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
if (indexHasOffsets) {
- pforUtil.skip(payIn);
- pforUtil.skip(payIn);
+ PForUtil.skip(payIn);
+ PForUtil.skip(payIn);
}
toSkip -= BLOCK_SIZE;
}
refillPositions();
payloadByteUpto = 0;
- posBufferUpto = 0;
final int toSkipInt = (int) toSkip;
if (indexHasPayloads) {
- for (int i = 0; i < toSkipInt; ++i) {
- payloadByteUpto += payloadLengthBuffer[i];
- }
+ payloadByteUpto += sumOverRange(payloadLengthBuffer, 0, toSkipInt);
}
posBufferUpto = toSkipInt;
}
@@ -1137,7 +1059,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} else {
// this works, because when writing a vint block we always force the first length to be
// written
- pforUtil.skip(payIn); // skip over lengths
+ PForUtil.skip(payIn); // skip over lengths
int numBytes = payIn.readVInt(); // read length of payloadBytes
payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
}
@@ -1151,8 +1073,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} else {
// this works, because when writing a vint block we always force the first length to be
// written
- pforUtil.skip(payIn); // skip over starts
- pforUtil.skip(payIn); // skip over lengths
+ PForUtil.skip(payIn); // skip over starts
+ PForUtil.skip(payIn); // skip over lengths
}
}
}
@@ -1217,83 +1139,48 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
- final class BlockImpactsDocsEnum extends ImpactsEnum {
+ private abstract class BlockImpactsEnum extends ImpactsEnum {
- final ForUtil forUtil = new ForUtil();
- final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
- final PForUtil pforUtil = new PForUtil(forUtil);
+ protected final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
+ protected final PForUtil pforUtil = new PForUtil(new ForUtil());
- private final long[] docBuffer = new long[BLOCK_SIZE + 1];
- private final long[] freqBuffer = new long[BLOCK_SIZE];
+ protected final long[] docBuffer = new long[BLOCK_SIZE + 1];
+ protected final long[] freqBuffer = new long[BLOCK_SIZE];
- private int docBufferUpto;
+ protected final int docFreq; // number of docs in this posting list
- final IndexInput startDocIn;
+ protected final IndexInput docIn;
+ protected final PostingDecodingUtil docInUtil;
- final IndexInput docIn;
- final PostingDecodingUtil docInUtil;
- final boolean indexHasFreq;
- final boolean indexHasPos;
- final boolean indexHasOffsetsOrPayloads;
-
- private int docFreq; // number of docs in this posting list
- private int docCountUpto; // number of docs in or before the current block
- private int doc; // doc we last read
- private long prevDocID; // last doc ID of the previous block
- private long freqFP;
+ protected int docCountUpto; // number of docs in or before the current block
+ protected int doc = -1; // doc we last read
+ protected long prevDocID = -1; // last doc ID of the previous block
+ protected int docBufferUpto = BLOCK_SIZE;
// true if we shallow-advanced to a new block that we have not decoded yet
- private boolean needsRefilling;
+ protected boolean needsRefilling;
// level 0 skip data
- private int level0LastDocID;
- private long level0DocEndFP;
- private final BytesRef level0SerializedImpacts;
- private final ByteArrayDataInput level0SerializedImpactsIn = new ByteArrayDataInput();
- private final MutableImpactList level0Impacts;
+ protected int level0LastDocID = -1;
+ protected long level0DocEndFP;
+ protected final BytesRef level0SerializedImpacts;
+ protected final MutableImpactList level0Impacts;
// level 1 skip data
- private int level1LastDocID;
- private long level1DocEndFP;
- private int level1DocCountUpto;
- private final BytesRef level1SerializedImpacts;
- private final ByteArrayDataInput level1SerializedImpactsIn = new ByteArrayDataInput();
- private final MutableImpactList level1Impacts;
+ protected int level1LastDocID;
+ protected long level1DocEndFP;
+ protected int level1DocCountUpto = 0;
+ protected final BytesRef level1SerializedImpacts;
+ protected final MutableImpactList level1Impacts;
- public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
- throws IOException {
- this.startDocIn = Lucene912PostingsReader.this.docIn;
- indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- indexHasPos =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
- indexHasOffsetsOrPayloads =
- fieldInfo
- .getIndexOptions()
- .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0
- || fieldInfo.hasPayloads();
- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
- // advance()
- docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-
- docFreq = termState.docFreq;
- if (docFreq > 1) {
- docIn = startDocIn.clone();
- docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
- prefetchPostings(docIn, termState);
- } else {
- docIn = null;
- docInUtil = null;
- }
-
- doc = -1;
- if (indexHasFreq == false) {
- // Filling this buffer may not be cheap when doing primary key lookups, so we make sure to
- // not fill more than `docFreq` entries.
- Arrays.fill(freqBuffer, 0, Math.min(ForUtil.BLOCK_SIZE, docFreq), 1);
- }
- prevDocID = -1;
- docCountUpto = 0;
- level0LastDocID = -1;
+ private BlockImpactsEnum(IntBlockTermState termState) throws IOException {
+ this.docFreq = termState.docFreq;
+ this.docIn = Lucene912PostingsReader.this.docIn.clone();
+ this.docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
+ prefetchPostings(docIn, termState);
+ level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
+ level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
+ level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
+ level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
if (docFreq < LEVEL1_NUM_DOCS) {
level1LastDocID = NO_MORE_DOCS;
if (docFreq > 1) {
@@ -1303,13 +1190,89 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level1LastDocID = -1;
level1DocEndFP = termState.docStartFP;
}
- level1DocCountUpto = 0;
- docBufferUpto = BLOCK_SIZE;
+ // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
+ // advance()
+ docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int startOffset() {
+ return -1;
+ }
+
+ @Override
+ public int endOffset() {
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() {
+ return null;
+ }
+
+ @Override
+ public long cost() {
+ return docFreq;
+ }
+
+ private final Impacts impacts =
+ new Impacts() {
+
+ private final ByteArrayDataInput scratch = new ByteArrayDataInput();
+
+ @Override
+ public int numLevels() {
+ return level1LastDocID == NO_MORE_DOCS ? 1 : 2;
+ }
+
+ @Override
+ public int getDocIdUpTo(int level) {
+ if (level == 0) {
+ return level0LastDocID;
+ }
+ return level == 1 ? level1LastDocID : NO_MORE_DOCS;
+ }
+
+ @Override
+ public List<Impact> getImpacts(int level) {
+ if (level == 0 && level0LastDocID != NO_MORE_DOCS) {
+ return readImpacts(level0SerializedImpacts, level0Impacts);
+ }
+ if (level == 1) {
+ return readImpacts(level1SerializedImpacts, level1Impacts);
+ }
+ return DUMMY_IMPACTS;
+ }
+
+ private List<Impact> readImpacts(BytesRef serialized, MutableImpactList impactsList) {
+ var scratch = this.scratch;
+ scratch.reset(serialized.bytes, 0, serialized.length);
+ Lucene912PostingsReader.readImpacts(scratch, impactsList);
+ return impactsList;
+ }
+ };
+
+ @Override
+ public Impacts getImpacts() {
+ return impacts;
+ }
+ }
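
The shared Impacts view above fixes the level count at one or two, with DUMMY_IMPACTS standing in for an exhausted level 0. A hedged, caller-side sketch of how skipping code typically consumes this structure (not part of the change itself):

    Impacts impacts = impactsEnum.getImpacts();
    for (int level = 0; level < impacts.numLevels(); ++level) {
      int upTo = impacts.getDocIdUpTo(level);           // every doc at this level is <= upTo
      for (Impact impact : impacts.getImpacts(level)) {
        // each (impact.freq, impact.norm) pair bounds the best score achievable up to upTo
      }
    }
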
+
+ final class BlockImpactsDocsEnum extends BlockImpactsEnum {
+ final boolean indexHasPos;
+
+ private long freqFP;
+
+ public BlockImpactsDocsEnum(boolean indexHasPos, IntBlockTermState termState)
+ throws IOException {
+ super(termState);
+ this.indexHasPos = indexHasPos;
freqFP = -1;
- level0SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel0);
- level1SerializedImpacts = new BytesRef(maxImpactNumBytesAtLevel1);
- level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
- level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
}
@Override
@@ -1323,45 +1286,22 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
@Override
- public int nextPosition() throws IOException {
+ public int nextPosition() {
return -1;
}
- @Override
- public int startOffset() throws IOException {
- return -1;
- }
-
- @Override
- public int endOffset() throws IOException {
- return -1;
- }
-
- @Override
- public BytesRef getPayload() throws IOException {
- return null;
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
private void refillDocs() throws IOException {
final int left = docFreq - docCountUpto;
assert left >= 0;
if (left >= BLOCK_SIZE) {
forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
-
- if (indexHasFreq) {
- freqFP = docIn.getFilePointer();
- pforUtil.skip(docIn);
- }
+ freqFP = docIn.getFilePointer();
+ PForUtil.skip(docIn);
docCountUpto += BLOCK_SIZE;
} else {
// Read vInts:
- PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
+ PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
prefixSum(docBuffer, left, prevDocID);
docBuffer[left] = NO_MORE_DOCS;
freqFP = -1;
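
Both refill paths above produce absolute doc IDs from deltas: the full-block path folds the prefix sum into decodeAndPrefixSum, while the vInt tail calls prefixSum explicitly. A minimal sketch of that step, assuming the first delta is relative to the previous block's last doc ID:

    static void prefixSum(long[] buffer, int len, long base) {
      buffer[0] += base;                // anchor on the last doc ID of the previous block
      for (int i = 1; i < len; ++i) {
        buffer[i] += buffer[i - 1];     // each entry becomes an absolute doc ID
      }
    }
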
@@ -1381,7 +1321,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level1DocCountUpto += LEVEL1_NUM_DOCS;
if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
- level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level1LastDocID = NO_MORE_DOCS;
break;
}
@@ -1425,7 +1365,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docIn.skipBytes(blockLength);
docCountUpto += BLOCK_SIZE;
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
break;
}
}
@@ -1468,7 +1408,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level0SerializedImpacts.length = numImpactBytes;
docIn.seek(skip0End);
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
}
refillDocs();
@@ -1500,109 +1440,22 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docBufferUpto = next + 1;
return doc;
}
-
- @Override
- public Impacts getImpacts() throws IOException {
- return new Impacts() {
-
- @Override
- public int numLevels() {
- int numLevels = 0;
- if (level0LastDocID != NO_MORE_DOCS) {
- numLevels++;
- }
- if (level1LastDocID != NO_MORE_DOCS) {
- numLevels++;
- }
- if (numLevels == 0) {
- numLevels++;
- }
- return numLevels;
- }
-
- @Override
- public int getDocIdUpTo(int level) {
- if (level0LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- return level0LastDocID;
- }
- level--;
- }
-
- if (level1LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- return level1LastDocID;
- }
- level--;
- }
-
- return NO_MORE_DOCS;
- }
-
- @Override
- public List<Impact> getImpacts(int level) {
- if (level0LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- level0SerializedImpactsIn.reset(
- level0SerializedImpacts.bytes, 0, level0SerializedImpacts.length);
- readImpacts(level0SerializedImpactsIn, level0Impacts);
- return level0Impacts;
- }
- level--;
- }
-
- if (level1LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- level1SerializedImpactsIn.reset(
- level1SerializedImpacts.bytes, 0, level1SerializedImpacts.length);
- readImpacts(level1SerializedImpactsIn, level1Impacts);
- return level1Impacts;
- }
- level--;
- }
-
- return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
- }
- };
- }
-
- @Override
- public long cost() {
- return docFreq;
- }
}
- final class BlockImpactsPostingsEnum extends ImpactsEnum {
-
- final ForUtil forUtil = new ForUtil();
- final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
- final PForUtil pforUtil = new PForUtil(forUtil);
-
- private final long[] docBuffer = new long[BLOCK_SIZE + 1];
- private final long[] freqBuffer = new long[BLOCK_SIZE];
+ final class BlockImpactsPostingsEnum extends BlockImpactsEnum {
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
- private int docBufferUpto;
private int posBufferUpto;
-
- final IndexInput startDocIn;
-
- final IndexInput docIn;
- final PostingDecodingUtil docInUtil;
final IndexInput posIn;
final PostingDecodingUtil posInUtil;
final boolean indexHasFreq;
- final boolean indexHasPos;
final boolean indexHasOffsets;
final boolean indexHasPayloads;
final boolean indexHasOffsetsOrPayloads;
- private int docFreq; // number of docs in this posting list
- private long totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
- private int docCountUpto; // number of docs in or before the current block
- private int doc; // doc we last read
- private long prevDocID; // last doc ID of the previous block
+ private final long
+ totalTermFreq; // sum of freqBuffer in this posting list (or docFreq when omitted)
private int freq; // freq we last read
private int position; // current position
@@ -1610,70 +1463,37 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
// skip these to "catch up":
private long posPendingCount;
- // Where this term's postings start in the .pos file:
- private long posTermStartFP;
-
// File pointer where the last (vInt encoded) pos delta
// block is. We need this to know whether to bulk
// decode vs vInt decode the block:
- private long lastPosBlockFP;
-
- // true if we shallow-advanced to a new block that we have not decoded yet
- private boolean needsRefilling;
+ private final long lastPosBlockFP;
// level 0 skip data
- private int level0LastDocID;
- private long level0DocEndFP;
private long level0PosEndFP;
private int level0BlockPosUpto;
- private final BytesRefBuilder level0SerializedImpacts = new BytesRefBuilder();
- private final ByteArrayDataInput level0SerializedImpactsIn = new ByteArrayDataInput();
- private final MutableImpactList level0Impacts;
// level 1 skip data
- private int level1LastDocID;
- private long level1DocEndFP;
- private int level1DocCountUpto;
private long level1PosEndFP;
private int level1BlockPosUpto;
- private final BytesRefBuilder level1SerializedImpacts = new BytesRefBuilder();
- private final ByteArrayDataInput level1SerializedImpactsIn = new ByteArrayDataInput();
- private final MutableImpactList level1Impacts;
- private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
+ private final int singletonDocID; // docid when there is a single pulsed posting, otherwise -1
public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
throws IOException {
- this.startDocIn = Lucene912PostingsReader.this.docIn;
- indexHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
- indexHasPos =
- fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ super(termState);
+ final IndexOptions options = fieldInfo.getIndexOptions();
+ indexHasFreq = options.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
indexHasOffsets =
- fieldInfo
- .getIndexOptions()
- .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
- >= 0;
+ options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
indexHasPayloads = fieldInfo.hasPayloads();
indexHasOffsetsOrPayloads = indexHasOffsets || indexHasPayloads;
this.posIn = Lucene912PostingsReader.this.posIn.clone();
posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
- // We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
- // advance()
- docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
-
- docFreq = termState.docFreq;
- posTermStartFP = termState.posStartFP;
+ // Where this term's postings start in the .pos file:
+ final long posTermStartFP = termState.posStartFP;
totalTermFreq = termState.totalTermFreq;
singletonDocID = termState.singletonDocID;
- if (docFreq > 1) {
- docIn = startDocIn.clone();
- docInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(docIn);
- prefetchPostings(docIn, termState);
- } else {
- docIn = null;
- docInUtil = null;
- }
posIn.seek(posTermStartFP);
level1PosEndFP = posTermStartFP;
level0PosEndFP = posTermStartFP;
@@ -1685,40 +1505,15 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
} else {
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
}
-
- doc = -1;
- prevDocID = -1;
- docCountUpto = 0;
- level0LastDocID = -1;
- if (docFreq < LEVEL1_NUM_DOCS) {
- level1LastDocID = NO_MORE_DOCS;
- if (docFreq > 1) {
- docIn.seek(termState.docStartFP);
- }
- } else {
- level1LastDocID = -1;
- level1DocEndFP = termState.docStartFP;
- }
- level1DocCountUpto = 0;
level1BlockPosUpto = 0;
- docBufferUpto = BLOCK_SIZE;
posBufferUpto = BLOCK_SIZE;
- level0SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel0);
- level1SerializedImpacts.growNoCopy(maxImpactNumBytesAtLevel1);
- level0Impacts = new MutableImpactList(maxNumImpactsAtLevel0);
- level1Impacts = new MutableImpactList(maxNumImpactsAtLevel1);
}
@Override
- public int freq() throws IOException {
+ public int freq() {
return freq;
}
- @Override
- public int docID() {
- return doc;
- }
-
private void refillDocs() throws IOException {
final int left = docFreq - docCountUpto;
assert left >= 0;
@@ -1755,7 +1550,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
level1DocCountUpto += LEVEL1_NUM_DOCS;
if (docFreq - docCountUpto < LEVEL1_NUM_DOCS) {
- level1LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level1LastDocID = NO_MORE_DOCS;
break;
}
@@ -1765,8 +1560,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
long skip1EndFP = docIn.readShort() + docIn.getFilePointer();
int numImpactBytes = docIn.readShort();
if (level1LastDocID >= target) {
- docIn.readBytes(level1SerializedImpacts.bytes(), 0, numImpactBytes);
- level1SerializedImpacts.setLength(numImpactBytes);
+ docIn.readBytes(level1SerializedImpacts.bytes, 0, numImpactBytes);
+ level1SerializedImpacts.length = numImpactBytes;
} else {
docIn.skipBytes(numImpactBytes);
}
@@ -1794,9 +1589,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
posPendingCount = level0BlockPosUpto;
posBufferUpto = BLOCK_SIZE;
} else {
- for (int i = docBufferUpto; i < BLOCK_SIZE; ++i) {
- posPendingCount += freqBuffer[i];
- }
+ posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
}
if (docFreq - docCountUpto >= BLOCK_SIZE) {
@@ -1809,8 +1602,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
if (target <= level0LastDocID) {
int numImpactBytes = docIn.readVInt();
- docIn.readBytes(level0SerializedImpacts.bytes(), 0, numImpactBytes);
- level0SerializedImpacts.setLength(numImpactBytes);
+ docIn.readBytes(level0SerializedImpacts.bytes, 0, numImpactBytes);
+ level0SerializedImpacts.length = numImpactBytes;
level0PosEndFP += docIn.readVLong();
level0BlockPosUpto = docIn.readByte();
if (indexHasOffsetsOrPayloads) {
@@ -1826,7 +1619,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
docIn.seek(level0DocEndFP);
docCountUpto += BLOCK_SIZE;
} else {
- level0LastDocID = DocIdSetIterator.NO_MORE_DOCS;
+ level0LastDocID = NO_MORE_DOCS;
break;
}
}
@@ -1849,71 +1642,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
- @Override
- public Impacts getImpacts() throws IOException {
- return new Impacts() {
-
- @Override
- public int numLevels() {
- int numLevels = 0;
- if (level0LastDocID != NO_MORE_DOCS) {
- numLevels++;
- }
- if (level1LastDocID != NO_MORE_DOCS) {
- numLevels++;
- }
- if (numLevels == 0) {
- numLevels++;
- }
- return numLevels;
- }
-
- @Override
- public int getDocIdUpTo(int level) {
- if (level0LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- return level0LastDocID;
- }
- level--;
- }
-
- if (level1LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- return level1LastDocID;
- }
- level--;
- }
-
- return NO_MORE_DOCS;
- }
-
- @Override
- public List<Impact> getImpacts(int level) {
- if (level0LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- level0SerializedImpactsIn.reset(
- level0SerializedImpacts.bytes(), 0, level0SerializedImpacts.length());
- readImpacts(level0SerializedImpactsIn, level0Impacts);
- return level0Impacts;
- }
- level--;
- }
-
- if (level1LastDocID != NO_MORE_DOCS) {
- if (level == 0) {
- level1SerializedImpactsIn.reset(
- level1SerializedImpacts.bytes(), 0, level1SerializedImpacts.length());
- readImpacts(level1SerializedImpactsIn, level1Impacts);
- return level1Impacts;
- }
- level--;
- }
-
- return Collections.singletonList(new Impact(Integer.MAX_VALUE, 1L));
- }
- };
- }
-
@Override
public int nextDoc() throws IOException {
advanceShallow(doc + 1);
@@ -1939,9 +1667,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
int next = findFirstGreater(docBuffer, target, docBufferUpto);
- for (int i = docBufferUpto; i <= next; ++i) {
- posPendingCount += freqBuffer[i];
- }
+ posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
freq = (int) freqBuffer[next];
docBufferUpto = next + 1;
position = 0;
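
advance() leans on findFirstGreater to locate the first buffered doc at or beyond the target; the NO_MORE_DOCS sentinel written to docBuffer[BLOCK_SIZE] is what lets the scan run without a separate exhaustion check. A plausible sketch (the method body is outside this diff):

    private static int findFirstGreater(long[] buffer, int target, int from) {
      for (int i = from; i <= BLOCK_SIZE; ++i) {
        if (buffer[i] >= target) {
          return i;      // the sentinel guarantees we return before falling through
        }
      }
      return BLOCK_SIZE;
    }
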
@@ -1962,7 +1688,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
toSkip -= leftInBlock;
while (toSkip >= BLOCK_SIZE) {
assert posIn.getFilePointer() != lastPosBlockFP;
- pforUtil.skip(posIn);
+ PForUtil.skip(posIn);
toSkip -= BLOCK_SIZE;
}
refillPositions();
@@ -2021,26 +1747,6 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
posPendingCount--;
return position;
}
-
- @Override
- public int startOffset() {
- return -1;
- }
-
- @Override
- public int endOffset() {
- return -1;
- }
-
- @Override
- public BytesRef getPayload() {
- return null;
- }
-
- @Override
- public long cost() {
- return docFreq;
- }
}
/**
@@ -2067,7 +1773,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
}
}
- private void prefetchPostings(IndexInput docIn, IntBlockTermState state) throws IOException {
+ private static void prefetchPostings(IndexInput docIn, IntBlockTermState state)
+ throws IOException {
assert state.docFreq > 1; // Singletons are inlined in the terms dict, nothing to prefetch
if (docIn.getFilePointer() != state.docStartFP) {
// Don't prefetch if the input is already positioned at the right offset, which suggests that
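
The hunk is truncated mid-comment, but the guard already shows the intent: only issue a prefetch when the reader is not sitting at the term's postings already. A hedged sketch of the likely remainder, assuming IndexInput#prefetch and using a placeholder length:

    if (docIn.getFilePointer() != state.docStartFP) {
      // ... comment continues in the source ...
      docIn.prefetch(state.docStartFP, 1); // length is a placeholder, not taken from the source
    }
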
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsWriter.java
index 3d493622c05..df34510de07 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/Lucene912PostingsWriter.java
@@ -342,7 +342,7 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
}
@Override
- public void finishDoc() throws IOException {
+ public void finishDoc() {
docBufferUpto++;
docCount++;
@@ -443,7 +443,6 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
private void writeLevel1SkipData() throws IOException {
docOut.writeVInt(docID - level1LastDocID);
- long numImpactBytes = scratchOutput.size();
final long level1End;
if (writeFreqs) {
List<Impact> impacts = level1CompetitiveFreqNormAccumulator.getCompetitiveFreqNormPairs();
@@ -451,7 +450,7 @@ public class Lucene912PostingsWriter extends PushPostingsWriterBase {
maxNumImpactsAtLevel1 = impacts.size();
}
writeImpacts(impacts, scratchOutput);
- numImpactBytes = scratchOutput.size();
+ long numImpactBytes = scratchOutput.size();
if (numImpactBytes > maxImpactNumBytesAtLevel1) {
maxImpactNumBytesAtLevel1 = Math.toIntExact(numImpactBytes);
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PForUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PForUtil.java
index 798101b6531..3857eabbe44 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PForUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PForUtil.java
@@ -121,7 +121,7 @@ final class PForUtil {
}
/** Skip 128 integers. */
- void skip(DataInput in) throws IOException {
+ static void skip(DataInput in) throws IOException {
final int token = Byte.toUnsignedInt(in.readByte());
final int bitsPerValue = token & 0x1f;
final int numExceptions = token >>> 5;
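
Only the header parsing of the now-static skip is visible here. Presumably the rest of the method skips over the packed block, much like the ForDeltaUtil.skip that this change deletes from gen_ForDeltaUtil.py below. A rough sketch under that assumption, treating each patch exception as two stored bytes:

    if (bitsPerValue == 0) {
      in.readVLong();                       // a single repeated value covers the block
      in.skipBytes(numExceptions * 2L);     // assumed (position, high-bits) byte pairs
    } else {
      in.skipBytes(numBytes(bitsPerValue) + numExceptions * 2L);
    }
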
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java
index 4834dd73e22..1ae808d308f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/PostingsUtil.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.lucene912;
import java.io.IOException;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.GroupVIntUtil;
/** Utility class to encode/decode postings block. */
final class PostingsUtil {
@@ -35,7 +36,7 @@ final class PostingsUtil {
boolean indexHasFreq,
boolean decodeFreq)
throws IOException {
- docIn.readGroupVInts(docBuffer, num);
+ GroupVIntUtil.readGroupVInts(docIn, docBuffer, num);
if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01;
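
The loop is cut off by the hunk boundary right after the low-bit test. The convention implied by the visible line is that each encoded value packs the doc delta shifted left by one, with the low bit flagging a frequency of exactly one; the body plausibly continues along these lines (a sketch, not shown in this diff):

    freqBuffer[i] = docBuffer[i] & 0x01;    // low bit set: the freq is exactly 1
    docBuffer[i] >>= 1;                     // recover the doc delta
    if (freqBuffer[i] == 0) {
      freqBuffer[i] = docIn.readVInt();     // otherwise the freq follows as its own vInt
    }
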
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py
index b3bf493c86b..56c402372a6 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/gen_ForDeltaUtil.py
@@ -308,11 +308,6 @@ public final class ForDeltaUtil {
}
}
- void skip(IndexInput in) throws IOException {
- final int bitsPerValue = Byte.toUnsignedInt(in.readByte());
- in.skipBytes(numBytes(bitsPerValue));
- }
-
"""
def primitive_size_for_bpv(bpv):
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/package-info.java
index 853f86a855a..b9ddb1227b1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene912/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene912/package-info.java
@@ -15,417 +15,5 @@
* limitations under the License.
*/
-/**
- * Lucene 9.12 file format.
- *
- * Apache Lucene - Index File Formats
- *
- * Introduction
- *
- * This document defines the index file formats used in this version of Lucene. If you are using a
- * different version of Lucene, please consult the copy of docs/ that was distributed with the
- * version you are using.
- *
- * This document attempts to provide a high-level definition of the Apache Lucene file formats.
- *
- * Definitions
- *
- * The fundamental concepts in Lucene are index, document, field and term.
- *
- * An index contains a sequence of documents.
- *
- *   - A document is a sequence of fields.
- *   - A field is a named sequence of terms.
- *   - A term is a sequence of bytes.
- *
- * The same sequence of bytes in two different fields is considered a different term. Thus terms
- * are represented as a pair: the string naming the field, and the bytes within the field.
- *
- * Inverted Indexing
- *
- * Lucene's index stores terms and statistics about those terms in order to make term-based
- * search more efficient. Lucene's terms index falls into the family of indexes known as an
- * inverted index. This is because it can list, for a term, the documents that contain it. This
- * is the inverse of the natural relationship, in which documents list terms.
- *
- * Types of Fields
- *
- * In Lucene, fields may be stored, in which case their text is stored in the index literally, in
- * a non-inverted manner. Fields that are inverted are called indexed. A field may be both stored
- * and indexed.
- *
- * The text of a field may be tokenized into terms to be indexed, or the text of a field may be
- * used literally as a term to be indexed. Most fields are tokenized, but sometimes it is useful
- * for certain identifier fields to be indexed literally.
- *
- * See the {@link org.apache.lucene.document.Field Field} java docs for more information on
- * Fields.
- *
- * Segments
- *
- * Lucene indexes may be composed of multiple sub-indexes, or segments. Each segment is a fully
- * independent index, which could be searched separately. Indexes evolve by:
- *
- *   - Creating new segments for newly added documents.
- *   - Merging existing segments.
- *
- * Searches may involve multiple segments and/or multiple indexes, each index potentially
- * composed of a set of segments.
- *
- * Document Numbers
- *
- * Internally, Lucene refers to documents by an integer document number. The first document added
- * to an index is numbered zero, and each subsequent document added gets a number one greater
- * than the previous.
- *
- * Note that a document's number may change, so caution should be taken when storing these
- * numbers outside of Lucene. In particular, numbers may change in the following situations:
- *
- *   - The numbers stored in each segment are unique only within the segment, and must be
- *     converted before they can be used in a larger context. The standard technique is to
- *     allocate each segment a range of values, based on the range of numbers used in that
- *     segment. To convert a document number from a segment to an external value, the segment's
- *     base document number is added. To convert an external value back to a segment-specific
- *     value, the segment is identified by the range that the external value is in, and the
- *     segment's base value is subtracted. For example two five document segments might be
- *     combined, so that the first segment has a base value of zero, and the second of five.
- *     Document three from the second segment would have an external value of eight.
- *   - When documents are deleted, gaps are created in the numbering. These are eventually
- *     removed as the index evolves through merging. Deleted documents are dropped when segments
- *     are merged. A freshly-merged segment thus has no gaps in its numbering.
- *
- * Index Structure Overview
- *
- * Each segment index maintains the following:
- *
- *   - {@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment info}. This
- *     contains metadata about a segment, such as the number of documents, what files it uses,
- *     and information about how the segment is sorted
- *   - {@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Field names}. This
- *     contains metadata about the set of named fields used in the index.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Stored Field values}.
- *     This contains, for each document, a list of attribute-value pairs, where the attributes
- *     are field names. These are used to store auxiliary information about the document, such as
- *     its title, url, or an identifier to access a database. The set of stored fields are what
- *     is returned for each hit when searching. This is keyed by document number.
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term dictionary}. A
- *     dictionary containing all of the terms used in all of the indexed fields of all of the
- *     documents. The dictionary also contains the number of documents which contain the term,
- *     and pointers to the term's frequency and proximity data.
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Frequency data}.
- *     For each term in the dictionary, the numbers of all the documents that contain that term,
- *     and the frequency of the term in that document, unless frequencies are omitted ({@link
- *     org.apache.lucene.index.IndexOptions#DOCS IndexOptions.DOCS})
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Proximity data}.
- *     For each term in the dictionary, the positions that the term occurs in each document. Note
- *     that this will not exist if all fields in all documents omit position data.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Normalization factors}. For
- *     each field in each document, a value is stored that is multiplied into the score for hits
- *     on that field.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vectors}. For each
- *     field in each document, the term vector (sometimes called document vector) may be stored.
- *     A term vector consists of term text and term frequency. To add Term Vectors to your index
- *     see the {@link org.apache.lucene.document.Field Field} constructors
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-document values}.
- *     Like stored values, these are also keyed by document number, but are generally intended to
- *     be loaded into main memory for fast access. Whereas stored values are generally intended
- *     for summary results from searches, per-document values are useful for things like scoring
- *     factors.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live documents}. An
- *     optional file indicating which documents are live.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values}. Optional
- *     pair of files, recording dimensionally indexed fields, to enable fast numeric range
- *     filtering and large numeric values like BigInteger and BigDecimal (1D) and geographic
- *     shape intersection (2D, 3D).
- *   - {@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values}. The
- *     vector format stores numeric vectors in a format optimized for random access and
- *     computation, supporting high-dimensional nearest-neighbor search.
- *
- * Details on each of these are provided in their linked pages.
- *
- * File Naming
- *
- * All files belonging to a segment have the same name with varying extensions. The extensions
- * correspond to the different file formats described below. When using the Compound File format
- * (default for small segments) these files (except for the Segment info file, the Lock file, and
- * Deleted documents file) are collapsed into a single .cfs file (see below for details)
- *
- * Typically, all segments in an index are stored in a single directory, although this is not
- * required.
- *
- * File names are never re-used. That is, when any file is saved to the Directory it is given a
- * never before used filename. This is achieved using a simple generations approach. For example,
- * the first segments file is segments_1, then segments_2, etc. The generation is a sequential
- * long integer represented in alpha-numeric (base 36) form.
- *
- * Summary of File Extensions
- *
- * The following table summarizes the names and extensions of the files in Lucene:
- *
- *   - {@link org.apache.lucene.index.SegmentInfos Segments File} (segments_N): Stores
- *     information about a commit point
- *   - Lock File (write.lock): The Write lock prevents multiple IndexWriters from writing to the
- *     same file.
- *   - {@link org.apache.lucene.codecs.lucene99.Lucene99SegmentInfoFormat Segment Info} (.si):
- *     Stores metadata about a segment
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90CompoundFormat Compound File} (.cfs,
- *     .cfe): An optional "virtual" file consisting of all the other index files for systems that
- *     frequently run out of file handles.
- *   - {@link org.apache.lucene.codecs.lucene94.Lucene94FieldInfosFormat Fields} (.fnm): Stores
- *     information about the fields
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Index} (.fdx):
- *     Contains pointers to field data
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90StoredFieldsFormat Field Data} (.fdt):
- *     The stored fields for documents
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Dictionary} (.tim):
- *     The term dictionary, stores term info
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Term Index} (.tip): The
- *     index into the Term Dictionary
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Frequencies} (.doc):
- *     Contains the list of docs which contain each term along with frequency
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Positions} (.pos):
- *     Stores position information about where a term occurs in the index
- *   - {@link org.apache.lucene.codecs.lucene912.Lucene912PostingsFormat Payloads} (.pay): Stores
- *     additional per-position metadata information such as character offsets and user payloads
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90NormsFormat Norms} (.nvd, .nvm): Encodes
- *     length and boost factors for docs and fields
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat Per-Document Values}
- *     (.dvd, .dvm): Encodes additional scoring factors or other per-document information.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Index}
- *     (.tvx): Stores offset into the document data file
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90TermVectorsFormat Term Vector Data}
- *     (.tvd): Contains term vector data.
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90LiveDocsFormat Live Documents} (.liv):
- *     Info about what documents are live
- *   - {@link org.apache.lucene.codecs.lucene90.Lucene90PointsFormat Point values} (.dii, .dim):
- *     Holds indexed points
- *   - {@link org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat Vector values} (.vec,
- *     .vem, .veq, vex): Holds indexed vectors; .vec files contain the raw vector data, .vem the
- *     vector metadata, .veq the quantized vector data, and .vex the hnsw graph data.
- *
- * Lock File
- *
- * The write lock, which is stored in the index directory by default, is named "write.lock". If
- * the lock directory is different from the index directory then the write lock will be named
- * "XXXX-write.lock" where XXXX is a unique prefix derived from the full path to the index
- * directory. When this file is present, a writer is currently modifying the index (adding or
- * removing documents). This lock file ensures that only one writer is modifying the index at a
- * time.
- *
- * History
- *
- * Compatibility notes are provided in this document, describing how file formats have changed
- * from prior versions:
- *
- *   - In version 2.1, the file format was changed to allow lock-less commits (ie, no more commit
- *     lock). The change is fully backwards compatible: you can open a pre-2.1 index for
- *     searching or adding/deleting of docs. When the new segments file is saved (committed), it
- *     will be written in the new file format (meaning no specific "upgrade" process is needed).
- *     But note that once a commit has occurred, pre-2.1 Lucene will not be able to read the
- *     index.
- *   - In version 2.3, the file format was changed to allow segments to share a single set of doc
- *     store (vectors & stored fields) files. This allows for faster indexing in certain cases.
- *     The change is fully backwards compatible (in the same way as the lock-less commits change
- *     in 2.1).
- *   - In version 2.4, Strings are now written as a true UTF-8 byte sequence, not Java's modified
- *     UTF-8. See LUCENE-510 for details.
- *   - In version 2.9, an optional opaque Map<String,String> CommitUserData may be passed to
- *     IndexWriter's commit methods (and later retrieved), which is recorded in the segments_N
- *     file. See LUCENE-1382 for details. Also, diagnostics were added to each segment written
- *     recording details about why it was written (due to flush, merge; which OS/JRE was used;
- *     etc.). See issue LUCENE-1654 for details.
- *   - In version 3.0, compressed fields are no longer written to the index (they can still be
- *     read, but on merge the new segment will write them, uncompressed). See issue LUCENE-1960
- *     for details.
- *   - In version 3.1, segments record the code version that created them. See LUCENE-2720 for
- *     details. Additionally segments track explicitly whether or not they have term vectors. See
- *     LUCENE-2811 for details.
- *   - In version 3.2, numeric fields are written natively to the stored fields file; previously
- *     they were stored in text format only.
- *   - In version 3.4, fields can omit position data while still indexing term frequencies.
- *   - In version 4.0, the format of the inverted index became extensible via the {@link
- *     org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage ({@code DocValues})
- *     was introduced. Normalization factors need no longer be a single byte, they can be any
- *     {@link org.apache.lucene.index.NumericDocValues NumericDocValues}. Terms need not be
- *     unicode strings, they can be any byte sequence. Term offsets can optionally be indexed
- *     into the postings lists. Payloads can be stored in the term vectors.
- *   - In version 4.1, the format of the postings list changed to use either FOR compression or
- *     variable-byte encoding, depending upon the frequency of the term. Terms appearing only
- *     once were changed to inline directly into the term dictionary. Stored fields are
- *     compressed by default.
- *   - In version 4.2, term vectors are compressed by default. DocValues has a new multi-valued
- *     type (SortedSet), that can be used for faceting/grouping/joining on multi-valued fields.
- *   - In version 4.5, DocValues were extended to explicitly represent missing values.
- *   - In version 4.6, FieldInfos were extended to support per-field DocValues generation, to
- *     allow updating NumericDocValues fields.
- *   - In version 4.8, checksum footers were added to the end of each index file for improved
- *     data integrity. Specifically, the last 8 bytes of every index file contain the zlib-crc32
- *     checksum of the file.
- *   - In version 4.9, DocValues has a new multi-valued numeric type (SortedNumeric) that is
- *     suitable for faceting/sorting/analytics.
- *   - In version 5.4, DocValues have been improved to store more information on disk: addresses
- *     for binary fields and ord indexes for multi-valued fields.
- *   - In version 6.0, Points were added, for multi-dimensional range/distance search.
- *   - In version 6.2, a new Segment info format was added that reads/writes the index sort, to
- *     support index sorting.
- *   - In version 7.0, DocValues have been improved to better support sparse doc values thanks to
- *     an iterator API.
- *   - In version 8.0, postings have been enhanced to record, for each block of doc ids, the
- *     (term freq, normalization factor) pairs that may trigger the maximum score of the block.
- *     This information is recorded alongside skip data in order to be able to skip blocks of doc
- *     ids if they may not produce high enough scores. Additionally doc values and norms have
- *     been extended with jump-tables to make access O(1) instead of O(n), where n is the number
- *     of elements to skip when advancing in the data.
- *   - In version 8.4, postings, positions, offsets and payload lengths have moved to a more
- *     performant encoding that is vectorized.
- *   - In version 8.6, index sort serialization is delegated to the sorts themselves, to allow
- *     user-defined sorts to be used
- *   - In version 8.7, stored fields compression became adaptive to better handle documents with
- *     smaller stored fields.
- *   - In version 9.0, vector-valued fields were added.
- *   - In version 9.1, vector-valued fields were modified to add a graph hierarchy.
- *   - In version 9.2, docs of vector-valued fields were moved from .vem to .vec and encoded by
- *     IndexDISI. ordToDoc mappings were added to .vem.
- *   - In version 9.5, HNSW graph connections were changed to be delta-encoded with vints.
- *     Additionally, metadata file size improvements were made by delta-encoding nodes by graph
- *     layer and not writing the node ids for the zeroth layer.
- *   - In version 9.9, vector scalar quantization support was added, allowing the HNSW vector
- *     format to utilize int8 quantized vectors for float32 vector search.
- *   - In version 9.12, skip data was refactored to have only two levels: every 128 docs and
- *     every 4,096 docs, and to be inlined in postings lists. This resulted in a speedup for
- *     queries that need skipping, especially conjunctions.
- *
- * Limitations
- *
- * Lucene uses a Java int to refer to document numbers, and the index file format uses an Int32
- * on-disk to store document numbers. This is a limitation of both the index file format and the
- * current implementation. Eventually these should be replaced with either UInt64 values, or
- * better yet, {@link org.apache.lucene.store.DataOutput#writeVInt VInt} values which have no
- * limit.
- */
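
The File Naming section removed above notes that generations are sequential long integers rendered in base 36. A quick illustration (Character.MAX_RADIX is 36 in Java):

    long generation = 36;
    String fileName = "segments_" + Long.toString(generation, Character.MAX_RADIX);
    // fileName == "segments_10": base 36 keeps generation suffixes short
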
+/** Lucene 9.12 file format. */
package org.apache.lucene.codecs.lucene912;
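
The Document Numbers section deleted above walks through combining two five-document segments. The same arithmetic in code form, with the values taken from that example:

    int secondSegmentBase = 5;                           // the first segment occupies 0..4
    int localDocNo = 3;                                  // document three of the second segment
    int externalDocNo = secondSegmentBase + localDocNo;  // == 8, as in the javadoc example
    int backToLocal = externalDocNo - secondSegmentBase; // == 3 again
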
diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/RandomAccessQuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/HasIndexSlice.java
similarity index 57%
rename from lucene/core/src/java/org/apache/lucene/util/quantization/RandomAccessQuantizedByteVectorValues.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene95/HasIndexSlice.java
index b86009a690e..2bfe72386a0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/quantization/RandomAccessQuantizedByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/HasIndexSlice.java
@@ -14,23 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.util.quantization;
+package org.apache.lucene.codecs.lucene95;
-import java.io.IOException;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
+import org.apache.lucene.store.IndexInput;
/**
- * Random access values for byte[], but also includes accessing the score correction
- * constant for the current vector in the buffer.
- *
- * @lucene.experimental
+ * Implementors can return the IndexInput from which their values are read. For use by vector
+ * quantizers.
*/
-public interface RandomAccessQuantizedByteVectorValues extends RandomAccessVectorValues.Bytes {
+public interface HasIndexSlice {
- ScalarQuantizer getScalarQuantizer();
-
- float getScoreCorrectionConstant(int vectorOrd) throws IOException;
-
- @Override
- RandomAccessQuantizedByteVectorValues copy() throws IOException;
+ /** Returns an IndexInput from which to read this instance's values. */
+ IndexInput getSlice();
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java
index f45158eadac..1e78c8ea7aa 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java
@@ -29,13 +29,11 @@ import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
-public abstract class OffHeapByteVectorValues extends ByteVectorValues
- implements RandomAccessVectorValues.Bytes {
+public abstract class OffHeapByteVectorValues extends ByteVectorValues implements HasIndexSlice {
protected final int dimension;
protected final int size;
@@ -132,9 +130,6 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
* vector.
*/
public static class DenseOffHeapVectorValues extends OffHeapByteVectorValues {
-
- private int doc = -1;
-
public DenseOffHeapVectorValues(
int dimension,
int size,
@@ -145,36 +140,17 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
super(dimension, size, slice, byteSize, flatVectorsScorer, vectorSimilarityFunction);
}
- @Override
- public byte[] vectorValue() throws IOException {
- return vectorValue(doc);
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- if (target >= size) {
- return doc = NO_MORE_DOCS;
- }
- return doc = target;
- }
-
@Override
public DenseOffHeapVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(
dimension, size, slice.clone(), byteSize, flatVectorsScorer, similarityFunction);
}
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
+
@Override
public Bits getAcceptOrds(Bits acceptDocs) {
return acceptDocs;
@@ -183,17 +159,18 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
@Override
public VectorScorer scorer(byte[] query) throws IOException {
DenseOffHeapVectorValues copy = copy();
+ DocIndexIterator iterator = copy.iterator();
RandomVectorScorer scorer =
flatVectorsScorer.getRandomVectorScorer(similarityFunction, copy, query);
return new VectorScorer() {
@Override
public float score() throws IOException {
- return scorer.score(copy.doc);
+ return scorer.score(iterator.docID());
}
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return iterator;
}
};
}
@@ -238,27 +215,6 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
configuration.size);
}
- @Override
- public byte[] vectorValue() throws IOException {
- return vectorValue(disi.index());
- }
-
- @Override
- public int docID() {
- return disi.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return disi.nextDoc();
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- return disi.advance(target);
- }
-
@Override
public SparseOffHeapVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(
@@ -276,6 +232,11 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
return (int) ordToDoc.get(ord);
}
+ @Override
+ public DocIndexIterator iterator() {
+ return IndexedDISI.asDocIndexIterator(disi);
+ }
+
@Override
public Bits getAcceptOrds(Bits acceptDocs) {
if (acceptDocs == null) {
@@ -307,7 +268,7 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return copy.disi;
}
};
}
@@ -322,8 +283,6 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
super(dimension, 0, null, 0, flatVectorsScorer, vectorSimilarityFunction);
}
- private int doc = -1;
-
@Override
public int dimension() {
return super.dimension();
@@ -335,23 +294,13 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
}
@Override
- public byte[] vectorValue() throws IOException {
+ public byte[] vectorValue(int ord) throws IOException {
throw new UnsupportedOperationException();
}
@Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- return doc = NO_MORE_DOCS;
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
}
@Override
@@ -359,11 +308,6 @@ public abstract class OffHeapByteVectorValues extends ByteVectorValues
throw new UnsupportedOperationException();
}
- @Override
- public byte[] vectorValue(int targetOrd) throws IOException {
- throw new UnsupportedOperationException();
- }
-
@Override
public int ordToDoc(int ord) {
throw new UnsupportedOperationException();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java
index 1f61283b500..2384657e93e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java
@@ -28,13 +28,11 @@ import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
-public abstract class OffHeapFloatVectorValues extends FloatVectorValues
- implements RandomAccessVectorValues.Floats {
+public abstract class OffHeapFloatVectorValues extends FloatVectorValues implements HasIndexSlice {
protected final int dimension;
protected final int size;
@@ -128,8 +126,6 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
*/
public static class DenseOffHeapVectorValues extends OffHeapFloatVectorValues {
- private int doc = -1;
-
public DenseOffHeapVectorValues(
int dimension,
int size,
@@ -140,55 +136,42 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
super(dimension, size, slice, byteSize, flatVectorsScorer, similarityFunction);
}
- @Override
- public float[] vectorValue() throws IOException {
- return vectorValue(doc);
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- if (target >= size) {
- return doc = NO_MORE_DOCS;
- }
- return doc = target;
- }
-
@Override
public DenseOffHeapVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(
dimension, size, slice.clone(), byteSize, flatVectorsScorer, similarityFunction);
}
+ @Override
+ public int ordToDoc(int ord) {
+ return ord;
+ }
+
@Override
public Bits getAcceptOrds(Bits acceptDocs) {
return acceptDocs;
}
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
+
@Override
public VectorScorer scorer(float[] query) throws IOException {
DenseOffHeapVectorValues copy = copy();
+ DocIndexIterator iterator = copy.iterator();
RandomVectorScorer randomVectorScorer =
flatVectorsScorer.getRandomVectorScorer(similarityFunction, copy, query);
return new VectorScorer() {
@Override
public float score() throws IOException {
- return randomVectorScorer.score(copy.doc);
+ return randomVectorScorer.score(iterator.docID());
}
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return iterator;
}
};
}
@@ -227,27 +210,6 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
configuration.size);
}
- @Override
- public float[] vectorValue() throws IOException {
- return vectorValue(disi.index());
- }
-
- @Override
- public int docID() {
- return disi.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return disi.nextDoc();
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- return disi.advance(target);
- }
-
@Override
public SparseOffHeapVectorValues copy() throws IOException {
return new SparseOffHeapVectorValues(
@@ -283,20 +245,26 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
};
}
+ @Override
+ public DocIndexIterator iterator() {
+ return IndexedDISI.asDocIndexIterator(disi);
+ }
+
@Override
public VectorScorer scorer(float[] query) throws IOException {
SparseOffHeapVectorValues copy = copy();
+ DocIndexIterator iterator = copy.iterator();
RandomVectorScorer randomVectorScorer =
flatVectorsScorer.getRandomVectorScorer(similarityFunction, copy, query);
return new VectorScorer() {
@Override
public float score() throws IOException {
- return randomVectorScorer.score(copy.disi.index());
+ return randomVectorScorer.score(iterator.index());
}
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return iterator;
}
};
}
@@ -311,8 +279,6 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
super(dimension, 0, null, 0, flatVectorsScorer, similarityFunction);
}
- private int doc = -1;
-
@Override
public int dimension() {
return super.dimension();
@@ -323,26 +289,6 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
return 0;
}
- @Override
- public float[] vectorValue() throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) {
- return doc = NO_MORE_DOCS;
- }
-
@Override
public EmptyOffHeapVectorValues copy() {
throw new UnsupportedOperationException();
@@ -354,8 +300,8 @@ public abstract class OffHeapFloatVectorValues extends FloatVectorValues
}
@Override
- public int ordToDoc(int ord) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java
index 1af68618d83..b731e758b7a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsWriter.java
@@ -39,6 +39,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;
@@ -361,11 +362,10 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
private static DocsWithFieldSet writeByteVectorData(
IndexOutput output, ByteVectorValues byteVectorValues) throws IOException {
DocsWithFieldSet docsWithField = new DocsWithFieldSet();
- for (int docV = byteVectorValues.nextDoc();
- docV != NO_MORE_DOCS;
- docV = byteVectorValues.nextDoc()) {
+ KnnVectorValues.DocIndexIterator iter = byteVectorValues.iterator();
+ for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
// write vector
- byte[] binaryValue = byteVectorValues.vectorValue();
+ byte[] binaryValue = byteVectorValues.vectorValue(iter.index());
assert binaryValue.length == byteVectorValues.dimension() * VectorEncoding.BYTE.byteSize;
output.writeBytes(binaryValue, binaryValue.length);
docsWithField.add(docV);
@@ -382,11 +382,10 @@ public final class Lucene99FlatVectorsWriter extends FlatVectorsWriter {
ByteBuffer buffer =
ByteBuffer.allocate(floatVectorValues.dimension() * VectorEncoding.FLOAT32.byteSize)
.order(ByteOrder.LITTLE_ENDIAN);
- for (int docV = floatVectorValues.nextDoc();
- docV != NO_MORE_DOCS;
- docV = floatVectorValues.nextDoc()) {
+ KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
+ for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
// write vector
- float[] value = floatVectorValues.vectorValue();
+ float[] value = floatVectorValues.vectorValue(iter.index());
buffer.asFloatBuffer().put(value);
output.writeBytes(buffer.array(), buffer.limit());
docsWithField.add(docV);
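The two hunks above establish the idiom this patch applies across the writers: docids come from a DocIndexIterator while the vector itself is fetched by ordinal via iter.index(). The bare pattern, for any FloatVectorValues:

    KnnVectorValues.DocIndexIterator iter = floatVectorValues.iterator();
    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
      float[] vector = floatVectorValues.vectorValue(iter.index()); // ord-based lookup
      // ... consume (doc, vector) ...
    }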
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
index dc0fb7184c7..0f4e8196d52 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsWriter.java
@@ -32,14 +32,16 @@ import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
+import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
@@ -54,7 +56,6 @@ import org.apache.lucene.util.hnsw.HnswGraphMerger;
import org.apache.lucene.util.hnsw.IncrementalHnswGraphMerger;
import org.apache.lucene.util.hnsw.NeighborArray;
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
import org.apache.lucene.util.packed.DirectMonotonicWriter;
@@ -359,18 +360,18 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
mergeState.knnVectorsReaders[i], mergeState.docMaps[i], mergeState.liveDocs[i]);
}
}
- DocIdSetIterator mergedVectorIterator = null;
+ KnnVectorValues mergedVectorValues = null;
switch (fieldInfo.getVectorEncoding()) {
case BYTE ->
- mergedVectorIterator =
+ mergedVectorValues =
KnnVectorsWriter.MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
case FLOAT32 ->
- mergedVectorIterator =
+ mergedVectorValues =
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
}
graph =
merger.merge(
- mergedVectorIterator,
+ mergedVectorValues,
segmentWriteState.infoStream,
scorerSupplier.totalVectorCount());
vectorIndexNodeOffsets = writeGraph(graph);
@@ -582,13 +583,13 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
case BYTE ->
scorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
- RandomAccessVectorValues.fromBytes(
+ ByteVectorValues.fromBytes(
(List) flatFieldVectorsWriter.getVectors(),
fieldInfo.getVectorDimension()));
case FLOAT32 ->
scorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
- RandomAccessVectorValues.fromFloats(
+ FloatVectorValues.fromFloats(
(List) flatFieldVectorsWriter.getVectors(),
fieldInfo.getVectorDimension()));
};
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorScorer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorScorer.java
index 8443017d3f9..a4770f01f46 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorScorer.java
@@ -21,12 +21,12 @@ import static org.apache.lucene.codecs.hnsw.ScalarQuantizedVectorScorer.quantize
import java.io.IOException;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.VectorUtil;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizer;
/**
@@ -45,9 +45,9 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityFunction, RandomAccessVectorValues vectorValues)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues)
throws IOException {
- if (vectorValues instanceof RandomAccessQuantizedByteVectorValues quantizedByteVectorValues) {
+ if (vectorValues instanceof QuantizedByteVectorValues quantizedByteVectorValues) {
return new ScalarQuantizedRandomVectorScorerSupplier(
quantizedByteVectorValues, similarityFunction);
}
@@ -57,11 +57,9 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- float[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, float[] target)
throws IOException {
- if (vectorValues instanceof RandomAccessQuantizedByteVectorValues quantizedByteVectorValues) {
+ if (vectorValues instanceof QuantizedByteVectorValues quantizedByteVectorValues) {
ScalarQuantizer scalarQuantizer = quantizedByteVectorValues.getScalarQuantizer();
byte[] targetBytes = new byte[target.length];
float offsetCorrection =
@@ -79,9 +77,7 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityFunction,
- RandomAccessVectorValues vectorValues,
- byte[] target)
+ VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues, byte[] target)
throws IOException {
return nonQuantizedDelegate.getRandomVectorScorer(similarityFunction, vectorValues, target);
}
@@ -96,7 +92,7 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
float offsetCorrection,
VectorSimilarityFunction sim,
float constMultiplier,
- RandomAccessQuantizedByteVectorValues values) {
+ QuantizedByteVectorValues values) {
return switch (sim) {
case EUCLIDEAN -> new Euclidean(values, constMultiplier, targetBytes);
case COSINE, DOT_PRODUCT ->
@@ -120,7 +116,7 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
byte[] targetBytes,
float offsetCorrection,
float constMultiplier,
- RandomAccessQuantizedByteVectorValues values,
+ QuantizedByteVectorValues values,
FloatToFloatFunction scoreAdjustmentFunction) {
if (values.getScalarQuantizer().getBits() <= 4) {
if (values.getVectorByteLength() != values.dimension() && values.getSlice() != null) {
@@ -137,10 +133,9 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
private static class Euclidean extends RandomVectorScorer.AbstractRandomVectorScorer {
private final float constMultiplier;
private final byte[] targetBytes;
- private final RandomAccessQuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values;
- private Euclidean(
- RandomAccessQuantizedByteVectorValues values, float constMultiplier, byte[] targetBytes) {
+ private Euclidean(QuantizedByteVectorValues values, float constMultiplier, byte[] targetBytes) {
super(values);
this.values = values;
this.constMultiplier = constMultiplier;
@@ -159,13 +154,13 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
/** Calculates dot product on quantized vectors, applying the appropriate corrections */
private static class DotProduct extends RandomVectorScorer.AbstractRandomVectorScorer {
private final float constMultiplier;
- private final RandomAccessQuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values;
private final byte[] targetBytes;
private final float offsetCorrection;
private final FloatToFloatFunction scoreAdjustmentFunction;
public DotProduct(
- RandomAccessQuantizedByteVectorValues values,
+ QuantizedByteVectorValues values,
float constMultiplier,
byte[] targetBytes,
float offsetCorrection,
@@ -193,14 +188,14 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
private static class CompressedInt4DotProduct
extends RandomVectorScorer.AbstractRandomVectorScorer {
private final float constMultiplier;
- private final RandomAccessQuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values;
private final byte[] compressedVector;
private final byte[] targetBytes;
private final float offsetCorrection;
private final FloatToFloatFunction scoreAdjustmentFunction;
private CompressedInt4DotProduct(
- RandomAccessQuantizedByteVectorValues values,
+ QuantizedByteVectorValues values,
float constMultiplier,
byte[] targetBytes,
float offsetCorrection,
@@ -231,13 +226,13 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
private static class Int4DotProduct extends RandomVectorScorer.AbstractRandomVectorScorer {
private final float constMultiplier;
- private final RandomAccessQuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values;
private final byte[] targetBytes;
private final float offsetCorrection;
private final FloatToFloatFunction scoreAdjustmentFunction;
public Int4DotProduct(
- RandomAccessQuantizedByteVectorValues values,
+ QuantizedByteVectorValues values,
float constMultiplier,
byte[] targetBytes,
float offsetCorrection,
@@ -271,13 +266,12 @@ public class Lucene99ScalarQuantizedVectorScorer implements FlatVectorsScorer {
implements RandomVectorScorerSupplier {
private final VectorSimilarityFunction vectorSimilarityFunction;
- private final RandomAccessQuantizedByteVectorValues values;
- private final RandomAccessQuantizedByteVectorValues values1;
- private final RandomAccessQuantizedByteVectorValues values2;
+ private final QuantizedByteVectorValues values;
+ private final QuantizedByteVectorValues values1;
+ private final QuantizedByteVectorValues values2;
public ScalarQuantizedRandomVectorScorerSupplier(
- RandomAccessQuantizedByteVectorValues values,
- VectorSimilarityFunction vectorSimilarityFunction)
+ QuantizedByteVectorValues values, VectorSimilarityFunction vectorSimilarityFunction)
throws IOException {
this.values = values;
this.values1 = values.copy();
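The supplier above keeps two copies (values1, values2) so each handed-out scorer owns an independent cursor over the same stored vectors; with the ord-based API a scorer can then compare arbitrary ordinal pairs during graph construction. A hedged usage sketch, assuming the RandomVectorScorerSupplier contract as used elsewhere in this patch:

    RandomVectorScorer s = supplier.scorer(0); // scorer anchored on ordinal 0
    float sim01 = s.score(1);                  // similarity of ord 0 vs ord 1
    float sim05 = s.score(5);                  // same anchor, different candidate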
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
index 40002fe06a6..32eea942e2a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
@@ -135,7 +135,7 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
}
final long quantizedVectorBytes;
- if (fieldEntry.compress) {
+ if (fieldEntry.bits <= 4 && fieldEntry.compress) {
// two dimensions -> one byte
quantizedVectorBytes = ((dimension + 1) >> 1) + Float.BYTES;
} else {
@@ -402,10 +402,10 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
private static final class QuantizedVectorValues extends FloatVectorValues {
private final FloatVectorValues rawVectorValues;
- private final OffHeapQuantizedByteVectorValues quantizedVectorValues;
+ private final QuantizedByteVectorValues quantizedVectorValues;
QuantizedVectorValues(
- FloatVectorValues rawVectorValues, OffHeapQuantizedByteVectorValues quantizedVectorValues) {
+ FloatVectorValues rawVectorValues, QuantizedByteVectorValues quantizedVectorValues) {
this.rawVectorValues = rawVectorValues;
this.quantizedVectorValues = quantizedVectorValues;
}
@@ -421,34 +421,28 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
}
@Override
- public float[] vectorValue() throws IOException {
- return rawVectorValues.vectorValue();
+ public float[] vectorValue(int ord) throws IOException {
+ return rawVectorValues.vectorValue(ord);
}
@Override
- public int docID() {
- return rawVectorValues.docID();
+ public int ordToDoc(int ord) {
+ return rawVectorValues.ordToDoc(ord);
}
@Override
- public int nextDoc() throws IOException {
- int rawDocId = rawVectorValues.nextDoc();
- int quantizedDocId = quantizedVectorValues.nextDoc();
- assert rawDocId == quantizedDocId;
- return quantizedDocId;
- }
-
- @Override
- public int advance(int target) throws IOException {
- int rawDocId = rawVectorValues.advance(target);
- int quantizedDocId = quantizedVectorValues.advance(target);
- assert rawDocId == quantizedDocId;
- return quantizedDocId;
+ public QuantizedVectorValues copy() throws IOException {
+ return new QuantizedVectorValues(rawVectorValues.copy(), quantizedVectorValues.copy());
}
@Override
public VectorScorer scorer(float[] query) throws IOException {
return quantizedVectorValues.scorer(query);
}
+
+ @Override
+ public DocIndexIterator iterator() {
+ return rawVectorValues.iterator();
+ }
}
}
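The length-check fix near the top of this file deserves a callout: nibble compression packs two 4-bit codes per byte, so the halved payload applies only when bits <= 4 and compression was enabled at write time; the extra guard protects against a compress flag paired with a wider bit width. The corrected rule in isolation (the uncompressed branch, one byte per dimension plus a float correction, is assumed from the surrounding format code):

    static long quantizedVectorBytes(int dimension, byte bits, boolean compress) {
      if (bits <= 4 && compress) {
        return ((dimension + 1) >> 1) + Float.BYTES; // two dims -> one byte, plus correction
      }
      return dimension + Float.BYTES; // assumed uncompressed layout
    }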
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
index bb333ad45c2..1a30b5271cd 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
@@ -19,9 +19,7 @@ package org.apache.lucene.codecs.lucene99;
import static org.apache.lucene.codecs.KnnVectorsWriter.MergedVectorValues.hasVectorValues;
import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
-import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL;
-import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
-import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
+import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.*;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
@@ -45,6 +43,7 @@ import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Sorter;
@@ -653,12 +652,11 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|| bits <= 4
|| shouldRecomputeQuantiles(mergedQuantiles, quantizationStates)) {
int numVectors = 0;
- FloatVectorValues vectorValues =
- KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
+ DocIdSetIterator iter =
+ KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState)
+ .iterator();
// iterate vectorValues and increment numVectors
- for (int doc = vectorValues.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = vectorValues.nextDoc()) {
+ for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
numVectors++;
}
return buildScalarQuantizer(
@@ -730,11 +728,10 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
? OffHeapQuantizedByteVectorValues.compressedArray(
quantizedByteVectorValues.dimension(), bits)
: null;
- for (int docV = quantizedByteVectorValues.nextDoc();
- docV != NO_MORE_DOCS;
- docV = quantizedByteVectorValues.nextDoc()) {
+ KnnVectorValues.DocIndexIterator iter = quantizedByteVectorValues.iterator();
+ for (int docV = iter.nextDoc(); docV != NO_MORE_DOCS; docV = iter.nextDoc()) {
// write vector
- byte[] binaryValue = quantizedByteVectorValues.vectorValue();
+ byte[] binaryValue = quantizedByteVectorValues.vectorValue(iter.index());
assert binaryValue.length == quantizedByteVectorValues.dimension()
: "dim=" + quantizedByteVectorValues.dimension() + " len=" + binaryValue.length;
if (compressedVector != null) {
@@ -743,7 +740,8 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
} else {
output.writeBytes(binaryValue, binaryValue.length);
}
- output.writeInt(Float.floatToIntBits(quantizedByteVectorValues.getScoreCorrectionConstant()));
+ output.writeInt(
+ Float.floatToIntBits(quantizedByteVectorValues.getScoreCorrectionConstant(iter.index())));
docsWithField.add(docV);
}
return docsWithField;
@@ -855,7 +853,6 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
static class FloatVectorWrapper extends FloatVectorValues {
private final List vectorList;
- protected int curDoc = -1;
FloatVectorWrapper(List vectorList) {
this.vectorList = vectorList;
@@ -872,51 +869,42 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public float[] vectorValue() throws IOException {
- if (curDoc == -1 || curDoc >= vectorList.size()) {
- throw new IOException("Current doc not set or too many iterations");
+ public FloatVectorValues copy() throws IOException {
+ return this;
+ }
+
+ @Override
+ public float[] vectorValue(int ord) throws IOException {
+ if (ord < 0 || ord >= vectorList.size()) {
+ throw new IOException("vector ord " + ord + " out of bounds");
}
- return vectorList.get(curDoc);
+ return vectorList.get(ord);
}
@Override
- public int docID() {
- if (curDoc >= vectorList.size()) {
- return NO_MORE_DOCS;
- }
- return curDoc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- curDoc++;
- return docID();
- }
-
- @Override
- public int advance(int target) throws IOException {
- curDoc = target;
- return docID();
- }
-
- @Override
- public VectorScorer scorer(float[] target) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
}
}
static class QuantizedByteVectorValueSub extends DocIDMerger.Sub {
private final QuantizedByteVectorValues values;
+ private final KnnVectorValues.DocIndexIterator iterator;
QuantizedByteVectorValueSub(MergeState.DocMap docMap, QuantizedByteVectorValues values) {
super(docMap);
this.values = values;
- assert values.docID() == -1;
+ iterator = values.iterator();
+ assert iterator.docID() == -1;
}
@Override
public int nextDoc() throws IOException {
- return values.nextDoc();
+ return iterator.nextDoc();
+ }
+
+ public int index() {
+ return iterator.index();
}
}
@@ -973,7 +961,6 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
private final DocIDMerger docIdMerger;
private final int size;
- private int docId;
private QuantizedByteVectorValueSub current;
private MergedQuantizedVectorValues(
@@ -985,33 +972,16 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
totalSize += sub.values.size();
}
size = totalSize;
- docId = -1;
}
@Override
- public byte[] vectorValue() throws IOException {
- return current.values.vectorValue();
+ public byte[] vectorValue(int ord) throws IOException {
+ return current.values.vectorValue(current.index());
}
@Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- current = docIdMerger.next();
- if (current == null) {
- docId = NO_MORE_DOCS;
- } else {
- docId = current.mappedDocID;
- }
- return docId;
- }
-
- @Override
- public int advance(int target) {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return new CompositeIterator();
}
@Override
@@ -1025,13 +995,51 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public float getScoreCorrectionConstant() throws IOException {
- return current.values.getScoreCorrectionConstant();
+ public float getScoreCorrectionConstant(int ord) throws IOException {
+ return current.values.getScoreCorrectionConstant(current.index());
}
- @Override
- public VectorScorer scorer(float[] target) throws IOException {
- throw new UnsupportedOperationException();
+ private class CompositeIterator extends DocIndexIterator {
+ private int docId;
+ private int ord;
+
+ public CompositeIterator() {
+ docId = -1;
+ ord = -1;
+ }
+
+ @Override
+ public int index() {
+ return ord;
+ }
+
+ @Override
+ public int docID() {
+ return docId;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ current = docIdMerger.next();
+ if (current == null) {
+ docId = NO_MORE_DOCS;
+ ord = NO_MORE_DOCS;
+ } else {
+ docId = current.mappedDocID;
+ ++ord;
+ }
+ return docId;
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long cost() {
+ return size;
+ }
}
}
@@ -1039,6 +1047,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
private final FloatVectorValues values;
private final ScalarQuantizer quantizer;
private final byte[] quantizedVector;
+ private int lastOrd = -1;
private float offsetValue = 0f;
private final VectorSimilarityFunction vectorSimilarityFunction;
@@ -1054,7 +1063,14 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public float getScoreCorrectionConstant() {
+ public float getScoreCorrectionConstant(int ord) {
+ if (ord != lastOrd) {
+ throw new IllegalStateException(
+ "attempt to retrieve score correction for different ord "
+ + ord
+ + " than the quantization was done for: "
+ + lastOrd);
+ }
return offsetValue;
}
@@ -1069,41 +1085,31 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public byte[] vectorValue() throws IOException {
+ public byte[] vectorValue(int ord) throws IOException {
+ if (ord != lastOrd) {
+ offsetValue = quantize(ord);
+ lastOrd = ord;
+ }
return quantizedVector;
}
- @Override
- public int docID() {
- return values.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- int doc = values.nextDoc();
- if (doc != NO_MORE_DOCS) {
- quantize();
- }
- return doc;
- }
-
- @Override
- public int advance(int target) throws IOException {
- int doc = values.advance(target);
- if (doc != NO_MORE_DOCS) {
- quantize();
- }
- return doc;
- }
-
@Override
public VectorScorer scorer(float[] target) throws IOException {
throw new UnsupportedOperationException();
}
- private void quantize() throws IOException {
- offsetValue =
- quantizer.quantize(values.vectorValue(), quantizedVector, vectorSimilarityFunction);
+ private float quantize(int ord) throws IOException {
+ return quantizer.quantize(values.vectorValue(ord), quantizedVector, vectorSimilarityFunction);
+ }
+
+ @Override
+ public int ordToDoc(int ord) {
+ return values.ordToDoc(ord);
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ return values.iterator();
}
}
@@ -1160,9 +1166,9 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public float getScoreCorrectionConstant() throws IOException {
+ public float getScoreCorrectionConstant(int ord) throws IOException {
return scalarQuantizer.recalculateCorrectiveOffset(
- in.vectorValue(), oldScalarQuantizer, vectorSimilarityFunction);
+ in.vectorValue(ord), oldScalarQuantizer, vectorSimilarityFunction);
}
@Override
@@ -1176,35 +1182,24 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public byte[] vectorValue() throws IOException {
- return in.vectorValue();
+ public byte[] vectorValue(int ord) throws IOException {
+ return in.vectorValue(ord);
}
@Override
- public int docID() {
- return in.docID();
+ public int ordToDoc(int ord) {
+ return in.ordToDoc(ord);
}
@Override
- public int nextDoc() throws IOException {
- return in.nextDoc();
- }
-
- @Override
- public int advance(int target) throws IOException {
- return in.advance(target);
- }
-
- @Override
- public VectorScorer scorer(float[] target) throws IOException {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return in.iterator();
}
}
static final class NormalizedFloatVectorValues extends FloatVectorValues {
private final FloatVectorValues values;
private final float[] normalizedVector;
- int curDoc = -1;
public NormalizedFloatVectorValues(FloatVectorValues values) {
this.values = values;
@@ -1222,38 +1217,25 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
}
@Override
- public float[] vectorValue() throws IOException {
+ public int ordToDoc(int ord) {
+ return values.ordToDoc(ord);
+ }
+
+ @Override
+ public float[] vectorValue(int ord) throws IOException {
+ System.arraycopy(values.vectorValue(ord), 0, normalizedVector, 0, normalizedVector.length);
+ VectorUtil.l2normalize(normalizedVector);
return normalizedVector;
}
@Override
- public VectorScorer scorer(float[] query) throws IOException {
- throw new UnsupportedOperationException();
+ public DocIndexIterator iterator() {
+ return values.iterator();
}
@Override
- public int docID() {
- return values.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- curDoc = values.nextDoc();
- if (curDoc != NO_MORE_DOCS) {
- System.arraycopy(values.vectorValue(), 0, normalizedVector, 0, normalizedVector.length);
- VectorUtil.l2normalize(normalizedVector);
- }
- return curDoc;
- }
-
- @Override
- public int advance(int target) throws IOException {
- curDoc = values.advance(target);
- if (curDoc != NO_MORE_DOCS) {
- System.arraycopy(values.vectorValue(), 0, normalizedVector, 0, normalizedVector.length);
- VectorUtil.l2normalize(normalizedVector);
- }
- return curDoc;
+ public NormalizedFloatVectorValues copy() throws IOException {
+ return new NormalizedFloatVectorValues(values.copy());
}
}
}
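QuantizedFloatVectorValues above swaps eager quantize-on-advance for lazy quantize-on-read keyed by lastOrd, and getScoreCorrectionConstant now rejects any ord other than the one just quantized, surfacing out-of-order access bugs instead of silently returning a stale correction. The idiom in isolation, as a self-contained hypothetical sketch (not the patch's class):

    class LazyPerOrd {
      private final byte[] buffer = new byte[16]; // shared output, overwritten per ord
      private int lastOrd = -1;
      private float correction;

      byte[] valueFor(int ord) {
        if (ord != lastOrd) { // recompute only when the ord changes
          correction = recompute(ord, buffer);
          lastOrd = ord;
        }
        return buffer; // callers must copy if they need to keep it
      }

      float correctionFor(int ord) {
        if (ord != lastOrd) {
          throw new IllegalStateException("ord " + ord + " was not the last quantized: " + lastOrd);
        }
        return correction;
      }

      private float recompute(int ord, byte[] out) {
        java.util.Arrays.fill(out, (byte) ord); // stand-in for the real quantization work
        return 0f;
      }
    }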
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java
index 655dcca1166..051c926a679 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/OffHeapQuantizedByteVectorValues.java
@@ -30,15 +30,13 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.packed.DirectMonotonicReader;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizer;
/**
* Read the quantized vector values and their score correction values from the index input. This
* supports both iterated and random access.
*/
-public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValues
- implements RandomAccessQuantizedByteVectorValues {
+public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVectorValues {
protected final int dimension;
protected final int size;
@@ -141,11 +139,6 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
return binaryValue;
}
- @Override
- public float getScoreCorrectionConstant() {
- return scoreCorrectionConstant[0];
- }
-
@Override
public float getScoreCorrectionConstant(int targetOrd) throws IOException {
if (lastOrd == targetOrd) {
@@ -213,8 +206,6 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
*/
public static class DenseOffHeapVectorValues extends OffHeapQuantizedByteVectorValues {
- private int doc = -1;
-
public DenseOffHeapVectorValues(
int dimension,
int size,
@@ -226,30 +217,6 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
super(dimension, size, scalarQuantizer, similarityFunction, vectorsScorer, compress, slice);
}
- @Override
- public byte[] vectorValue() throws IOException {
- return vectorValue(doc);
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- if (target >= size) {
- return doc = NO_MORE_DOCS;
- }
- return doc = target;
- }
-
@Override
public DenseOffHeapVectorValues copy() throws IOException {
return new DenseOffHeapVectorValues(
@@ -270,20 +237,26 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
@Override
public VectorScorer scorer(float[] target) throws IOException {
DenseOffHeapVectorValues copy = copy();
+ DocIndexIterator iterator = copy.iterator();
RandomVectorScorer vectorScorer =
vectorsScorer.getRandomVectorScorer(similarityFunction, copy, target);
return new VectorScorer() {
@Override
public float score() throws IOException {
- return vectorScorer.score(copy.doc);
+ return vectorScorer.score(iterator.index());
}
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return iterator;
}
};
}
+
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
}
private static class SparseOffHeapVectorValues extends OffHeapQuantizedByteVectorValues {
@@ -312,24 +285,8 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
}
@Override
- public byte[] vectorValue() throws IOException {
- return vectorValue(disi.index());
- }
-
- @Override
- public int docID() {
- return disi.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- return disi.nextDoc();
- }
-
- @Override
- public int advance(int target) throws IOException {
- assert docID() < target;
- return disi.advance(target);
+ public DocIndexIterator iterator() {
+ return IndexedDISI.asDocIndexIterator(disi);
}
@Override
@@ -372,17 +329,18 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
@Override
public VectorScorer scorer(float[] target) throws IOException {
SparseOffHeapVectorValues copy = copy();
+ DocIndexIterator iterator = copy.iterator();
RandomVectorScorer vectorScorer =
vectorsScorer.getRandomVectorScorer(similarityFunction, copy, target);
return new VectorScorer() {
@Override
public float score() throws IOException {
- return vectorScorer.score(copy.disi.index());
+ return vectorScorer.score(iterator.index());
}
@Override
public DocIdSetIterator iterator() {
- return copy;
+ return iterator;
}
};
}
@@ -404,8 +362,6 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
null);
}
- private int doc = -1;
-
@Override
public int dimension() {
return super.dimension();
@@ -417,23 +373,8 @@ public abstract class OffHeapQuantizedByteVectorValues extends QuantizedByteVect
}
@Override
- public byte[] vectorValue() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int docID() {
- return doc;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(doc + 1);
- }
-
- @Override
- public int advance(int target) {
- return doc = NO_MORE_DOCS;
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
index 9350c016f67..2e45e232b5f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldDocValuesFormat.java
@@ -38,7 +38,6 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
-import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.util.IOUtils;
/**
@@ -257,7 +256,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
private static class FieldsReader extends DocValuesProducer {
- private final IntObjectHashMap fields = new IntObjectHashMap<>();
+ private final Map fields = new HashMap<>();
private final Map formats = new HashMap<>();
// clone for merge
@@ -271,10 +270,10 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
}
// Then rebuild fields:
- for (IntObjectHashMap.IntObjectCursor ent : other.fields) {
- DocValuesProducer producer = oldToNew.get(ent.value);
+ for (Map.Entry ent : other.fields.entrySet()) {
+ DocValuesProducer producer = oldToNew.get(ent.getValue());
assert producer != null;
- fields.put(ent.key, producer);
+ fields.put(ent.getKey(), producer);
}
}
@@ -303,7 +302,7 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
segmentSuffix,
format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
}
- fields.put(fi.number, formats.get(segmentSuffix));
+ fields.put(fieldName, formats.get(segmentSuffix));
}
}
}
@@ -317,37 +316,37 @@ public abstract class PerFieldDocValuesFormat extends DocValuesFormat {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getNumeric(field);
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getBinary(field);
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSorted(field);
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSortedNumeric(field);
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSortedSet(field);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
- DocValuesProducer producer = fields.get(field.number);
+ DocValuesProducer producer = fields.get(field.name);
return producer == null ? null : producer.getSkipper(field);
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/ByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/ByteVectorValues.java
index d33ca1ca354..e9be3423c18 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ByteVectorValues.java
@@ -17,8 +17,8 @@
package org.apache.lucene.index;
import java.io.IOException;
+import java.util.List;
import org.apache.lucene.document.KnnByteVectorField;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.VectorScorer;
/**
@@ -27,34 +27,21 @@ import org.apache.lucene.search.VectorScorer;
*
* @lucene.experimental
*/
-public abstract class ByteVectorValues extends DocIdSetIterator {
+public abstract class ByteVectorValues extends KnnVectorValues {
/** Sole constructor */
protected ByteVectorValues() {}
- /** Return the dimension of the vectors */
- public abstract int dimension();
-
/**
- * Return the number of vectors for this field.
- *
- * @return the number of vectors returned by this iterator
- */
- public abstract int size();
-
- @Override
- public final long cost() {
- return size();
- }
-
- /**
- * Return the vector value for the current document ID. It is illegal to call this method when the
- * iterator is not positioned: before advancing, or after failing to advance. The returned array
- * may be shared across calls, re-used, and modified as the iterator advances.
+ * Return the vector value for the given vector ordinal, which must be in [0, size() - 1];
+ * otherwise an IndexOutOfBoundsException is thrown. The returned array may be shared across calls.
*
* @return the vector value
*/
- public abstract byte[] vectorValue() throws IOException;
+ public abstract byte[] vectorValue(int ord) throws IOException;
+
+ @Override
+ public abstract ByteVectorValues copy() throws IOException;
/**
* Checks the Vector Encoding of a field
@@ -78,12 +65,53 @@ public abstract class ByteVectorValues extends DocIdSetIterator {
}
/**
- * Return a {@link VectorScorer} for the given query vector. The iterator for the scorer is not
- * the same instance as the iterator for this {@link ByteVectorValues}. It is a copy, and
- * iteration over the scorer will not affect the iteration of this {@link ByteVectorValues}.
+ * Return a {@link VectorScorer} for the given query vector.
*
* @param query the query vector
* @return a {@link VectorScorer} instance or null
*/
- public abstract VectorScorer scorer(byte[] query) throws IOException;
+ public VectorScorer scorer(byte[] query) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public VectorEncoding getEncoding() {
+ return VectorEncoding.BYTE;
+ }
+
+ /**
+ * Creates a {@link ByteVectorValues} from a list of byte arrays.
+ *
+ * @param vectors the list of byte arrays
+ * @param dim the dimension of the vectors
+ * @return a {@link ByteVectorValues} instance
+ */
+ public static ByteVectorValues fromBytes(List vectors, int dim) {
+ return new ByteVectorValues() {
+ @Override
+ public int size() {
+ return vectors.size();
+ }
+
+ @Override
+ public int dimension() {
+ return dim;
+ }
+
+ @Override
+ public byte[] vectorValue(int targetOrd) {
+ return vectors.get(targetOrd);
+ }
+
+ @Override
+ public ByteVectorValues copy() {
+ return this;
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
+ };
+ }
}
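Usage sketch for the new factory (contents and dimension are made up). The returned values are dense, so index() == docID() and the default dense iterator applies:

    List<byte[]> data = List.of(new byte[] {1, 2}, new byte[] {3, 4});
    ByteVectorValues vals = ByteVectorValues.fromBytes(data, 2);
    KnnVectorValues.DocIndexIterator it = vals.iterator();
    while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      byte[] v = vals.vectorValue(it.index());
    }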
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index b8256ecf587..becb00cbb5b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -2760,16 +2760,16 @@ public final class CheckIndex implements Closeable {
CheckIndex.Status.VectorValuesStatus status,
CodecReader codecReader)
throws IOException {
- int docCount = 0;
+ int count = 0;
int everyNdoc = Math.max(values.size() / 64, 1);
- while (values.nextDoc() != NO_MORE_DOCS) {
+ while (count < values.size()) {
// search the first maxNumSearches vectors to exercise the graph
- if (values.docID() % everyNdoc == 0) {
+ if (values.ordToDoc(count) % everyNdoc == 0) {
KnnCollector collector = new TopKnnCollector(10, Integer.MAX_VALUE);
if (vectorsReaderSupportsSearch(codecReader, fieldInfo.name)) {
codecReader
.getVectorReader()
- .search(fieldInfo.name, values.vectorValue(), collector, null);
+ .search(fieldInfo.name, values.vectorValue(count), collector, null);
TopDocs docs = collector.topDocs();
if (docs.scoreDocs.length == 0) {
throw new CheckIndexException(
@@ -2777,7 +2777,7 @@ public final class CheckIndex implements Closeable {
}
}
}
- int valueLength = values.vectorValue().length;
+ int valueLength = values.vectorValue(count).length;
if (valueLength != fieldInfo.getVectorDimension()) {
throw new CheckIndexException(
"Field \""
@@ -2787,19 +2787,19 @@ public final class CheckIndex implements Closeable {
+ " not matching the field's dimension="
+ fieldInfo.getVectorDimension());
}
- ++docCount;
+ ++count;
}
- if (docCount != values.size()) {
+ if (count != values.size()) {
throw new CheckIndexException(
"Field \""
+ fieldInfo.name
+ "\" has size="
+ values.size()
+ " but when iterated, returns "
- + docCount
+ + count
+ " docs with values");
}
- status.totalVectorValues += docCount;
+ status.totalVectorValues += count;
}
private static void checkByteVectorValues(
@@ -2808,21 +2808,23 @@ public final class CheckIndex implements Closeable {
CheckIndex.Status.VectorValuesStatus status,
CodecReader codecReader)
throws IOException {
- int docCount = 0;
+ int count = 0;
int everyNdoc = Math.max(values.size() / 64, 1);
boolean supportsSearch = vectorsReaderSupportsSearch(codecReader, fieldInfo.name);
- while (values.nextDoc() != NO_MORE_DOCS) {
+ while (count < values.size()) {
// search the first maxNumSearches vectors to exercise the graph
- if (supportsSearch && values.docID() % everyNdoc == 0) {
+ if (supportsSearch && values.ordToDoc(count) % everyNdoc == 0) {
KnnCollector collector = new TopKnnCollector(10, Integer.MAX_VALUE);
- codecReader.getVectorReader().search(fieldInfo.name, values.vectorValue(), collector, null);
+ codecReader
+ .getVectorReader()
+ .search(fieldInfo.name, values.vectorValue(count), collector, null);
TopDocs docs = collector.topDocs();
if (docs.scoreDocs.length == 0) {
throw new CheckIndexException(
"Field \"" + fieldInfo.name + "\" failed to search k nearest neighbors");
}
}
- int valueLength = values.vectorValue().length;
+ int valueLength = values.vectorValue(count).length;
if (valueLength != fieldInfo.getVectorDimension()) {
throw new CheckIndexException(
"Field \""
@@ -2832,19 +2834,19 @@ public final class CheckIndex implements Closeable {
+ " not matching the field's dimension="
+ fieldInfo.getVectorDimension());
}
- ++docCount;
+ ++count;
}
- if (docCount != values.size()) {
+ if (count != values.size()) {
throw new CheckIndexException(
"Field \""
+ fieldInfo.name
+ "\" has size="
+ values.size()
+ " but when iterated, returns "
- + docCount
+ + count
+ " docs with values");
}
- status.totalVectorValues += docCount;
+ status.totalVectorValues += count;
}
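Both checks above now scan by ordinal rather than driving a doc iterator; ordToDoc(ord) recovers the docid only where the sampling condition needs it. The bare idiom:

    for (int ord = 0; ord < values.size(); ord++) {
      byte[] v = values.vectorValue(ord); // random access, no positioning required
      int doc = values.ordToDoc(ord);     // docid, for sampling and reporting
    }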
/**
diff --git a/lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java b/lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java
index ca2cb1a27d4..614a652cd35 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ExitableDirectoryReader.java
@@ -429,37 +429,10 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
}
private class ExitableFloatVectorValues extends FloatVectorValues {
- private int docToCheck;
private final FloatVectorValues vectorValues;
public ExitableFloatVectorValues(FloatVectorValues vectorValues) {
this.vectorValues = vectorValues;
- docToCheck = 0;
- }
-
- @Override
- public int advance(int target) throws IOException {
- final int advance = vectorValues.advance(target);
- if (advance >= docToCheck) {
- checkAndThrow();
- docToCheck = advance + DOCS_BETWEEN_TIMEOUT_CHECK;
- }
- return advance;
- }
-
- @Override
- public int docID() {
- return vectorValues.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- final int nextDoc = vectorValues.nextDoc();
- if (nextDoc >= docToCheck) {
- checkAndThrow();
- docToCheck = nextDoc + DOCS_BETWEEN_TIMEOUT_CHECK;
- }
- return nextDoc;
}
@Override
@@ -468,8 +441,13 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
}
@Override
- public float[] vectorValue() throws IOException {
- return vectorValues.vectorValue();
+ public float[] vectorValue(int ord) throws IOException {
+ return vectorValues.vectorValue(ord);
+ }
+
+ @Override
+ public int ordToDoc(int ord) {
+ return vectorValues.ordToDoc(ord);
}
@Override
@@ -477,61 +455,27 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
return vectorValues.size();
}
+ @Override
+ public DocIndexIterator iterator() {
+ return createExitableIterator(vectorValues.iterator(), queryTimeout);
+ }
+
@Override
public VectorScorer scorer(float[] target) throws IOException {
return vectorValues.scorer(target);
}
- /**
- * Throws {@link ExitingReaderException} if {@link QueryTimeout#shouldExit()} returns true, or
- * if {@link Thread#interrupted()} returns true.
- */
- private void checkAndThrow() {
- if (queryTimeout.shouldExit()) {
- throw new ExitingReaderException(
- "The request took too long to iterate over vector values. Timeout: "
- + queryTimeout.toString()
- + ", FloatVectorValues="
- + in);
- } else if (Thread.interrupted()) {
- throw new ExitingReaderException(
- "Interrupted while iterating over vector values. FloatVectorValues=" + in);
- }
+ @Override
+ public FloatVectorValues copy() {
+ throw new UnsupportedOperationException();
}
}
private class ExitableByteVectorValues extends ByteVectorValues {
- private int docToCheck;
private final ByteVectorValues vectorValues;
public ExitableByteVectorValues(ByteVectorValues vectorValues) {
this.vectorValues = vectorValues;
- docToCheck = 0;
- }
-
- @Override
- public int advance(int target) throws IOException {
- final int advance = vectorValues.advance(target);
- if (advance >= docToCheck) {
- checkAndThrow();
- docToCheck = advance + DOCS_BETWEEN_TIMEOUT_CHECK;
- }
- return advance;
- }
-
- @Override
- public int docID() {
- return vectorValues.docID();
- }
-
- @Override
- public int nextDoc() throws IOException {
- final int nextDoc = vectorValues.nextDoc();
- if (nextDoc >= docToCheck) {
- checkAndThrow();
- docToCheck = nextDoc + DOCS_BETWEEN_TIMEOUT_CHECK;
- }
- return nextDoc;
}
@Override
@@ -545,8 +489,18 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
}
@Override
- public byte[] vectorValue() throws IOException {
- return vectorValues.vectorValue();
+ public byte[] vectorValue(int ord) throws IOException {
+ return vectorValues.vectorValue(ord);
+ }
+
+ @Override
+ public int ordToDoc(int ord) {
+ return vectorValues.ordToDoc(ord);
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ return createExitableIterator(vectorValues.iterator(), queryTimeout);
}
@Override
@@ -554,23 +508,66 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
return vectorValues.scorer(target);
}
- /**
- * Throws {@link ExitingReaderException} if {@link QueryTimeout#shouldExit()} returns true, or
- * if {@link Thread#interrupted()} returns true.
- */
+ @Override
+ public ByteVectorValues copy() {
+ throw new UnsupportedOperationException();
+ }
+ }
+ }
+
+ private static KnnVectorValues.DocIndexIterator createExitableIterator(
+ KnnVectorValues.DocIndexIterator delegate, QueryTimeout queryTimeout) {
+ return new KnnVectorValues.DocIndexIterator() {
+ private int nextCheck;
+
+ @Override
+ public int index() {
+ return delegate.index();
+ }
+
+ @Override
+ public int docID() {
+ return delegate.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ int doc = delegate.nextDoc();
+ if (doc >= nextCheck) {
+ checkAndThrow();
+ nextCheck = doc + ExitableFilterAtomicReader.DOCS_BETWEEN_TIMEOUT_CHECK;
+ }
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return delegate.cost();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ int doc = delegate.advance(target);
+ if (doc >= nextCheck) {
+ checkAndThrow();
+ nextCheck = doc + ExitableFilterAtomicReader.DOCS_BETWEEN_TIMEOUT_CHECK;
+ }
+ return doc;
+ }
+
private void checkAndThrow() {
if (queryTimeout.shouldExit()) {
throw new ExitingReaderException(
- "The request took too long to iterate over vector values. Timeout: "
+ "The request took too long to iterate over knn vector values. Timeout: "
+ queryTimeout.toString()
- + ", ByteVectorValues="
- + in);
+ + ", KnnVectorValues="
+ + delegate);
} else if (Thread.interrupted()) {
throw new ExitingReaderException(
- "Interrupted while iterating over vector values. ByteVectorValues=" + in);
+ "Interrupted while iterating over knn vector values. KnnVectorValues=" + delegate);
}
}
- }
+ };
}
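createExitableIterator collapses the two per-type timeout wrappers deleted above into a single decorator over DocIndexIterator, preserving the amortized check: the clock is consulted only about once every DOCS_BETWEEN_TIMEOUT_CHECK docs. A self-contained sketch of the amortization (the interval 16 and the exception type are illustrative):

    int nextCheck = 0;
    int doc;
    while ((doc = delegate.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (doc >= nextCheck) {
        if (queryTimeout.shouldExit() || Thread.interrupted()) {
          throw new RuntimeException("timed out iterating vectors");
        }
        nextCheck = doc + 16; // skip the check for the next ~16 docs
      }
    }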
/** Wrapper class for another PointValues implementation that is used by ExitableFields. */
@@ -683,7 +680,7 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
if (queryTimeout.shouldExit()) {
throw new ExitingReaderException(
"The request took too long to intersect point values. Timeout: "
- + queryTimeout.toString()
+ + queryTimeout
+ ", PointValues="
+ pointValues);
} else if (Thread.interrupted()) {
@@ -815,7 +812,7 @@ public class ExitableDirectoryReader extends FilterDirectoryReader {
/** Wrapper class for another Terms implementation that is used by ExitableFields. */
public static class ExitableTerms extends FilterTerms {
- private QueryTimeout queryTimeout;
+ private final QueryTimeout queryTimeout;
/** Constructor * */
public ExitableTerms(Terms terms, QueryTimeout queryTimeout) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/FloatVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/FloatVectorValues.java
index e5dbc620f5c..aa840fc3931 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FloatVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FloatVectorValues.java
@@ -17,8 +17,8 @@
package org.apache.lucene.index;
import java.io.IOException;
+import java.util.List;
import org.apache.lucene.document.KnnFloatVectorField;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.VectorScorer;
/**
@@ -27,34 +27,21 @@ import org.apache.lucene.search.VectorScorer;
*
* @lucene.experimental
*/
-public abstract class FloatVectorValues extends DocIdSetIterator {
+public abstract class FloatVectorValues extends KnnVectorValues {
/** Sole constructor */
protected FloatVectorValues() {}
- /** Return the dimension of the vectors */
- public abstract int dimension();
-
/**
- * Return the number of vectors for this field.
- *
- * @return the number of vectors returned by this iterator
- */
- public abstract int size();
-
- @Override
- public final long cost() {
- return size();
- }
-
- /**
- * Return the vector value for the current document ID. It is illegal to call this method when the
- * iterator is not positioned: before advancing, or after failing to advance. The returned array
- * may be shared across calls, re-used, and modified as the iterator advances.
+ * Return the vector value for the given vector ordinal, which must be in [0, size() - 1];
+ * otherwise an IndexOutOfBoundsException is thrown. The returned array may be shared across calls.
*
* @return the vector value
*/
- public abstract float[] vectorValue() throws IOException;
+ public abstract float[] vectorValue(int ord) throws IOException;
+
+ @Override
+ public abstract FloatVectorValues copy() throws IOException;
/**
* Checks the Vector Encoding of a field
@@ -79,12 +66,53 @@ public abstract class FloatVectorValues extends DocIdSetIterator {
/**
* Return a {@link VectorScorer} for the given query vector and the current {@link
- * FloatVectorValues}. The iterator for the scorer is not the same instance as the iterator for
- * this {@link FloatVectorValues}. It is a copy, and iteration over the scorer will not affect the
- * iteration of this {@link FloatVectorValues}.
+ * FloatVectorValues}.
*
- * @param query the query vector
+ * @param target the query vector
* @return a {@link VectorScorer} instance or null
*/
- public abstract VectorScorer scorer(float[] query) throws IOException;
+ public VectorScorer scorer(float[] target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public VectorEncoding getEncoding() {
+ return VectorEncoding.FLOAT32;
+ }
+
+ /**
+ * Creates a {@link FloatVectorValues} from a list of float arrays.
+ *
+ * @param vectors the list of float arrays
+ * @param dim the dimension of the vectors
+ * @return a {@link FloatVectorValues} instance
+ */
+ public static FloatVectorValues fromFloats(List vectors, int dim) {
+ return new FloatVectorValues() {
+ @Override
+ public int size() {
+ return vectors.size();
+ }
+
+ @Override
+ public int dimension() {
+ return dim;
+ }
+
+ @Override
+ public float[] vectorValue(int targetOrd) {
+ return vectors.get(targetOrd);
+ }
+
+ @Override
+ public FloatVectorValues copy() {
+ return this;
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
+ };
+ }
}
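As with the byte-valued factory, fromFloats yields dense values: ordToDoc is the identity and createDenseIterator handles iteration, so the in-memory path used by the HNSW writer needs no docs-with-field plumbing. Quick sketch:

    FloatVectorValues fvv =
        FloatVectorValues.fromFloats(List.of(new float[] {0.1f, 0.2f}), 2);
    assert fvv.ordToDoc(0) == 0;    // dense: ordinal doubles as docid
    float[] v = fvv.vectorValue(0); // random access, no iterator needed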
diff --git a/lucene/core/src/java/org/apache/lucene/index/Impacts.java b/lucene/core/src/java/org/apache/lucene/index/Impacts.java
index 35e8cca5c70..e366b6f6b0b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Impacts.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Impacts.java
@@ -40,7 +40,8 @@ public abstract class Impacts {
/**
* Return impacts on the given level. These impacts are sorted by increasing frequency and
* increasing unsigned norm, and only valid until the doc ID returned by {@link
- * #getDocIdUpTo(int)} for the same level, included. The returned list is never empty. NOTE: There
+ * #getDocIdUpTo(int)} for the same level, included. The returned list is never empty and should
+ * implement {@link java.util.RandomAccess} if it contains more than a single element. NOTE: There
* is no guarantee that these impacts actually appear in postings, only that they trigger scores
* that are greater than or equal to the impacts that actually appear in postings.
*/
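The sharpened contract above asks implementations to back multi-element impact lists with java.util.RandomAccess so consumers can index into them cheaply. For instance (Impact's (freq, norm) constructor is assumed here):

    List<Impact> level0 = Arrays.asList(new Impact(1, 3L), new Impact(4, 7L));
    assert level0 instanceof java.util.RandomAccess; // Arrays.asList qualifies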
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index def9ef06fce..346da8a907e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1255,8 +1255,7 @@ public class IndexWriter
return reader.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
} else if (si.info.getUseCompoundFile()) {
// cfs
- try (Directory cfs =
- codec.compoundFormat().getCompoundReader(si.info.dir, si.info, IOContext.DEFAULT)) {
+ try (Directory cfs = codec.compoundFormat().getCompoundReader(si.info.dir, si.info)) {
return reader.read(cfs, si.info, "", IOContext.READONCE);
}
} else {
diff --git a/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java
new file mode 100644
index 00000000000..8e58f387a33
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/KnnVectorValues.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+import org.apache.lucene.document.KnnByteVectorField;
+import org.apache.lucene.document.KnnFloatVectorField;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.Bits;
+
+/**
+ * This class abstracts addressing of document vector values indexed as {@link KnnFloatVectorField}
+ * or {@link KnnByteVectorField}.
+ *
+ * @lucene.experimental
+ */
+public abstract class KnnVectorValues {
+
+ /** Return the dimension of the vectors */
+ public abstract int dimension();
+
+ /**
+ * Return the number of vectors for this field.
+ *
+ * @return the number of vectors for this field
+ */
+ public abstract int size();
+
+ /**
+ * Return the docid of the document indexed with the given vector ordinal. This default
+ * implementation returns the argument and is appropriate for dense values implementations where
+ * every doc has a single value.
+ */
+ public int ordToDoc(int ord) {
+ return ord;
+ }
+
+ /**
+ * Creates a new copy of this {@link KnnVectorValues}. This is helpful when you need to access
+ * different values at once, to avoid overwriting the underlying vector returned.
+ */
+ public abstract KnnVectorValues copy() throws IOException;
+
+ /** Returns the vector byte length, defaults to dimension multiplied by float byte size */
+ public int getVectorByteLength() {
+ return dimension() * getEncoding().byteSize;
+ }
+
+ /** The vector encoding of these values. */
+ public abstract VectorEncoding getEncoding();
+
+ /** Returns a Bits accepting docs accepted by the argument and having a vector value */
+ public Bits getAcceptOrds(Bits acceptDocs) {
+ // FIXME: change default to return acceptDocs and provide this impl
+ // somewhere more specialized (in every non-dense impl).
+ if (acceptDocs == null) {
+ return null;
+ }
+ return new Bits() {
+ @Override
+ public boolean get(int index) {
+ return acceptDocs.get(ordToDoc(index));
+ }
+
+ @Override
+ public int length() {
+ return size();
+ }
+ };
+ }
+
+ /** Create an iterator for this instance. */
+ public DocIndexIterator iterator() {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * A DocIdSetIterator that also provides an index() method tracking a distinct ordinal for a
+ * vector associated with each doc.
+ */
+ public abstract static class DocIndexIterator extends DocIdSetIterator {
+
+ /** return the value index (aka "ordinal" or "ord") corresponding to the current doc */
+ public abstract int index();
+ }
+
+ /**
+ * Creates an iterator for instances where every doc has a value, and the value ordinals are equal
+ * to the docids.
+ */
+ protected DocIndexIterator createDenseIterator() {
+ return new DocIndexIterator() {
+
+ int doc = -1;
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int index() {
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (doc >= size() - 1) {
+ return doc = NO_MORE_DOCS;
+ } else {
+ return ++doc;
+ }
+ }
+
+ @Override
+ public int advance(int target) {
+ if (target >= size()) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = target;
+ }
+
+ @Override
+ public long cost() {
+ return size();
+ }
+ };
+ }
+
+ /**
+ * Creates an iterator from a DocIdSetIterator indicating which docs have values, and for which
+ * ordinals increase monotonically with docid.
+ */
+ protected static DocIndexIterator fromDISI(DocIdSetIterator docsWithField) {
+ return new DocIndexIterator() {
+
+ int ord = -1;
+
+ @Override
+ public int docID() {
+ return docsWithField.docID();
+ }
+
+ @Override
+ public int index() {
+ return ord;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (docID() == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ ord++;
+ return docsWithField.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return docsWithField.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return docsWithField.cost();
+ }
+ };
+ }
+
+ /**
+ * Creates an iterator from this instance's ordinal-to-docid mapping which must be monotonic
+ * (docid increases when ordinal does).
+ */
+ protected DocIndexIterator createSparseIterator() {
+ return new DocIndexIterator() {
+ private int ord = -1;
+
+ @Override
+ public int docID() {
+ if (ord == -1) {
+ return -1;
+ }
+ if (ord == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ }
+ return ordToDoc(ord);
+ }
+
+ @Override
+ public int index() {
+ return ord;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ if (ord >= size() - 1) {
+ ord = NO_MORE_DOCS;
+ } else {
+ ++ord;
+ }
+ return docID();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
+ }
+
+ @Override
+ public long cost() {
+ return size();
+ }
+ };
+ }
+}
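The new class splits vector access into two concerns: random access by ordinal (handled by subclasses such as FloatVectorValues) and doc-id iteration (handled by DocIndexIterator). A minimal usage sketch, assuming a LeafReader named leafReader and a float vector field named "vec" (both illustrative):

    FloatVectorValues values = leafReader.getFloatVectorValues("vec");
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // index() is the vector ordinal for the current doc; access is random, not streaming
      float[] vector = values.vectorValue(it.index());
    }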
diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafMetaData.java b/lucene/core/src/java/org/apache/lucene/index/LeafMetaData.java
index 4d9f753e2e3..4595560eff8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LeafMetaData.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LeafMetaData.java
@@ -33,9 +33,9 @@ import org.apache.lucene.util.Version;
* are in no particular order.
* @param hasBlocks Returns <code>true</code> iff this index contains blocks created with {@link
* IndexWriter#addDocument(Iterable)} or it's corresponding update methods with at least 2 or
- * more documents per call. Note: This property was not recorded before {@link
- * Version#LUCENE_9_9_0} this method will return false for all leaves written before {@link
- * Version#LUCENE_9_9_0}
+ * more documents per call. Note: This property was not recorded before {@link Version
+ * LUCENE_9_9_0} this method will return false for all leaves written before {@link Version
+ * LUCENE_9_9_0}
* @see IndexWriter#updateDocuments(Term, Iterable)
* @see IndexWriter#updateDocuments(Query, Iterable)
* @see IndexWriter#softUpdateDocuments(Term, Iterable, Field...)
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeState.java b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
index dcf9923feb3..838699215f0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergeState.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeState.java
@@ -41,7 +41,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
*
* @lucene.experimental
*/
-public class MergeState implements Cloneable {
+public class MergeState {
/** Maps document IDs from old segments to document IDs in the new segment */
public final DocMap[] docMaps;
@@ -302,55 +302,4 @@ public class MergeState implements Cloneable {
this.intraMergeTaskExecutor = intraMergeTaskExecutor;
this.needsIndexSort = needsIndexSort;
}
-
- @Override
- public MergeState clone() {
- StoredFieldsReader[] storedFieldsReaders = this.storedFieldsReaders.clone();
- TermVectorsReader[] termVectorsReaders = this.termVectorsReaders.clone();
- NormsProducer[] normsProducers = this.normsProducers.clone();
- DocValuesProducer[] docValuesProducers = this.docValuesProducers.clone();
- FieldsProducer[] fieldsProducers = this.fieldsProducers.clone();
- PointsReader[] pointsReaders = this.pointsReaders.clone();
- KnnVectorsReader[] knnVectorsReaders = this.knnVectorsReaders.clone();
- for (int i = 0; i < storedFieldsReaders.length; ++i) {
- if (storedFieldsReaders[i] != null) {
- storedFieldsReaders[i] = storedFieldsReaders[i].getMergeInstance();
- }
- if (termVectorsReaders[i] != null) {
- termVectorsReaders[i] = termVectorsReaders[i].getMergeInstance();
- }
- if (normsProducers[i] != null) {
- normsProducers[i] = normsProducers[i].getMergeInstance();
- }
- if (docValuesProducers[i] != null) {
- docValuesProducers[i] = docValuesProducers[i].getMergeInstance();
- }
- if (fieldsProducers[i] != null) {
- fieldsProducers[i] = fieldsProducers[i].getMergeInstance();
- }
- if (pointsReaders[i] != null) {
- pointsReaders[i] = pointsReaders[i].getMergeInstance();
- }
- if (knnVectorsReaders[i] != null) {
- knnVectorsReaders[i] = knnVectorsReaders[i].getMergeInstance();
- }
- }
- return new MergeState(
- docMaps,
- segmentInfo,
- mergeFieldInfos,
- storedFieldsReaders,
- termVectorsReaders,
- normsProducers,
- docValuesProducers,
- fieldInfos,
- liveDocs,
- fieldsProducers,
- pointsReaders,
- knnVectorsReaders,
- maxDocs,
- infoStream,
- intraMergeTaskExecutor,
- needsIndexSort);
- }
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
index 557d31ad441..63c021660c7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PendingSoftDeletes.java
@@ -76,15 +76,14 @@ final class PendingSoftDeletes extends PendingDeletes {
hardDeletes.onNewReader(reader, info);
// only re-calculate this if we haven't seen this generation
if (dvGeneration < info.getDocValuesGen()) {
- final DocIdSetIterator iterator =
- FieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
- int newDelCount;
- if (iterator
- != null) { // nothing is deleted we don't have a soft deletes field in this segment
- assert info.info.maxDoc() > 0 : "maxDoc is 0";
+ final int newDelCount;
+ var iterator = FieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
+ if (iterator != null && iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ iterator = FieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
newDelCount = applySoftDeletes(iterator, getMutableBits());
assert newDelCount >= 0 : " illegal pending delete count: " + newDelCount;
} else {
+ // nothing is deleted we don't have a soft deletes field in this segment
newDelCount = 0;
}
assert info.getSoftDelCount() == newDelCount
@@ -227,12 +226,7 @@ final class PendingSoftDeletes extends PendingDeletes {
// updates always outside of CFS
Closeable toClose;
if (segInfo.getUseCompoundFile()) {
- toClose =
- dir =
- segInfo
- .getCodec()
- .compoundFormat()
- .getCompoundReader(segInfo.dir, segInfo, IOContext.READONCE);
+ toClose = dir = segInfo.getCodec().compoundFormat().getCompoundReader(segInfo.dir, segInfo);
} else {
toClose = null;
dir = segInfo.dir;
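The rewritten branch above probes the doc-values iterator with a single nextDoc() call to detect whether the segment has any soft-deletes values at all, and, because DocIdSetIterators are forward-only, re-acquires a fresh iterator before actually applying deletes. A sketch of that probe-then-reset idiom (not part of this patch):

    var probe = FieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
    if (probe != null && probe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // the probe consumed one doc, so start over with a fresh iterator
      var fresh = FieldExistsQuery.getDocValuesDocIdSetIterator(field, reader);
      // ... consume `fresh` from the first doc onwards
    }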
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
index a29f734ea2f..7da6d77136c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
@@ -80,7 +80,7 @@ final class SegmentCoreReaders {
try {
if (si.info.getUseCompoundFile()) {
- cfsDir = cfsReader = codec.compoundFormat().getCompoundReader(dir, si.info, context);
+ cfsDir = cfsReader = codec.compoundFormat().getCompoundReader(dir, si.info);
} else {
cfsReader = null;
cfsDir = dir;
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
index 0f4df818ddc..1d9878fe0db 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentDocValuesProducer.java
@@ -18,10 +18,11 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collections;
+import java.util.HashMap;
import java.util.IdentityHashMap;
+import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.DocValuesProducer;
-import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.internal.hppc.LongArrayList;
import org.apache.lucene.store.Directory;
@@ -31,7 +32,7 @@ import org.apache.lucene.store.Directory;
// producer?
class SegmentDocValuesProducer extends DocValuesProducer {
- final IntObjectHashMap<DocValuesProducer> dvProducersByField = new IntObjectHashMap<>();
+ final Map<String, DocValuesProducer> dvProducersByField = new HashMap<>();
final Set<DocValuesProducer> dvProducers =
Collections.newSetFromMap(new IdentityHashMap<>());
final LongArrayList dvGens = new LongArrayList();
@@ -66,7 +67,7 @@ class SegmentDocValuesProducer extends DocValuesProducer {
dvGens.add(docValuesGen);
dvProducers.add(baseProducer);
}
- dvProducersByField.put(fi.number, baseProducer);
+ dvProducersByField.put(fi.name, baseProducer);
} else {
assert !dvGens.contains(docValuesGen);
// otherwise, producer sees only the one fieldinfo it wrote
@@ -75,7 +76,7 @@ class SegmentDocValuesProducer extends DocValuesProducer {
docValuesGen, si, dir, new FieldInfos(new FieldInfo[] {fi}));
dvGens.add(docValuesGen);
dvProducers.add(dvp);
- dvProducersByField.put(fi.number, dvp);
+ dvProducersByField.put(fi.name, dvp);
}
}
} catch (Throwable t) {
@@ -90,42 +91,42 @@ class SegmentDocValuesProducer extends DocValuesProducer {
@Override
public NumericDocValues getNumeric(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getNumeric(field);
}
@Override
public BinaryDocValues getBinary(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getBinary(field);
}
@Override
public SortedDocValues getSorted(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getSorted(field);
}
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getSortedNumeric(field);
}
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getSortedSet(field);
}
@Override
public DocValuesSkipper getSkipper(FieldInfo field) throws IOException {
- DocValuesProducer dvProducer = dvProducersByField.get(field.number);
+ DocValuesProducer dvProducer = dvProducersByField.get(field.name);
assert dvProducer != null;
return dvProducer.getSkipper(field);
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
index b9f14b4e39c..5e336c7fef0 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -17,9 +17,7 @@
package org.apache.lucene.index;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.List;
-import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.Codec;
@@ -31,7 +29,6 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsWriter;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.TermVectorsWriter;
-import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.InfoStream;
@@ -102,12 +99,7 @@ final class SegmentMerger {
}
private MergeState mergeState() {
- MergeState mergeState = this.mergeState;
- if (Thread.currentThread() != mergeStateCreationThread) {
- // Most merges, e.g. small merges, run in the same thread, so save the cost of pulling a clone
- // in that case.
- mergeState = mergeState.clone();
- }
+ assert Thread.currentThread() == mergeStateCreationThread;
return mergeState;
}
@@ -147,8 +139,6 @@ final class SegmentMerger {
IOContext.DEFAULT,
segmentWriteState.segmentSuffix);
- TaskExecutor taskExecutor = new TaskExecutor(mergeState.intraMergeTaskExecutor);
- List<Callable<Void>> mergingTasks = new ArrayList<>();
if (mergeState.mergeFieldInfos.hasNorms()) {
mergeWithLogging(this::mergeNorms, segmentWriteState, segmentReadState, "norms", numMerged);
}
@@ -161,12 +151,7 @@ final class SegmentMerger {
}
if (mergeState.mergeFieldInfos.hasPointValues()) {
- mergingTasks.add(
- () -> {
- mergeWithLogging(
- this::mergePoints, segmentWriteState, segmentReadState, "points", numMerged);
- return null;
- });
+ mergeWithLogging(this::mergePoints, segmentWriteState, segmentReadState, "points", numMerged);
}
if (mergeState.mergeFieldInfos.hasVectorValues()) {
@@ -179,14 +164,9 @@ final class SegmentMerger {
}
if (mergeState.mergeFieldInfos.hasTermVectors()) {
- mergingTasks.add(
- () -> {
- mergeWithLogging(this::mergeTermVectors, "term vectors");
- return null;
- });
+ mergeWithLogging(this::mergeTermVectors, "term vectors");
}
- taskExecutor.invokeAll(mergingTasks);
// write the merged infos
mergeWithLogging(
this::mergeFieldInfos, segmentWriteState, segmentReadState, "field infos", numMerged);
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
index 148ead9cb2e..69d557d270a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowCompositeCodecReaderWrapper.java
@@ -20,8 +20,10 @@ import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Objects;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
@@ -32,10 +34,7 @@ import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
-import org.apache.lucene.internal.hppc.IntObjectHashMap;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.KnnCollector;
-import org.apache.lucene.search.VectorScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
@@ -302,38 +301,21 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
}
}
- private record DocValuesSub<T extends DocIdSetIterator>(T sub, int docStart, int docEnd) {}
+ private record DocValuesSub<T extends KnnVectorValues>(T sub, int docStart, int ordStart) {}
- private static class MergedDocIdSetIterator<T extends DocIdSetIterator> extends DocIdSetIterator {
+ private static class MergedDocIterator<T extends KnnVectorValues>
+ extends KnnVectorValues.DocIndexIterator {
final Iterator<DocValuesSub<T>> it;
- final long cost;
DocValuesSub<T> current;
- int currentIndex = 0;
+ KnnVectorValues.DocIndexIterator currentIterator;
+ int ord = -1;
int doc = -1;
- MergedDocIdSetIterator(List<DocValuesSub<T>> subs) {
- long cost = 0;
- for (DocValuesSub<T> sub : subs) {
- if (sub.sub != null) {
- cost += sub.sub.cost();
- }
- }
- this.cost = cost;
+ MergedDocIterator(List<DocValuesSub<T>> subs) {
this.it = subs.iterator();
current = it.next();
- }
-
- private boolean advanceSub(int target) {
- while (current.sub == null || current.docEnd <= target) {
- if (it.hasNext() == false) {
- doc = NO_MORE_DOCS;
- return false;
- }
- current = it.next();
- currentIndex++;
- }
- return true;
+ currentIterator = currentIterator();
}
@Override
@@ -341,41 +323,47 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
return doc;
}
+ @Override
+ public int index() {
+ return ord;
+ }
+
@Override
public int nextDoc() throws IOException {
while (true) {
if (current.sub != null) {
- int next = current.sub.nextDoc();
+ int next = currentIterator.nextDoc();
if (next != NO_MORE_DOCS) {
+ ++ord;
return doc = current.docStart + next;
}
}
if (it.hasNext() == false) {
+ ord = NO_MORE_DOCS;
return doc = NO_MORE_DOCS;
}
current = it.next();
- currentIndex++;
+ currentIterator = currentIterator();
+ ord = current.ordStart - 1;
}
}
- @Override
- public int advance(int target) throws IOException {
- while (true) {
- if (advanceSub(target) == false) {
- return DocIdSetIterator.NO_MORE_DOCS;
- }
- int next = current.sub.advance(target - current.docStart);
- if (next == DocIdSetIterator.NO_MORE_DOCS) {
- target = current.docEnd;
- } else {
- return doc = current.docStart + next;
- }
+ private KnnVectorValues.DocIndexIterator currentIterator() {
+ if (current.sub != null) {
+ return current.sub.iterator();
+ } else {
+ return null;
}
}
@Override
public long cost() {
- return cost;
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ throw new UnsupportedOperationException();
}
}
@@ -389,7 +377,7 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
private final CodecReader[] codecReaders;
private final DocValuesProducer[] producers;
private final int[] docStarts;
- private final IntObjectHashMap<OrdinalMap> cachedOrdMaps = new IntObjectHashMap<>();
+ private final Map<String, OrdinalMap> cachedOrdMaps = new HashMap<>();
SlowCompositeDocValuesProducerWrapper(CodecReader[] codecReaders, int[] docStarts) {
this.codecReaders = codecReaders;
@@ -428,14 +416,14 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
public SortedDocValues getSorted(FieldInfo field) throws IOException {
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
- map = cachedOrdMaps.get(field.number);
+ map = cachedOrdMaps.get(field.name);
if (map == null) {
// uncached, or not a multi dv
SortedDocValues dv =
MultiDocValues.getSortedValues(new MultiReader(codecReaders), field.name);
if (dv instanceof MultiSortedDocValues) {
map = ((MultiSortedDocValues) dv).mapping;
- cachedOrdMaps.put(field.number, map);
+ cachedOrdMaps.put(field.name, map);
}
return dv;
}
@@ -464,14 +452,14 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
OrdinalMap map = null;
synchronized (cachedOrdMaps) {
- map = cachedOrdMaps.get(field.number);
+ map = cachedOrdMaps.get(field.name);
if (map == null) {
// uncached, or not a multi dv
SortedSetDocValues dv =
MultiDocValues.getSortedSetValues(new MultiReader(codecReaders), field.name);
if (dv instanceof MultiSortedSetDocValues) {
map = ((MultiSortedSetDocValues) dv).mapping;
- cachedOrdMaps.put(field.number, map);
+ cachedOrdMaps.put(field.name, map);
}
return dv;
}
@@ -847,55 +835,75 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
int size = 0;
for (CodecReader reader : codecReaders) {
FloatVectorValues values = reader.getFloatVectorValues(field);
+ subs.add(new DocValuesSub<>(values, docStarts[i], size));
if (values != null) {
if (dimension == -1) {
dimension = values.dimension();
}
size += values.size();
}
- subs.add(new DocValuesSub<>(values, docStarts[i], docStarts[i + 1]));
i++;
}
- final int finalDimension = dimension;
- final int finalSize = size;
- MergedDocIdSetIterator<FloatVectorValues> mergedIterator = new MergedDocIdSetIterator<>(subs);
- return new FloatVectorValues() {
+ return new MergedFloatVectorValues(dimension, size, subs);
+ }
- @Override
- public int dimension() {
- return finalDimension;
- }
+ class MergedFloatVectorValues extends FloatVectorValues {
+ final int dimension;
+ final int size;
+ final DocValuesSub<?>[] subs;
+ final MergedDocIterator<FloatVectorValues> iter;
+ final int[] starts;
+ int lastSubIndex;
- @Override
- public int size() {
- return finalSize;
+ MergedFloatVectorValues(int dimension, int size, List<DocValuesSub<FloatVectorValues>> subs) {
+ this.dimension = dimension;
+ this.size = size;
+ this.subs = subs.toArray(new DocValuesSub<?>[0]);
+ iter = new MergedDocIterator<>(subs);
+ // [0, start(1), ..., size] - we want the extra element
+ // to avoid checking for out-of-array bounds
+ starts = new int[subs.size() + 1];
+ for (int i = 0; i < subs.size(); i++) {
+ starts[i] = subs.get(i).ordStart;
}
+ starts[starts.length - 1] = size;
+ }
- @Override
- public float[] vectorValue() throws IOException {
- return mergedIterator.current.sub.vectorValue();
- }
+ @Override
+ public MergedDocIterator<FloatVectorValues> iterator() {
+ return iter;
+ }
- @Override
- public int docID() {
- return mergedIterator.docID();
- }
+ @Override
+ public int dimension() {
+ return dimension;
+ }
- @Override
- public int nextDoc() throws IOException {
- return mergedIterator.nextDoc();
- }
+ @Override
+ public int size() {
+ return size;
+ }
- @Override
- public int advance(int target) throws IOException {
- return mergedIterator.advance(target);
+ @SuppressWarnings("unchecked")
+ @Override
+ public FloatVectorValues copy() throws IOException {
+ List<DocValuesSub<FloatVectorValues>> subsCopy = new ArrayList<>();
+ for (Object sub : subs) {
+ subsCopy.add((DocValuesSub<FloatVectorValues>) sub);
}
+ return new MergedFloatVectorValues(dimension, size, subsCopy);
+ }
- @Override
- public VectorScorer scorer(float[] target) {
- throw new UnsupportedOperationException();
- }
- };
+ @Override
+ public float[] vectorValue(int ord) throws IOException {
+ assert ord >= 0 && ord < size;
+ // We need to implement fully random-access API here in order to support callers like
+ // SortingCodecReader that rely on it.
+ lastSubIndex = findSub(ord, lastSubIndex, starts);
+ assert subs[lastSubIndex].sub != null;
+ return ((FloatVectorValues) subs[lastSubIndex].sub)
+ .vectorValue(ord - subs[lastSubIndex].ordStart);
+ }
}
@Override
@@ -906,55 +914,101 @@ final class SlowCompositeCodecReaderWrapper extends CodecReader {
int size = 0;
for (CodecReader reader : codecReaders) {
ByteVectorValues values = reader.getByteVectorValues(field);
+ subs.add(new DocValuesSub<>(values, docStarts[i], size));
if (values != null) {
if (dimension == -1) {
dimension = values.dimension();
}
size += values.size();
}
- subs.add(new DocValuesSub<>(values, docStarts[i], docStarts[i + 1]));
i++;
}
- final int finalDimension = dimension;
- final int finalSize = size;
- MergedDocIdSetIterator<ByteVectorValues> mergedIterator = new MergedDocIdSetIterator<>(subs);
- return new ByteVectorValues() {
+ return new MergedByteVectorValues(dimension, size, subs);
+ }
- @Override
- public int dimension() {
- return finalDimension;
- }
+ class MergedByteVectorValues extends ByteVectorValues {
+ final int dimension;
+ final int size;
+ final DocValuesSub<?>[] subs;
+ final MergedDocIterator<ByteVectorValues> iter;
+ final int[] starts;
+ int lastSubIndex;
- @Override
- public int size() {
- return finalSize;
+ MergedByteVectorValues(int dimension, int size, List<DocValuesSub<ByteVectorValues>> subs) {
+ this.dimension = dimension;
+ this.size = size;
+ this.subs = subs.toArray(new DocValuesSub<?>[0]);
+ iter = new MergedDocIterator<>(subs);
+ // [0, start(1), ..., size] - we want the extra element
+ // to avoid checking for out-of-array bounds
+ starts = new int[subs.size() + 1];
+ for (int i = 0; i < subs.size(); i++) {
+ starts[i] = subs.get(i).ordStart;
}
+ starts[starts.length - 1] = size;
+ }
- @Override
- public byte[] vectorValue() throws IOException {
- return mergedIterator.current.sub.vectorValue();
- }
+ @Override
+ public MergedDocIterator<ByteVectorValues> iterator() {
+ return iter;
+ }
- @Override
- public int docID() {
- return mergedIterator.docID();
- }
+ @Override
+ public int dimension() {
+ return dimension;
+ }
- @Override
- public int nextDoc() throws IOException {
- return mergedIterator.nextDoc();
- }
+ @Override
+ public int size() {
+ return size;
+ }
- @Override
- public int advance(int target) throws IOException {
- return mergedIterator.advance(target);
- }
+ @Override
+ public byte[] vectorValue(int ord) throws IOException {
+ assert ord >= 0 && ord < size;
+ // We need to implement fully random-access API here in order to support callers like
+ // SortingCodecReader that rely on it. We maintain lastSubIndex since we expect some
+ // repetition.
+ lastSubIndex = findSub(ord, lastSubIndex, starts);
+ return ((ByteVectorValues) subs[lastSubIndex].sub)
+ .vectorValue(ord - subs[lastSubIndex].ordStart);
+ }
- @Override
- public VectorScorer scorer(byte[] target) {
- throw new UnsupportedOperationException();
+ @SuppressWarnings("unchecked")
+ @Override
+ public ByteVectorValues copy() throws IOException {
+ List<DocValuesSub<ByteVectorValues>> newSubs = new ArrayList<>();
+ for (Object sub : subs) {
+ newSubs.add((DocValuesSub<ByteVectorValues>) sub);
}
- };
+ return new MergedByteVectorValues(dimension, size, newSubs);
+ }
+ }
+
+ private static int findSub(int ord, int lastSubIndex, int[] starts) {
+ if (ord >= starts[lastSubIndex]) {
+ if (ord >= starts[lastSubIndex + 1]) {
+ return binarySearchStarts(starts, ord, lastSubIndex + 1, starts.length);
+ }
+ } else {
+ return binarySearchStarts(starts, ord, 0, lastSubIndex);
+ }
+ return lastSubIndex;
+ }
+
+ private static int binarySearchStarts(int[] starts, int ord, int from, int to) {
+ int pos = Arrays.binarySearch(starts, from, to, ord);
+ if (pos < 0) {
+ // subtract one since binarySearch returns an *insertion point*
+ return -2 - pos;
+ } else {
+ while (pos < starts.length - 1 && starts[pos + 1] == ord) {
+ // Arrays.binarySearch can return any of a sequence of repeated value
+ // but we always want the last one
+ ++pos;
+ }
+ return pos;
+ }
}
@Override
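The findSub/binarySearchStarts pair added above maps a global vector ordinal to the sub-reader that owns it: starts[] holds each sub's first global ordinal plus a trailing sentinel equal to the total size, and lastSubIndex caches the previous hit since lookups tend to cluster. A worked sketch of the lookup (values illustrative, not part of this patch):

    // Three subs holding 4, 0 and 3 vectors: starts = [0, 4, 4, 7].
    int[] starts = {0, 4, 4, 7};
    int ord = 5;
    int pos = Arrays.binarySearch(starts, ord); // -4: not found, insertion point 3
    if (pos < 0) {
      pos = -2 - pos; // 2: the sub whose start is the last entry <= ord
    }
    int localOrd = ord - starts[pos]; // 1: ordinal within sub 2
    // For ord == 4, binarySearch may land on either repeated 4; the loop in
    // binarySearchStarts advances to the last one, skipping the empty sub.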
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
index fee0fc2f730..daec0c197d6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
@@ -25,6 +25,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
+import java.util.function.Supplier;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.KnnVectorsReader;
@@ -32,10 +33,11 @@ import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;
@@ -206,121 +208,175 @@ public final class SortingCodecReader extends FilterCodecReader {
}
}
- /** Sorting FloatVectorValues that iterate over documents in the order of the provided sortMap */
- private static class SortingFloatVectorValues extends FloatVectorValues {
- final int size;
- final int dimension;
- final FixedBitSet docsWithField;
- final float[][] vectors;
+ /**
+ * Factory for SortingValuesIterator. This enables us to create new iterators as needed without
+ * recomputing the sorting mappings.
+ */
+ static class SortingIteratorSupplier implements Supplier<SortingValuesIterator> {
+ private final FixedBitSet docBits;
+ private final int[] docToOrd;
+ private final int size;
- private int docId = -1;
+ SortingIteratorSupplier(FixedBitSet docBits, int[] docToOrd, int size) {
+ this.docBits = docBits;
+ this.docToOrd = docToOrd;
+ this.size = size;
+ }
- SortingFloatVectorValues(FloatVectorValues delegate, Sorter.DocMap sortMap) throws IOException {
- this.size = delegate.size();
- this.dimension = delegate.dimension();
- docsWithField = new FixedBitSet(sortMap.size());
- vectors = new float[sortMap.size()][];
- for (int doc = delegate.nextDoc(); doc != NO_MORE_DOCS; doc = delegate.nextDoc()) {
- int newDocID = sortMap.oldToNew(doc);
- docsWithField.set(newDocID);
- vectors[newDocID] = delegate.vectorValue().clone();
+ @Override
+ public SortingValuesIterator get() {
+ return new SortingValuesIterator(docBits, docToOrd, size);
+ }
+
+ public int size() {
+ return size;
+ }
+ }
+
+ /**
+ * Creates a factory for SortingValuesIterator. Does the work of computing the (new docId to old
+ * ordinal) mapping, and caches the result, enabling it to create new iterators cheaply.
+ *
+ * @param values the values over which to iterate
+ * @param docMap the mapping from "old" docIds to "new" (sorted) docIds.
+ */
+ public static SortingIteratorSupplier iteratorSupplier(
+ KnnVectorValues values, Sorter.DocMap docMap) throws IOException {
+
+ final int[] docToOrd = new int[docMap.size()];
+ final FixedBitSet docBits = new FixedBitSet(docMap.size());
+ int count = 0;
+ // Note: docToOrd will contain zero for docids that have no vector. This is OK though
+ // because the iterator cannot be positioned on such docs
+ KnnVectorValues.DocIndexIterator iter = values.iterator();
+ for (int doc = iter.nextDoc(); doc != NO_MORE_DOCS; doc = iter.nextDoc()) {
+ int newDocId = docMap.oldToNew(doc);
+ if (newDocId != -1) {
+ docToOrd[newDocId] = iter.index();
+ docBits.set(newDocId);
+ ++count;
}
}
+ return new SortingIteratorSupplier(docBits, docToOrd, count);
+ }
+
+ /**
+ * Iterator over KnnVectorValues accepting a mapping to differently-sorted docs. Consequently
+ * index() may skip around, not increasing monotonically as iteration proceeds.
+ */
+ public static class SortingValuesIterator extends KnnVectorValues.DocIndexIterator {
+ private final FixedBitSet docBits;
+ private final DocIdSetIterator docsWithValues;
+ private final int[] docToOrd;
+
+ int doc = -1;
+
+ SortingValuesIterator(FixedBitSet docBits, int[] docToOrd, int size) {
+ this.docBits = docBits;
+ this.docToOrd = docToOrd;
+ docsWithValues = new BitSetIterator(docBits, size);
+ }
@Override
public int docID() {
- return docId;
+ return doc;
+ }
+
+ @Override
+ public int index() {
+ assert docBits.get(doc);
+ return docToOrd[doc];
}
@Override
public int nextDoc() throws IOException {
- return advance(docId + 1);
- }
-
- @Override
- public float[] vectorValue() throws IOException {
- return vectors[docId];
- }
-
- @Override
- public int dimension() {
- return dimension;
- }
-
- @Override
- public int size() {
- return size;
- }
-
- @Override
- public int advance(int target) throws IOException {
- if (target >= docsWithField.length()) {
- return NO_MORE_DOCS;
+ if (doc != NO_MORE_DOCS) {
+ doc = docsWithValues.nextDoc();
}
- return docId = docsWithField.nextSetBit(target);
+ return doc;
}
@Override
- public VectorScorer scorer(float[] target) {
+ public long cost() {
+ return docBits.cardinality();
+ }
+
+ @Override
+ public int advance(int target) {
throw new UnsupportedOperationException();
}
}
- private static class SortingByteVectorValues extends ByteVectorValues {
- final int size;
- final int dimension;
- final FixedBitSet docsWithField;
- final byte[][] vectors;
+ /** Sorting FloatVectorValues that maps ordinals using the provided sortMap */
+ private static class SortingFloatVectorValues extends FloatVectorValues {
+ final FloatVectorValues delegate;
+ final SortingIteratorSupplier iteratorSupplier;
- private int docId = -1;
-
- SortingByteVectorValues(ByteVectorValues delegate, Sorter.DocMap sortMap) throws IOException {
- this.size = delegate.size();
- this.dimension = delegate.dimension();
- docsWithField = new FixedBitSet(sortMap.size());
- vectors = new byte[sortMap.size()][];
- for (int doc = delegate.nextDoc(); doc != NO_MORE_DOCS; doc = delegate.nextDoc()) {
- int newDocID = sortMap.oldToNew(doc);
- docsWithField.set(newDocID);
- vectors[newDocID] = delegate.vectorValue().clone();
- }
+ SortingFloatVectorValues(FloatVectorValues delegate, Sorter.DocMap sortMap) throws IOException {
+ this.delegate = delegate;
+ // SortingValuesIterator consumes the iterator and records the docs and ord mapping
+ iteratorSupplier = iteratorSupplier(delegate, sortMap);
}
@Override
- public int docID() {
- return docId;
- }
-
- @Override
- public int nextDoc() throws IOException {
- return advance(docId + 1);
- }
-
- @Override
- public byte[] vectorValue() throws IOException {
- return vectors[docId];
+ public float[] vectorValue(int ord) throws IOException {
+ // ords are interpreted in the delegate's ord-space.
+ return delegate.vectorValue(ord);
}
@Override
public int dimension() {
- return dimension;
+ return delegate.dimension();
}
@Override
public int size() {
- return size;
+ return iteratorSupplier.size();
}
@Override
- public int advance(int target) throws IOException {
- if (target >= docsWithField.length()) {
- return NO_MORE_DOCS;
- }
- return docId = docsWithField.nextSetBit(target);
+ public FloatVectorValues copy() {
+ throw new UnsupportedOperationException();
}
@Override
- public VectorScorer scorer(byte[] target) {
+ public DocIndexIterator iterator() {
+ return iteratorSupplier.get();
+ }
+ }
+
+ private static class SortingByteVectorValues extends ByteVectorValues {
+ final ByteVectorValues delegate;
+ final SortingIteratorSupplier iteratorSupplier;
+
+ SortingByteVectorValues(ByteVectorValues delegate, Sorter.DocMap sortMap) throws IOException {
+ this.delegate = delegate;
+ // SortingValuesIterator consumes the iterator and records the docs and ord mapping
+ iteratorSupplier = iteratorSupplier(delegate, sortMap);
+ }
+
+ @Override
+ public byte[] vectorValue(int ord) throws IOException {
+ return delegate.vectorValue(ord);
+ }
+
+ @Override
+ public DocIndexIterator iterator() {
+ return iteratorSupplier.get();
+ }
+
+ @Override
+ public int dimension() {
+ return delegate.dimension();
+ }
+
+ @Override
+ public int size() {
+ return iteratorSupplier.size();
+ }
+
+ @Override
+ public ByteVectorValues copy() {
throw new UnsupportedOperationException();
}
}
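The net effect is that SortingCodecReader no longer copies every vector into heap arrays; it keeps the delegate and only precomputes a (new docId -> old ordinal) map, so vectorValue(ord) passes straight through in the delegate's ordinal space. A minimal usage sketch, assuming a CodecReader named reader, an index sort named sort, and a field named "vec" (all illustrative):

    CodecReader sorted = SortingCodecReader.wrap(reader, sort);
    FloatVectorValues values = sorted.getFloatVectorValues("vec");
    KnnVectorValues.DocIndexIterator it = values.iterator(); // a SortingValuesIterator
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // docs come back in sorted order, but index() jumps around: it is the
      // ordinal in the unsorted delegate
      float[] vector = values.vectorValue(it.index());
    }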
diff --git a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
index 702df660c44..2fb0c0783a2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
@@ -438,7 +438,10 @@ public class TieredMergePolicy extends MergePolicy {
}
// allowedSegCount may occasionally be less than segsPerTier
// if segment sizes are below the floor size
- allowedSegCount = Math.max(allowedSegCount, Math.max(segsPerTier, targetSearchConcurrency));
+ allowedSegCount = Math.max(allowedSegCount, segsPerTier);
+ // No need to merge if the total number of segments (including too big segments) is less than or
+ // equal to the target search concurrency.
+ allowedSegCount = Math.max(allowedSegCount, targetSearchConcurrency - tooBigCount);
int allowedDocCount = getMaxAllowedDocs(totalMaxDoc, totalDelDocs);
if (verbose(mergeContext) && tooBigCount > 0) {
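A worked example of the new bound, with illustrative numbers: suppose segsPerTier = 10, targetSearchConcurrency = 16, tooBigCount = 8, and the floor-adjusted allowedSegCount is 9. The old code allowed max(9, max(10, 16)) = 16 segments, while the new code allows max(max(9, 10), 16 - 8) = 10, because the 8 oversized segments already contribute to search concurrency and should no longer inflate the merge budget.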
diff --git a/lucene/core/src/java/org/apache/lucene/internal/hppc/CharObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/internal/hppc/CharObjectHashMap.java
index b1fad7017b5..40b32141f3f 100644
--- a/lucene/core/src/java/org/apache/lucene/internal/hppc/CharObjectHashMap.java
+++ b/lucene/core/src/java/org/apache/lucene/internal/hppc/CharObjectHashMap.java
@@ -574,15 +574,6 @@ public class CharObjectHashMap<VType>
public int size() {
return CharObjectHashMap.this.size();
}
-
- public VType[] toArray() {
- VType[] array = (VType[]) new Object[size()];
- int i = 0;
- for (ObjectCursor<VType> cursor : this) {
- array[i++] = cursor.value;
- }
- return array;
- }
}
/** An iterator over the set of assigned values. */
diff --git a/lucene/core/src/java/org/apache/lucene/internal/hppc/IntObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/internal/hppc/IntObjectHashMap.java
index 180bb3249f3..732b0ecb71c 100644
--- a/lucene/core/src/java/org/apache/lucene/internal/hppc/IntObjectHashMap.java
+++ b/lucene/core/src/java/org/apache/lucene/internal/hppc/IntObjectHashMap.java
@@ -562,15 +562,6 @@ public class IntObjectHashMap<VType>
public int size() {
return IntObjectHashMap.this.size();
}
-
- public VType[] toArray() {
- VType[] array = (VType[]) new Object[size()];
- int i = 0;
- for (ObjectCursor<VType> cursor : this) {
- array[i++] = cursor.value;
- }
- return array;
- }
}
/** An iterator over the set of assigned values. */
diff --git a/lucene/core/src/java/org/apache/lucene/internal/hppc/LongObjectHashMap.java b/lucene/core/src/java/org/apache/lucene/internal/hppc/LongObjectHashMap.java
index 4bc890b80b1..5f34625f675 100644
--- a/lucene/core/src/java/org/apache/lucene/internal/hppc/LongObjectHashMap.java
+++ b/lucene/core/src/java/org/apache/lucene/internal/hppc/LongObjectHashMap.java
@@ -562,15 +562,6 @@ public class LongObjectHashMap<VType>
public int size() {
return LongObjectHashMap.this.size();
}
-
- public VType[] toArray() {
- VType[] array = (VType[]) new Object[size()];
- int i = 0;
- for (ObjectCursor<VType> cursor : this) {
- array[i++] = cursor.value;
- }
- return array;
- }
}
/** An iterator over the set of assigned values. */
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java b/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
index 409bcbc0b64..adaace27727 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
@@ -181,8 +181,8 @@ public class FieldExistsQuery extends Query {
} else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
iterator =
switch (fieldInfo.getVectorEncoding()) {
- case FLOAT32 -> context.reader().getFloatVectorValues(field);
- case BYTE -> context.reader().getByteVectorValues(field);
+ case FLOAT32 -> context.reader().getFloatVectorValues(field).iterator();
+ case BYTE -> context.reader().getByteVectorValues(field).iterator();
};
} else if (fieldInfo.getDocValuesType()
!= DocValuesType.NONE) { // the field indexes doc values
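This call-site change follows from the new KnnVectorValues abstraction: vector values no longer extend DocIdSetIterator, so anything that needs to iterate docs must ask for the iterator explicitly. A sketch of the migration pattern (field name illustrative):

    // Before this patch, FloatVectorValues was itself a DocIdSetIterator:
    //   DocIdSetIterator it = reader.getFloatVectorValues("vec");
    // After it, iteration is a separate object:
    DocIdSetIterator it = reader.getFloatVectorValues("vec").iterator();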
diff --git a/lucene/core/src/java/org/apache/lucene/search/HitsThresholdChecker.java b/lucene/core/src/java/org/apache/lucene/search/HitsThresholdChecker.java
index 43ff4fecdbb..78e1a589a77 100644
--- a/lucene/core/src/java/org/apache/lucene/search/HitsThresholdChecker.java
+++ b/lucene/core/src/java/org/apache/lucene/search/HitsThresholdChecker.java
@@ -46,7 +46,11 @@ abstract class HitsThresholdChecker {
if (thresholdReached) {
return true;
}
- return thresholdReached = globalHitCount.longValue() > getHitsThreshold();
+ if (globalHitCount.longValue() > getHitsThreshold()) {
+ thresholdReached = true;
+ return true;
+ }
+ return false;
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
index 5940a80a961..eac33dbf039 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
@@ -35,8 +35,8 @@ final class MaxScoreAccumulator {
}
/**
- * Return the max encoded DocAndScore in a way that is consistent with {@link
- * DocAndScore#compareTo}.
+ * Return the max encoded docId and score found in the two longs, following the encoding in {@link
+ * #accumulate}.
*/
private static long maxEncode(long v1, long v2) {
float score1 = Float.intBitsToFloat((int) (v1 >> 32));
@@ -57,26 +57,15 @@ final class MaxScoreAccumulator {
acc.accumulate(encode);
}
- DocAndScore get() {
- long value = acc.get();
- if (value == Long.MIN_VALUE) {
- return null;
- }
- float score = Float.intBitsToFloat((int) (value >> 32));
- int docId = (int) value;
- return new DocAndScore(docId, score);
+ public static float toScore(long value) {
+ return Float.intBitsToFloat((int) (value >> 32));
}
- record DocAndScore(int docId, float score) implements Comparable<DocAndScore> {
+ public static int docId(long value) {
+ return (int) value;
+ }
- @Override
- public int compareTo(DocAndScore o) {
- int cmp = Float.compare(score, o.score);
- if (cmp == 0) {
- // tie-break on doc id, lower id has the priority
- return Integer.compare(o.docId, docId);
- }
- return cmp;
- }
+ long getRaw() {
+ return acc.get();
}
}
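With DocAndScore gone, the accumulator's state is a single long: the score's float bits in the high 32 bits and the docId in the low 32 bits, so LongAccumulator can combine updates without allocating. A round-trip sketch of the encoding assumed by toScore/docId (package-private access assumed; values illustrative):

    float score = 1.5f;
    int docId = 42;
    long packed = (((long) Float.floatToIntBits(score)) << 32) | docId;
    // decode with the new static helpers
    assert MaxScoreAccumulator.toScore(packed) == score;
    assert MaxScoreAccumulator.docId(packed) == docId;
    // packing non-negative scores in the high bits means higher scores compare
    // greater as longs; maxEncode breaks exact score ties by preferring the lower docId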
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
index 8786343ccec..18f5b83e93a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
@@ -40,6 +40,8 @@ final class MaxScoreBulkScorer extends BulkScorer {
// Index of the first scorer that is required, this scorer and all following scorers are required
// for a document to match.
int firstRequiredScorer;
+ // The minimum value of minCompetitiveScore that would produce a more favorable partitioning.
+ float nextMinCompetitiveScore;
private final long cost;
float minCompetitiveScore;
private final Score scorable = new Score();
@@ -114,9 +116,14 @@ final class MaxScoreBulkScorer extends BulkScorer {
while (top.doc < outerWindowMax) {
scoreInnerWindow(collector, acceptDocs, outerWindowMax);
top = essentialQueue.top();
+ if (minCompetitiveScore >= nextMinCompetitiveScore) {
+ // The minimum competitive score increased substantially, so we can now partition scorers
+ // in a more favorable way.
+ break;
+ }
}
- outerWindowMin = outerWindowMax;
+ outerWindowMin = Math.min(top.doc, outerWindowMax);
}
return nextCandidate(max);
@@ -337,6 +344,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
});
double maxScoreSum = 0;
firstEssentialScorer = 0;
+ nextMinCompetitiveScore = Float.POSITIVE_INFINITY;
for (int i = 0; i < allScorers.length; ++i) {
final DisiWrapper w = scratch[i];
double newMaxScoreSum = maxScoreSum + w.maxWindowScore;
@@ -349,6 +357,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
firstEssentialScorer++;
} else {
allScorers[allScorers.length - 1 - (i - firstEssentialScorer)] = w;
+ nextMinCompetitiveScore = Math.min(maxScoreSumFloat, nextMinCompetitiveScore);
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java
index 2d38370e86a..a449f675daa 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreCache.java
@@ -71,7 +71,9 @@ public final class MaxScoreCache {
private float computeMaxScore(List impacts) {
float maxScore = 0;
- for (Impact impact : impacts) {
+ var scorer = this.scorer;
+ for (int i = 0, length = impacts.size(); i < length; i++) {
+ Impact impact = impacts.get(i);
maxScore = Math.max(scorer.score(impact.freq, impact.norm), maxScore);
}
return maxScore;
diff --git a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
index c2efa68a45b..f0e0cfd6bdb 100644
--- a/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/PointInSetQuery.java
@@ -181,7 +181,7 @@ public abstract class PointInSetQuery extends Query implements Accountable {
@Override
public Scorer get(long leadCost) throws IOException {
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
- values.intersect(new MergePointVisitor(sortedPackedPoints, result));
+ values.intersect(new MergePointVisitor(sortedPackedPoints.iterator(), result));
DocIdSetIterator iterator = result.build().iterator();
return new ConstantScoreScorer(score(), scoreMode, iterator);
}
@@ -192,7 +192,9 @@ public abstract class PointInSetQuery extends Query implements Accountable {
if (cost == -1) {
// Computing the cost may be expensive, so only do it if necessary
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
- cost = values.estimateDocCount(new MergePointVisitor(sortedPackedPoints, result));
+ cost =
+ values.estimateDocCount(
+ new MergePointVisitor(sortedPackedPoints.iterator(), result));
assert cost >= 0;
}
return cost;
@@ -260,18 +262,15 @@ public abstract class PointInSetQuery extends Query implements Accountable {
private class MergePointVisitor implements IntersectVisitor {
private final DocIdSetBuilder result;
- private TermIterator iterator;
+ private final TermIterator iterator;
private BytesRef nextQueryPoint;
private final ByteArrayComparator comparator;
- private final PrefixCodedTerms sortedPackedPoints;
private DocIdSetBuilder.BulkAdder adder;
- public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result)
- throws IOException {
+ public MergePointVisitor(TermIterator iterator, DocIdSetBuilder result) throws IOException {
this.result = result;
- this.sortedPackedPoints = sortedPackedPoints;
this.comparator = ArrayUtil.getUnsignedComparator(bytesPerDim);
- this.iterator = this.sortedPackedPoints.iterator();
+ this.iterator = iterator;
nextQueryPoint = iterator.next();
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java b/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java
index 331d692a854..6c89c267a52 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TaskExecutor.java
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.Callable;
@@ -73,15 +72,68 @@ public final class TaskExecutor {
/**
* Execute all the callables provided as an argument, wait for them to complete and return the
* obtained results. If an exception is thrown by more than one callable, the subsequent ones will
- * be added as suppressed exceptions to the first one that was caught.
+ * be added as suppressed exceptions to the first one that was caught. Additionally, if one task
+ * throws an exception, all other tasks from the same group are cancelled, to avoid needless
+ * computation as their results would not be exposed anyways.
*
* @param callables the callables to execute
* @return a list containing the results from the tasks execution
* @param <T> the return type of the task execution
*/
public <T> List<T> invokeAll(Collection<Callable<T>> callables) throws IOException {
- TaskGroup<T> taskGroup = new TaskGroup<>(callables);
- return taskGroup.invokeAll(executor);
+ List<Task<T>> futures = new ArrayList<>(callables.size());
+ for (Callable<T> callable : callables) {
+ futures.add(new Task<>(callable, futures));
+ }
+ final int count = futures.size();
+ // taskId provides the first index of an un-executed task in #futures
+ final AtomicInteger taskId = new AtomicInteger(0);
+ // we fork execution count - 1 tasks to execute at least one task on the current thread to
+ // minimize needless forking and blocking of the current thread
+ if (count > 1) {
+ final Runnable work =
+ () -> {
+ int id = taskId.getAndIncrement();
+ if (id < count) {
+ futures.get(id).run();
+ }
+ };
+ for (int j = 0; j < count - 1; j++) {
+ executor.execute(work);
+ }
+ }
+ // try to execute as many tasks as possible on the current thread to minimize context
+ // switching in case of long running concurrent
+ // tasks as well as dead-locking if the current thread is part of #executor for executors that
+ // have limited or no parallelism
+ int id;
+ while ((id = taskId.getAndIncrement()) < count) {
+ futures.get(id).run();
+ if (id >= count - 1) {
+ // save redundant CAS in case this was the last task
+ break;
+ }
+ }
+ return collectResults(futures);
+ }
+
+ private static <T> List<T> collectResults(List<Task<T>> futures) throws IOException {
+ Throwable exc = null;
+ List<T> results = new ArrayList<>(futures.size());
+ for (Future<T> future : futures) {
+ try {
+ results.add(future.get());
+ } catch (InterruptedException e) {
+ exc = IOUtils.useOrSuppress(exc, new ThreadInterruptedException(e));
+ } catch (ExecutionException e) {
+ exc = IOUtils.useOrSuppress(exc, e.getCause());
+ }
+ }
+ assert assertAllFuturesCompleted(futures) : "Some tasks are still running?";
+ if (exc != null) {
+ throw IOUtils.rethrowAlways(exc);
+ }
+ return results;
}
@Override
@@ -89,128 +141,62 @@ public final class TaskExecutor {
return "TaskExecutor(" + "executor=" + executor + ')';
}
- /**
- * Holds all the sub-tasks that a certain operation gets split into as it gets parallelized and
- * exposes the ability to invoke such tasks and wait for them all to complete their execution and
- * provide their results. Additionally, if one task throws an exception, all other tasks from the
- * same group are cancelled, to avoid needless computation as their results would not be exposed
- * anyways. Creates one {@link FutureTask} for each {@link Callable} provided
- *
- * @param <T> the return type of all the callables
- */
- private static final class TaskGroup<T> {
- private final List<RunnableFuture<T>> futures;
-
- TaskGroup(Collection<Callable<T>> callables) {
- List<RunnableFuture<T>> tasks = new ArrayList<>(callables.size());
- for (Callable<T> callable : callables) {
- tasks.add(createTask(callable));
+ private static boolean assertAllFuturesCompleted(Collection<? extends Future<?>> futures) {
+ for (Future<?> future : futures) {
+ if (future.isDone() == false) {
+ return false;
}
- this.futures = Collections.unmodifiableList(tasks);
+ }
+ return true;
+ }
+
+ private static void cancelAll(Collection<? extends Future<?>> futures) {
+ for (Future<?> future : futures) {
+ future.cancel(false);
+ }
+ }
+
+ private static class Task<T> extends FutureTask<T> {
+
+ private final AtomicBoolean startedOrCancelled = new AtomicBoolean(false);
+
+ private final Collection<? extends Future<?>> futures;
+
+ public Task(Callable<T> callable, Collection<? extends Future<?>> futures) {
+ super(callable);
+ this.futures = futures;
}
- RunnableFuture<T> createTask(Callable<T> callable) {
- return new FutureTask<>(callable) {
-
- private final AtomicBoolean startedOrCancelled = new AtomicBoolean(false);
-
- @Override
- public void run() {
- if (startedOrCancelled.compareAndSet(false, true)) {
- super.run();
- }
- }
-
- @Override
- protected void setException(Throwable t) {
- super.setException(t);
- cancelAll();
- }
-
- @Override
- public boolean cancel(boolean mayInterruptIfRunning) {
- assert mayInterruptIfRunning == false
- : "cancelling tasks that are running is not supported";
- /*
- Future#get (called in invokeAll) throws CancellationException when invoked against a running task that has been cancelled but
- leaves the task running. We rather want to make sure that invokeAll does not leave any running tasks behind when it returns.
- Overriding cancel ensures that tasks that are already started will complete normally once cancelled, and Future#get will
- wait for them to finish instead of throwing CancellationException. A cleaner way would have been to override FutureTask#get and
- make it wait for cancelled tasks, but FutureTask#awaitDone is private. Tasks that are cancelled before they are started will be no-op.
- */
- if (startedOrCancelled.compareAndSet(false, true)) {
- // task is cancelled hence it has no results to return. That's fine: they would be
- // ignored anyway.
- set(null);
- return true;
- }
- return false;
- }
- };
+ @Override
+ public void run() {
+ if (startedOrCancelled.compareAndSet(false, true)) {
+ super.run();
+ }
}
- List<T> invokeAll(Executor executor) throws IOException {
- final int count = futures.size();
- // taskId provides the first index of an un-executed task in #futures
- final AtomicInteger taskId = new AtomicInteger(0);
- // we fork execution count - 1 tasks to execute at least one task on the current thread to
- // minimize needless forking and blocking of the current thread
- if (count > 1) {
- final Runnable work =
- () -> {
- int id = taskId.getAndIncrement();
- if (id < count) {
- futures.get(id).run();
- }
- };
- for (int j = 0; j < count - 1; j++) {
- executor.execute(work);
- }
- }
- // try to execute as many tasks as possible on the current thread to minimize context
- // switching in case of long running concurrent
- // tasks as well as dead-locking if the current thread is part of #executor for executors that
- // have limited or no parallelism
- int id;
- while ((id = taskId.getAndIncrement()) < count) {
- futures.get(id).run();
- if (id >= count - 1) {
- // save redundant CAS in case this was the last task
- break;
- }
- }
- Throwable exc = null;
- List<T> results = new ArrayList<>(count);
- for (int i = 0; i < count; i++) {
- Future<T> future = futures.get(i);
- try {
- results.add(future.get());
- } catch (InterruptedException e) {
- exc = IOUtils.useOrSuppress(exc, new ThreadInterruptedException(e));
- } catch (ExecutionException e) {
- exc = IOUtils.useOrSuppress(exc, e.getCause());
- }
- }
- assert assertAllFuturesCompleted() : "Some tasks are still running?";
- if (exc != null) {
- throw IOUtils.rethrowAlways(exc);
- }
- return results;
+ @Override
+ protected void setException(Throwable t) {
+ super.setException(t);
+ cancelAll(futures);
}
- private boolean assertAllFuturesCompleted() {
- for (RunnableFuture<T> future : futures) {
- if (future.isDone() == false) {
- return false;
- }
- }
- return true;
- }
-
- private void cancelAll() {
- for (Future<T> future : futures) {
- future.cancel(false);
+ @Override
+ public boolean cancel(boolean mayInterruptIfRunning) {
+ assert mayInterruptIfRunning == false : "cancelling tasks that are running is not supported";
+ /*
+ Future#get (called in #collectResults) throws CancellationException when invoked against a running task that has been cancelled but
+ leaves the task running. We rather want to make sure that invokeAll does not leave any running tasks behind when it returns.
+ Overriding cancel ensures that tasks that are already started will complete normally once cancelled, and Future#get will
+ wait for them to finish instead of throwing CancellationException. A cleaner way would have been to override FutureTask#get and
+ make it wait for cancelled tasks, but FutureTask#awaitDone is private. Tasks that are cancelled before they are started will be no-op.
+ */
+ if (startedOrCancelled.compareAndSet(false, true)) {
+ // task is cancelled hence it has no results to return. That's fine: they would be
+ // ignored anyway.
+ set(null);
+ return true;
}
+ return false;
}
}
}
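Behavior is unchanged from a caller's perspective: invokeAll still forks count - 1 tasks and runs at least one on the calling thread. A minimal usage sketch, assuming some Executor named executor (illustrative):

    TaskExecutor taskExecutor = new TaskExecutor(executor);
    List<Callable<Integer>> tasks = List.of(() -> 1, () -> 2, () -> 3);
    // results come back in task order; the calling thread participates in
    // execution, so this cannot deadlock even if `executor` has no spare threads
    List<Integer> results = taskExecutor.invokeAll(tasks);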
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
index da01b24f0bd..c82df0ac1eb 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
@@ -27,13 +27,14 @@ import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefComparator;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringSorter;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -141,6 +135,16 @@ public class TermInSetQuery extends MultiTermQuery implements Accountable {
return termData.size();
}
+ /**
+ * Get an iterator over the encoded terms for query inspection.
+ *
+ * @lucene.experimental
+ */
+ public BytesRefIterator getBytesRefIterator() {
+ final TermIterator iterator = this.termData.iterator();
+ return () -> iterator.next();
+ }
+
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field) == false) {
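// A minimal usage sketch for the new getBytesRefIterator() accessor; the field and terms are
// illustrative. The iterator walks the prefix-coded term data in sorted order and returns null
// when exhausted, per the BytesRefIterator contract.
import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

final class TermInSetInspectionSketch {
  static void dumpTerms() throws IOException {
    TermInSetQuery query =
        new TermInSetQuery("id", List.of(new BytesRef("doc-1"), new BytesRef("doc-2")));
    BytesRefIterator it = query.getBytesRefIterator();
    for (BytesRef term = it.next(); term != null; term = it.next()) {
      System.out.println(term.utf8ToString());
    }
  }
}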
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 114797f44cb..eac31bf89d0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -24,7 +24,6 @@ import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
-import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
import org.apache.lucene.search.TotalHits.Relation;
/**
@@ -366,10 +365,12 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
// we can start checking the global maximum score even
// if the local queue is not full because the threshold
// is reached.
- DocAndScore maxMinScore = minScoreAcc.get();
- if (maxMinScore != null && maxMinScore.score() > minCompetitiveScore) {
- scorer.setMinCompetitiveScore(maxMinScore.score());
- minCompetitiveScore = maxMinScore.score();
+ long maxMinScore = minScoreAcc.getRaw();
+ float score;
+ if (maxMinScore != Long.MIN_VALUE
+ && (score = MaxScoreAccumulator.toScore(maxMinScore)) > minCompetitiveScore) {
+ scorer.setMinCompetitiveScore(score);
+ minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index b951aaa7f89..3469276982b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
/**
* A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link
@@ -226,13 +225,13 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
- DocAndScore maxMinScore = minScoreAcc.get();
- if (maxMinScore != null) {
+ long maxMinScore = minScoreAcc.getRaw();
+ if (maxMinScore != Long.MIN_VALUE) {
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
- float score =
- docBase >= maxMinScore.docId() ? Math.nextUp(maxMinScore.score()) : maxMinScore.score();
+ float score = MaxScoreAccumulator.toScore(maxMinScore);
+ score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? Math.nextUp(score) : score;
if (score > minCompetitiveScore) {
assert hitsThresholdChecker.isThresholdReached();
scorer.setMinCompetitiveScore(score);
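// Both collectors now read the accumulator's packed long directly, avoiding a DocAndScore
// allocation per competitiveness check. One plausible packing (an assumption for illustration,
// not necessarily MaxScoreAccumulator's exact layout): score bits in the upper half, so a plain
// numeric max on the long tracks the max score, doc id in the lower half, and Long.MIN_VALUE as
// the "nothing accumulated yet" sentinel.
final class PackedScoreSketch {
  static final long UNSET = Long.MIN_VALUE;

  static long encode(int docId, float score) {
    // bit patterns of non-negative floats sort like the floats themselves
    return (((long) Float.floatToIntBits(score)) << 32) | (docId & 0xFFFFFFFFL);
  }

  static float toScore(long packed) {
    return Float.intBitsToFloat((int) (packed >>> 32));
  }

  static int docId(long packed) {
    return (int) packed;
  }
}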
diff --git a/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java b/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java
index 1ca979d6794..051cd9ed633 100644
--- a/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/knn/MultiLeafKnnCollector.java
@@ -33,17 +33,17 @@ public final class MultiLeafKnnCollector implements KnnCollector {
// greediness of globally non-competitive search: (0,1]
private static final float DEFAULT_GREEDINESS = 0.9f;
+ private static final int DEFAULT_INTERVAL = 0xff;
// the global queue of the highest similarities collected so far across all segments
private final BlockingFloatHeap globalSimilarityQueue;
// the local queue of the highest similarities if we are not competitive globally
// the size of this queue is defined by greediness
private final FloatHeap nonCompetitiveQueue;
- private final float greediness;
// the queue of the local similarities to periodically update with the global queue
private final FloatHeap updatesQueue;
private final float[] updatesScratch;
// interval to synchronize the local and global queues, as a number of visited vectors
- private final int interval = 0xff; // 255
+ private final int interval;
private boolean kResultsCollected = false;
private float cachedGlobalMinSim = Float.NEGATIVE_INFINITY;
private final AbstractKnnCollector subCollector;
@@ -58,7 +58,32 @@ public final class MultiLeafKnnCollector implements KnnCollector {
*/
public MultiLeafKnnCollector(
int k, BlockingFloatHeap globalSimilarityQueue, AbstractKnnCollector subCollector) {
- this.greediness = DEFAULT_GREEDINESS;
+ this(k, DEFAULT_GREEDINESS, DEFAULT_INTERVAL, globalSimilarityQueue, subCollector);
+ }
+
+ /**
+ * Create a new MultiLeafKnnCollector.
+ *
+ * @param k the number of neighbors to collect
+ * @param greediness the greediness of the global search
+ * @param interval the interval, as a number of collected values, at which to synchronize the
+ *     local and global queues
+ * @param globalSimilarityQueue the global queue of the highest similarities collected so far
+ * @param subCollector the local collector
+ */
+ public MultiLeafKnnCollector(
+ int k,
+ float greediness,
+ int interval,
+ BlockingFloatHeap globalSimilarityQueue,
+ AbstractKnnCollector subCollector) {
+ if (greediness < 0 || greediness > 1) {
+ throw new IllegalArgumentException("greediness must be in [0,1]");
+ }
+ if (interval <= 0) {
+ throw new IllegalArgumentException("interval must be positive");
+ }
+ this.interval = interval;
this.subCollector = subCollector;
this.globalSimilarityQueue = globalSimilarityQueue;
this.nonCompetitiveQueue = new FloatHeap(Math.max(1, Math.round((1 - greediness) * k)));
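// Usage sketch for the new constructor: the two extra arguments make the previously hard-coded
// defaults tunable (the 0.9f and 0xff below simply restate those defaults). TopKnnCollector is
// one concrete AbstractKnnCollector; the unbounded visit limit is illustrative.
import org.apache.lucene.search.TopKnnCollector;
import org.apache.lucene.search.knn.MultiLeafKnnCollector;
import org.apache.lucene.util.hnsw.BlockingFloatHeap;

final class KnnCollectorSketch {
  static MultiLeafKnnCollector create(int k) {
    BlockingFloatHeap globalQueue = new BlockingFloatHeap(k);
    return new MultiLeafKnnCollector(
        k, 0.9f, 0xff, globalQueue, new TopKnnCollector(k, Integer.MAX_VALUE));
  }
}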
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
index 77f71782e31..b4546946acf 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/Axiomatic.java
@@ -44,13 +44,26 @@ public abstract class Axiomatic extends SimilarityBase {
protected final int queryLen;
/**
- * Constructor setting all Axiomatic hyperparameters
+ * Constructor setting all Axiomatic hyperparameters and using default discountOverlaps value.
*
* @param s hyperparam for the growth function
* @param queryLen the query length
* @param k hyperparam for the primitive weighting function
*/
public Axiomatic(float s, int queryLen, float k) {
+ this(true, s, queryLen, k);
+ }
+
+ /**
+ * Constructor setting all Axiomatic hyperparameters
+ *
+ * @param discountOverlaps true if overlap tokens should not impact document length for scoring.
+ * @param s hyperparam for the growth function
+ * @param queryLen the query length
+ * @param k hyperparam for the primitive weighting function
+ */
+ public Axiomatic(boolean discountOverlaps, float s, int queryLen, float k) {
+ super(discountOverlaps);
if (Float.isFinite(s) == false || Float.isNaN(s) || s < 0 || s > 1) {
throw new IllegalArgumentException("illegal s value: " + s + ", must be between 0 and 1");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java
index b9c651008cc..34d619ea69f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFISimilarity.java
@@ -46,11 +46,23 @@ public class DFISimilarity extends SimilarityBase {
private final Independence independence;
/**
- * Create DFI with the specified divergence from independence measure
+ * Create DFI with the specified divergence from independence measure and using default
+ * discountOverlaps value
*
* @param independenceMeasure measure of divergence from independence
*/
public DFISimilarity(Independence independenceMeasure) {
+ this(independenceMeasure, true);
+ }
+
+ /**
+ * Create DFI with the specified parameters
+ *
+ * @param independenceMeasure measure of divergence from independence
+ * @param discountOverlaps true if overlap tokens should not impact document length for scoring.
+ */
+ public DFISimilarity(Independence independenceMeasure, boolean discountOverlaps) {
+ super(discountOverlaps);
this.independence = independenceMeasure;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
index 0b3c1a5e7f0..08e424b3230 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/DFRSimilarity.java
@@ -83,7 +83,7 @@ public class DFRSimilarity extends SimilarityBase {
protected final Normalization normalization;
/**
- * Creates DFRSimilarity from the three components.
+ * Creates DFRSimilarity from the three components and using default discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
@@ -98,7 +98,7 @@ public class DFRSimilarity extends SimilarityBase {
}
/**
- * Creates DFRSimilarity from the three components.
+ * Creates DFRSimilarity from the three components and with the specified discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
index 5b0e93571b1..d2325d20033 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/IBSimilarity.java
@@ -76,7 +76,7 @@ public class IBSimilarity extends SimilarityBase {
protected final Normalization normalization;
/**
- * Creates IBSimilarity from the three components.
+ * Creates IBSimilarity from the three components and using default discountOverlaps value.
*
* <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
* pass {@link NoNormalization}.
@@ -86,6 +86,26 @@ public class IBSimilarity extends SimilarityBase {
* @param normalization term frequency normalization
*/
public IBSimilarity(Distribution distribution, Lambda lambda, Normalization normalization) {
+ this(distribution, lambda, normalization, true);
+ }
+
+ /**
+ * Creates IBSimilarity from the three components and with the specified discountOverlaps value.
+ *
+ * <p>Note that <code>null</code> values are not allowed: if you want no normalization, instead
+ * pass {@link NoNormalization}.
+ *
+ * @param distribution probabilistic distribution modeling term occurrence
+ * @param lambda distribution's λw parameter
+ * @param normalization term frequency normalization
+ * @param discountOverlaps true if overlap tokens should not impact document length for scoring.
+ */
+ public IBSimilarity(
+ Distribution distribution,
+ Lambda lambda,
+ Normalization normalization,
+ boolean discountOverlaps) {
+ super(discountOverlaps);
this.distribution = distribution;
this.lambda = lambda;
this.normalization = normalization;
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
index 9f708362bb5..b3994c5dc46 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/IndriDirichletSimilarity.java
@@ -37,6 +37,13 @@ public class IndriDirichletSimilarity extends LMSimilarity {
/** The μ parameter. */
private final float mu;
+ /** Instantiates the similarity with the provided parameters. */
+ public IndriDirichletSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float mu) {
+ super(collectionModel, discountOverlaps);
+ this.mu = mu;
+ }
+
/** Instantiates the similarity with the provided μ parameter. */
public IndriDirichletSimilarity(CollectionModel collectionModel, float mu) {
super(collectionModel);
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
index 51b1604aef1..ab80d0d337e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
@@ -39,7 +39,13 @@ public class LMDirichletSimilarity extends LMSimilarity {
/** Instantiates the similarity with the provided μ parameter. */
public LMDirichletSimilarity(CollectionModel collectionModel, float mu) {
- super(collectionModel);
+ this(collectionModel, true, mu);
+ }
+
+ /** Instantiates the similarity with the provided parameters. */
+ public LMDirichletSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float mu) {
+ super(collectionModel, discountOverlaps);
if (Float.isFinite(mu) == false || mu < 0) {
throw new IllegalArgumentException(
"illegal mu value: " + mu + ", must be a non-negative finite value");
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
index e1990f34b0b..7029fa8e133 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
@@ -43,7 +43,13 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
/** Instantiates with the specified collectionModel and λ parameter. */
public LMJelinekMercerSimilarity(CollectionModel collectionModel, float lambda) {
- super(collectionModel);
+ this(collectionModel, true, lambda);
+ }
+
+ /** Instantiates with the specified collectionModel and parameters. */
+ public LMJelinekMercerSimilarity(
+ CollectionModel collectionModel, boolean discountOverlaps, float lambda) {
+ super(collectionModel, discountOverlaps);
if (Float.isNaN(lambda) || lambda <= 0 || lambda > 1) {
throw new IllegalArgumentException("lambda must be in the range (0 .. 1]");
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
index e1536db268f..5bd48f37a34 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
@@ -43,6 +43,12 @@ public abstract class LMSimilarity extends SimilarityBase {
/** Creates a new instance with the specified collection language model. */
public LMSimilarity(CollectionModel collectionModel) {
+ this(collectionModel, true);
+ }
+
+ /** Creates a new instance with the specified collection language model and discountOverlaps. */
+ public LMSimilarity(CollectionModel collectionModel, boolean discountOverlaps) {
+ super(discountOverlaps);
this.collectionModel = collectionModel;
}
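// With these constructors, discountOverlaps becomes a construction-time choice for the LM
// similarities; a sketch (the mu value is illustrative):
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.apache.lucene.search.similarities.LMSimilarity;

final class SimilarityConfigSketch {
  static LMSimilarity withOverlapsCounted() {
    // false: count overlap tokens (positionIncrement == 0) toward document length
    return new LMDirichletSimilarity(new LMSimilarity.DefaultCollectionModel(), false, 2000f);
  }
}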
diff --git a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
index 13151692bc0..7f2aadf54a5 100644
--- a/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/BufferedIndexInput.java
@@ -151,7 +151,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc
}
@Override
- protected void readGroupVInt(long[] dst, int offset) throws IOException {
+ public void readGroupVInt(long[] dst, int offset) throws IOException {
final int len =
GroupVIntUtil.readGroupVInt(
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);
diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
index 4b722b61689..a09f78e5f3a 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataInput.java
@@ -204,7 +204,7 @@ public final class ByteBuffersDataInput extends DataInput
}
@Override
- protected void readGroupVInt(long[] dst, int offset) throws IOException {
+ public void readGroupVInt(long[] dst, int offset) throws IOException {
final ByteBuffer block = blocks[blockIndex(pos)];
final int blockOffset = blockOffset(pos);
// We MUST save the return value to local variable, could not use pos += readGroupVInt(...).
diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
index eaa0929848d..1c6bcd63629 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
@@ -31,7 +31,6 @@ import java.util.function.Consumer;
import java.util.function.IntFunction;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.UnicodeUtil;
@@ -415,12 +414,17 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
@Override
public void writeString(String v) {
try {
- if (v.length() <= MAX_CHARS_PER_WINDOW) {
- final BytesRef utf8 = new BytesRef(v);
- writeVInt(utf8.length);
- writeBytes(utf8.bytes, utf8.offset, utf8.length);
+ final int charCount = v.length();
+ final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(v, 0, charCount);
+ writeVInt(byteLen);
+ ByteBuffer currentBlock = this.currentBlock;
+ if (currentBlock.hasArray() && currentBlock.remaining() >= byteLen) {
+ int startingPos = currentBlock.position();
+ UnicodeUtil.UTF16toUTF8(
+ v, 0, charCount, currentBlock.array(), currentBlock.arrayOffset() + startingPos);
+ currentBlock.position(startingPos + byteLen);
} else {
- writeLongString(v);
+ writeLongString(byteLen, v);
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@@ -541,9 +545,7 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
}
/** Writes a long string in chunks */
- private void writeLongString(final String s) throws IOException {
- final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(s, 0, s.length());
- writeVInt(byteLen);
+ private void writeLongString(int byteLen, final String s) throws IOException {
final byte[] buf =
new byte[Math.min(byteLen, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * MAX_CHARS_PER_WINDOW)];
for (int i = 0, end = s.length(); i < end; ) {
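// The writeString rewrite hinges on computing the UTF-8 length up front: once byteLen is known,
// the vInt prefix can be written immediately and, on the fast path, characters are encoded
// straight into the current block's backing array with no intermediate BytesRef copy. A
// standalone sketch of that pair of UnicodeUtil calls:
import org.apache.lucene.util.UnicodeUtil;

final class Utf8EncodeSketch {
  static byte[] encode(String s) {
    int byteLen = UnicodeUtil.calcUTF16toUTF8Length(s, 0, s.length()); // no allocation
    byte[] out = new byte[byteLen];
    UnicodeUtil.UTF16toUTF8(s, 0, s.length(), out, 0); // encode directly at offset 0
    return out;
  }
}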
diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
index c66d864d570..6aebb771b68 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersIndexInput.java
@@ -206,7 +206,7 @@ public final class ByteBuffersIndexInput extends IndexInput implements RandomAcc
}
@Override
- protected void readGroupVInt(long[] dst, int offset) throws IOException {
+ public void readGroupVInt(long[] dst, int offset) throws IOException {
ensureOpen();
in.readGroupVInt(dst, offset);
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/DataInput.java b/lucene/core/src/java/org/apache/lucene/store/DataInput.java
index 427e81f2df2..70f9a96db9c 100644
--- a/lucene/core/src/java/org/apache/lucene/store/DataInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/DataInput.java
@@ -100,28 +100,10 @@ public abstract class DataInput implements Cloneable {
}
/**
- * Read all the group varints, including the tail vints. we need a long[] because this is what
- * postings are using, all longs are actually required to be integers.
- *
- * @param dst the array to read ints into.
- * @param limit the number of int values to read.
- * @lucene.experimental
- */
- public final void readGroupVInts(long[] dst, int limit) throws IOException {
- int i;
- for (i = 0; i <= limit - 4; i += 4) {
- readGroupVInt(dst, i);
- }
- for (; i < limit; ++i) {
- dst[i] = readVInt() & 0xFFFFFFFFL;
- }
- }
-
- /**
- * Override if you have a efficient implementation. In general this is when the input supports
+ * Override if you have an efficient implementation. In general this is when the input supports
* random access.
*/
- protected void readGroupVInt(long[] dst, int offset) throws IOException {
+ public void readGroupVInt(long[] dst, int offset) throws IOException {
GroupVIntUtil.readGroupVInt(this, dst, offset);
}
diff --git a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
index ee84d908838..38eb1dcbcee 100644
--- a/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/IndexInput.java
@@ -127,6 +127,10 @@ public abstract class IndexInput extends DataInput implements Closeable {
* CompoundFormat} implementations to honor the {@link ReadAdvice} of each file within the
* compound file.
*
+ *
+ * <p><b>NOTE</b>: it is only legal to call this method if this {@link IndexInput} has been
+ * opened with {@link ReadAdvice#NORMAL}. However, this method accepts any {@link ReadAdvice}
+ * value except {@code null} as a read advice for the slice.
+ *
*
The default implementation delegates to {@link #slice(String, long, long)} and ignores the
* {@link ReadAdvice}.
*/
diff --git a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java
index e1b5466342a..1c5033172db 100644
--- a/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/GroupVIntUtil.java
@@ -33,10 +33,29 @@ public final class GroupVIntUtil {
private static final long[] MASKS = new long[] {0xFFL, 0xFFFFL, 0xFFFFFFL, 0xFFFFFFFFL};
/**
- * Default implementation of read single group, for optimal performance, you should use {@link
- * DataInput#readGroupVInts(long[], int)} instead.
+ * Read all the group varints, including the tail vints. We need a long[] because this is what
+ * postings use; all longs are actually required to be integers.
*
* @param dst the array to read ints into.
+ * @param limit the number of int values to read.
+ * @lucene.experimental
+ */
+ public static void readGroupVInts(DataInput in, long[] dst, int limit) throws IOException {
+ int i;
+ for (i = 0; i <= limit - 4; i += 4) {
+ in.readGroupVInt(dst, i);
+ }
+ for (; i < limit; ++i) {
+ dst[i] = in.readVInt() & 0xFFFFFFFFL;
+ }
+ }
+
+ /**
+ * Default implementation of reading a single group. For optimal performance, you should use
+ * {@link GroupVIntUtil#readGroupVInts(DataInput, long[], int)} instead.
+ *
+ * @param in the input to use to read data.
+ * @param dst the array to read ints into.
* @param offset the offset in the array to start storing ints.
*/
public static void readGroupVInt(DataInput in, long[] dst, int offset) throws IOException {
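// Migration sketch for callers of the removed DataInput#readGroupVInts: the bulk loop now lives
// here in GroupVIntUtil, while DataInput subclasses override the now-public readGroupVInt hook.
import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.GroupVIntUtil;

final class GroupVIntReadSketch {
  // 'in' must be positioned at group-varint encoded data, e.g. postings deltas
  static long[] readValues(DataInput in, int count) throws IOException {
    long[] values = new long[count];
    GroupVIntUtil.readGroupVInts(in, values, count);
    return values;
  }
}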
diff --git a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
index d264c1da58d..5580f5fbe8e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
+++ b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
@@ -209,6 +209,156 @@ public abstract class StringHelper {
return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed);
}
+ /**
+ * Generates 128-bit hash from the byte array with the given offset, length and seed.
+ *
+ * <p>The code is adopted from Apache Commons (link)
+ *
+ * @param data The input byte array
+ * @param offset The first element of array
+ * @param length The length of array
+ * @param seed The initial seed value
+ * @return The 128-bit hash (2 longs)
+ */
+ public static long[] murmurhash3_x64_128(
+ final byte[] data, final int offset, final int length, final int seed) {
+ // Use an unsigned 32-bit integer as the seed
+ return murmurhash3_x64_128(data, offset, length, seed & 0xFFFFFFFFL);
+ }
+
+ @SuppressWarnings("fallthrough")
+ private static long[] murmurhash3_x64_128(
+ final byte[] data, final int offset, final int length, final long seed) {
+ long h1 = seed;
+ long h2 = seed;
+ final int nblocks = length >> 4;
+
+ // Constants for 128-bit variant
+ final long C1 = 0x87c37b91114253d5L;
+ final long C2 = 0x4cf5ad432745937fL;
+ final int R1 = 31;
+ final int R2 = 27;
+ final int R3 = 33;
+ final int M = 5;
+ final int N1 = 0x52dce729;
+ final int N2 = 0x38495ab5;
+
+ // body
+ for (int i = 0; i < nblocks; i++) {
+ final int index = offset + (i << 4);
+ long k1 = (long) BitUtil.VH_LE_LONG.get(data, index);
+ long k2 = (long) BitUtil.VH_LE_LONG.get(data, index + 8);
+
+ // mix functions for k1
+ k1 *= C1;
+ k1 = Long.rotateLeft(k1, R1);
+ k1 *= C2;
+ h1 ^= k1;
+ h1 = Long.rotateLeft(h1, R2);
+ h1 += h2;
+ h1 = h1 * M + N1;
+
+ // mix functions for k2
+ k2 *= C2;
+ k2 = Long.rotateLeft(k2, R3);
+ k2 *= C1;
+ h2 ^= k2;
+ h2 = Long.rotateLeft(h2, R1);
+ h2 += h1;
+ h2 = h2 * M + N2;
+ }
+
+ // tail
+ long k1 = 0;
+ long k2 = 0;
+ final int index = offset + (nblocks << 4);
+ switch (length & 0x0F) {
+ case 15:
+ k2 ^= ((long) data[index + 14] & 0xff) << 48;
+ case 14:
+ k2 ^= ((long) data[index + 13] & 0xff) << 40;
+ case 13:
+ k2 ^= ((long) data[index + 12] & 0xff) << 32;
+ case 12:
+ k2 ^= ((long) data[index + 11] & 0xff) << 24;
+ case 11:
+ k2 ^= ((long) data[index + 10] & 0xff) << 16;
+ case 10:
+ k2 ^= ((long) data[index + 9] & 0xff) << 8;
+ case 9:
+ k2 ^= data[index + 8] & 0xff;
+ k2 *= C2;
+ k2 = Long.rotateLeft(k2, R3);
+ k2 *= C1;
+ h2 ^= k2;
+
+ case 8:
+ k1 ^= ((long) data[index + 7] & 0xff) << 56;
+ case 7:
+ k1 ^= ((long) data[index + 6] & 0xff) << 48;
+ case 6:
+ k1 ^= ((long) data[index + 5] & 0xff) << 40;
+ case 5:
+ k1 ^= ((long) data[index + 4] & 0xff) << 32;
+ case 4:
+ k1 ^= ((long) data[index + 3] & 0xff) << 24;
+ case 3:
+ k1 ^= ((long) data[index + 2] & 0xff) << 16;
+ case 2:
+ k1 ^= ((long) data[index + 1] & 0xff) << 8;
+ case 1:
+ k1 ^= data[index] & 0xff;
+ k1 *= C1;
+ k1 = Long.rotateLeft(k1, R1);
+ k1 *= C2;
+ h1 ^= k1;
+ }
+
+ // finalization
+ h1 ^= length;
+ h2 ^= length;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix64(h1);
+ h2 = fmix64(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ return new long[] {h1, h2};
+ }
+
+ /**
+ * Performs the final avalanche mix step of the 64-bit hash function.
+ *
+ * @param hash The current hash
+ * @return The final hash
+ */
+ private static long fmix64(long hash) {
+ hash ^= (hash >>> 33);
+ hash *= 0xff51afd7ed558ccdL;
+ hash ^= (hash >>> 33);
+ hash *= 0xc4ceb9fe1a85ec53L;
+ hash ^= (hash >>> 33);
+ return hash;
+ }
+
+ /**
+ * Generates 128-bit hash from the byte array with the given offset, length and seed.
+ *
+ * <p>The code is adopted from Apache Commons (link)
+ *
+ * @param data The input data
+ * @return The 128-bit hash (2 longs)
+ */
+ public static long[] murmurhash3_x64_128(BytesRef data) {
+ return murmurhash3_x64_128(data.bytes, data.offset, data.length, 104729);
+ }
+
// Holds 128 bit unsigned value:
private static BigInteger nextId;
private static final BigInteger mask128;
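// Usage sketch for the new 128-bit hash: h1 is returned in slot 0 and h2 in slot 1. The
// explicit seed 42 is illustrative; the BytesRef overload above fixes the seed at 104729.
import java.nio.charset.StandardCharsets;
import org.apache.lucene.util.StringHelper;

final class Murmur128Sketch {
  static void demo() {
    byte[] data = "lucene".getBytes(StandardCharsets.UTF_8);
    long[] hash = StringHelper.murmurhash3_x64_128(data, 0, data.length, 42);
    System.out.printf("h1=%016x h2=%016x%n", hash[0], hash[1]);
  }
}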
diff --git a/lucene/core/src/java/org/apache/lucene/util/Version.java b/lucene/core/src/java/org/apache/lucene/util/Version.java
index 91eb4649efc..e232f1ab6d2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/Version.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Version.java
@@ -32,140 +32,23 @@ import java.util.jar.Manifest;
public final class Version {
/**
- * Match settings and bugs in Lucene's 9.0.0 release.
- *
- * @deprecated (9.1.0) Use latest
+ * @deprecated Use latest
*/
- @Deprecated public static final Version LUCENE_9_0_0 = new Version(9, 0, 0);
+ @Deprecated public static final Version LUCENE_10_0_0 = new Version(10, 0, 0);
/**
- * Match settings and bugs in Lucene's 9.1.0 release.
- *
- * @deprecated (9.2.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_1_0 = new Version(9, 1, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.2.0 release.
- *
- * @deprecated (9.3.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_2_0 = new Version(9, 2, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.3.0 release.
- *
- * @deprecated (9.4.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_3_0 = new Version(9, 3, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.4.0 release.
+ * Match settings and bugs in Lucene's 10.1.0 release.
*
* @deprecated Use latest
*/
- @Deprecated public static final Version LUCENE_9_4_0 = new Version(9, 4, 0);
+ @Deprecated public static final Version LUCENE_10_1_0 = new Version(10, 1, 0);
/**
- * Match settings and bugs in Lucene's 9.4.1 release.
- *
- * @deprecated Use latest
- * @deprecated (9.4.2) Use latest
- */
- @Deprecated public static final Version LUCENE_9_4_1 = new Version(9, 4, 1);
-
- /**
- * Match settings and bugs in Lucene's 9.4.2 release.
- *
- * @deprecated Use latest
- */
- @Deprecated public static final Version LUCENE_9_4_2 = new Version(9, 4, 2);
-
- /**
- * Match settings and bugs in Lucene's 9.5.0 release.
- *
- * @deprecated (9.6.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_5_0 = new Version(9, 5, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.6.0 release.
- *
- * @deprecated (9.7.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_6_0 = new Version(9, 6, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.7.0 release.
- *
- * @deprecated (9.8.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_7_0 = new Version(9, 7, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.8.0 release.
- *
- * @deprecated (9.9.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_8_0 = new Version(9, 8, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.9.0 release.
- *
- * @deprecated (9.9.1) Use latest
- */
- @Deprecated public static final Version LUCENE_9_9_0 = new Version(9, 9, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.9.1 release.
- *
- * @deprecated (9.9.2) Use latest
- */
- @Deprecated public static final Version LUCENE_9_9_1 = new Version(9, 9, 1);
-
- /**
- * Match settings and bugs in Lucene's 9.9.2 release.
- *
- * @deprecated (9.10.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_9_2 = new Version(9, 9, 2);
-
- /**
- * Match settings and bugs in Lucene's 9.10.0 release.
- *
- * @deprecated (9.11.0) Use latest
- */
- @Deprecated public static final Version LUCENE_9_10_0 = new Version(9, 10, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.11.0 release.
- *
- * @deprecated Use latest
- * @deprecated (9.12.0) Use latest
- * @deprecated (9.11.1) Use latest
- */
- @Deprecated public static final Version LUCENE_9_11_0 = new Version(9, 11, 0);
-
- /**
- * Match settings and bugs in Lucene's 9.11.1 release.
- *
- * @deprecated Use latest
- */
- @Deprecated public static final Version LUCENE_9_11_1 = new Version(9, 11, 1);
-
- /**
- * Match settings and bugs in Lucene's 9.12.0 release.
- *
- * @deprecated Use latest
- */
- @Deprecated public static final Version LUCENE_9_12_0 = new Version(9, 12, 0);
-
- /**
- * Match settings and bugs in Lucene's 10.0.0 release.
+ * Match settings and bugs in Lucene's 11.0.0 release.
*
* <p>Use this to get the latest &amp; greatest settings, bug fixes, etc, for Lucene.
*/
- public static final Version LUCENE_10_0_0 = new Version(10, 0, 0);
+ public static final Version LUCENE_11_0_0 = new Version(11, 0, 0);
// To add a new version:
// * Only add above this comment
@@ -181,7 +64,7 @@ public final class Version {
* re-test your entire application to ensure it behaves as expected, as some defaults may
* have changed and may break functionality in your application.
*/
- public static final Version LATEST = LUCENE_10_0_0;
+ public static final Version LATEST = LUCENE_11_0_0;
/**
* Constant for backwards compatibility.
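// With LATEST moved to 11.0.0, compatibility checks against index-creation versions keep
// working through Version.parse and onOrAfter; a sketch:
import java.text.ParseException;
import org.apache.lucene.util.Version;

final class VersionCheckSketch {
  static boolean isSupported(String created) throws ParseException {
    Version v = Version.parse(created); // e.g. "10.1.0"
    return v.onOrAfter(Version.LUCENE_10_0_0) && Version.LATEST.onOrAfter(v);
  }
}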
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
index 9f6a10b9ddc..b9ea0d9aa08 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
@@ -17,13 +17,16 @@
package org.apache.lucene.util.bkd;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.DocBaseBitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.LongsRef;
final class DocIdsWriter {
@@ -36,6 +39,7 @@ final class DocIdsWriter {
private static final byte LEGACY_DELTA_VINT = (byte) 0;
private final int[] scratch;
+ private final LongsRef scratchLongs = new LongsRef();
/**
* IntsRef to be used to iterate over the scratch buffer. A single instance is reused to avoid
@@ -205,12 +209,17 @@ final class DocIdsWriter {
}
}
- private static DocIdSetIterator readBitSetIterator(IndexInput in, int count) throws IOException {
+ private DocIdSetIterator readBitSetIterator(IndexInput in, int count) throws IOException {
int offsetWords = in.readVInt();
int longLen = in.readVInt();
- long[] bits = new long[longLen];
- in.readLongs(bits, 0, longLen);
- FixedBitSet bitSet = new FixedBitSet(bits, longLen << 6);
+ scratchLongs.longs = ArrayUtil.growNoCopy(scratchLongs.longs, longLen);
+ in.readLongs(scratchLongs.longs, 0, longLen);
+ // clear the ghost bits for FixedBitSet: if this read is shorter than the previous one, the
+ // reused scratch array still holds stale words, and FixedBitSet requires bits past numBits
+ // to be zero.
+ if (longLen < scratchLongs.length) {
+ Arrays.fill(scratchLongs.longs, longLen, scratchLongs.longs.length, 0);
+ }
+ scratchLongs.length = longLen;
+ FixedBitSet bitSet = new FixedBitSet(scratchLongs.longs, longLen << 6);
return new DocBaseBitSetIterator(bitSet, count, offsetWords << 6);
}
@@ -230,7 +239,7 @@ final class DocIdsWriter {
}
}
- private static void readBitSet(IndexInput in, int count, int[] docIDs) throws IOException {
+ private void readBitSet(IndexInput in, int count, int[] docIDs) throws IOException {
DocIdSetIterator iterator = readBitSetIterator(in, count);
int docId, pos = 0;
while ((docId = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
@@ -307,8 +316,7 @@ final class DocIdsWriter {
}
}
- private static void readBitSet(IndexInput in, int count, IntersectVisitor visitor)
- throws IOException {
+ private void readBitSet(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
DocIdSetIterator bitSetIterator = readBitSetIterator(in, count);
visitor.visit(bitSetIterator);
}
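// Why the Arrays.fill in readBitSetIterator matters: FixedBitSet(long[] bits, int numBits)
// asserts that "ghost bits" beyond numBits are clear, and a reused scratch array can still hold
// words from an earlier, longer read. A standalone illustration of restoring the invariant:
import java.util.Arrays;
import org.apache.lucene.util.FixedBitSet;

final class GhostBitsSketch {
  static FixedBitSet wrap(long[] scratch, int longLen) {
    Arrays.fill(scratch, longLen, scratch.length, 0L); // zero stale words past the live range
    return new FixedBitSet(scratch, longLen << 6);
  }
}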
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
index a6df78eaab5..c0f4bfeb572 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
@@ -775,10 +775,11 @@ public final class Util {
/** Just takes unsigned byte values from the BytesRef and converts into an IntsRef. */
public static IntsRef toIntsRef(BytesRef input, IntsRefBuilder scratch) {
- scratch.clear();
+ scratch.growNoCopy(input.length);
for (int i = 0; i < input.length; i++) {
- scratch.append(input.bytes[i + input.offset] & 0xFF);
+ scratch.setIntAt(i, input.bytes[i + input.offset] & 0xFF);
}
+ scratch.setLength(input.length);
return scratch.get();
}
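// The rewritten toIntsRef sizes the builder once and writes by position instead of appending
// byte by byte; observable behavior is unchanged. A quick check:
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Util;

final class ToIntsRefSketch {
  static void demo() {
    IntsRef ints = Util.toIntsRef(new BytesRef("abc"), new IntsRefBuilder());
    assert ints.length == 3 && ints.ints[ints.offset] == 'a';
  }
}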
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java
index 392d83fa262..c4e7d159b48 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/ConcurrentHnswMerger.java
@@ -19,7 +19,7 @@ package org.apache.lucene.util.hnsw;
import java.io.IOException;
import org.apache.lucene.codecs.hnsw.HnswGraphProvider;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;
@@ -46,7 +46,7 @@ public class ConcurrentHnswMerger extends IncrementalHnswGraphMerger {
}
@Override
- protected HnswBuilder createBuilder(DocIdSetIterator mergedVectorIterator, int maxOrd)
+ protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxOrd)
throws IOException {
if (initReader == null) {
return new HnswConcurrentMergeBuilder(
@@ -61,7 +61,7 @@ public class ConcurrentHnswMerger extends IncrementalHnswGraphMerger {
HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name);
BitSet initializedNodes = new FixedBitSet(maxOrd);
- int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorIterator, initializedNodes);
+ int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorValues, initializedNodes);
return new HnswConcurrentMergeBuilder(
taskExecutor,
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
index 1f5253ef7f8..bed1480e926 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphBuilder.java
@@ -439,7 +439,11 @@ public class HnswGraphBuilder implements HnswBuilder {
maxConn *= 2;
}
List<Component> components = HnswUtil.components(hnsw, level, notFullyConnected, maxConn);
- // System.out.println("HnswGraphBuilder.connectComponents level=" + level + ": " + components);
+ if (infoStream.isEnabled(HNSW_COMPONENT)) {
+ infoStream.message(
+ HNSW_COMPONENT, "connect " + components.size() + " components on level=" + level);
+ }
+ // System.out.println("HnswGraphBuilder. level=" + level + ": " + components);
boolean result = true;
if (components.size() > 1) {
// connect other components to the largest one
@@ -457,12 +461,16 @@ public class HnswGraphBuilder implements HnswBuilder {
if (c.start() == NO_MORE_DOCS) {
continue;
}
+ if (infoStream.isEnabled(HNSW_COMPONENT)) {
+ infoStream.message(HNSW_COMPONENT, "connect component " + c + " to " + c0);
+ }
+
beam.clear();
eps[0] = c0.start();
RandomVectorScorer scorer = scorerSupplier.scorer(c.start());
// find the closest node in the largest component to the lowest-numbered node in this
// component that has room to make a connection
- graphSearcher.searchLevel(beam, scorer, 0, eps, hnsw, notFullyConnected);
+ graphSearcher.searchLevel(beam, scorer, level, eps, hnsw, notFullyConnected);
boolean linked = false;
while (beam.size() > 0) {
int c0node = beam.popNode();
@@ -475,8 +483,14 @@ public class HnswGraphBuilder implements HnswBuilder {
// System.out.println("link " + c0 + "." + c0node + " to " + c + "." + c.start());
link(level, c0node, c.start(), score, notFullyConnected);
linked = true;
+ if (infoStream.isEnabled(HNSW_COMPONENT)) {
+ infoStream.message(HNSW_COMPONENT, "connected ok " + c0node + " -> " + c.start());
+ }
}
if (!linked) {
+ if (infoStream.isEnabled(HNSW_COMPONENT)) {
+ infoStream.message(HNSW_COMPONENT, "not connected; no free nodes found");
+ }
result = false;
}
}
@@ -541,7 +555,7 @@ public class HnswGraphBuilder implements HnswBuilder {
return queue.nodes();
}
- float minimumScore() {
+ public float minimumScore() {
return queue.topScore();
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphMerger.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphMerger.java
index 7ed5dd142de..31e9c768dc0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraphMerger.java
@@ -18,8 +18,8 @@ package org.apache.lucene.util.hnsw;
import java.io.IOException;
import org.apache.lucene.codecs.KnnVectorsReader;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.InfoStream;
@@ -45,12 +45,12 @@ public interface HnswGraphMerger {
/**
* Merge and produce the on heap graph
*
- * @param mergedVectorIterator iterator over the vectors in the merged segment
+ * @param mergedVectorValues view of the vectors in the merged segment
* @param infoStream optional info stream to set to builder
* @param maxOrd max number of vectors that will be added to the graph
* @return merged graph
* @throws IOException during merge
*/
- OnHeapHnswGraph merge(DocIdSetIterator mergedVectorIterator, InfoStream infoStream, int maxOrd)
+ OnHeapHnswGraph merge(KnnVectorValues mergedVectorValues, InfoStream infoStream, int maxOrd)
throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java
index 7331111d45a..d64961a02ee 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/IncrementalHnswGraphMerger.java
@@ -25,9 +25,9 @@ import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.internal.hppc.IntIntHashMap;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
@@ -108,12 +108,12 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
* Builds a new HnswGraphBuilder using the biggest graph from the merge state as a starting point.
* If no valid readers were added to the merge state, a new graph is created.
*
- * @param mergedVectorIterator iterator over the vectors in the merged segment
+ * @param mergedVectorValues vector values in the merged segment
* @param maxOrd max num of vectors that will be merged into the graph
* @return HnswGraphBuilder
* @throws IOException If an error occurs while reading from the merge state
*/
- protected HnswBuilder createBuilder(DocIdSetIterator mergedVectorIterator, int maxOrd)
+ protected HnswBuilder createBuilder(KnnVectorValues mergedVectorValues, int maxOrd)
throws IOException {
if (initReader == null) {
return HnswGraphBuilder.create(
@@ -123,7 +123,7 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
HnswGraph initializerGraph = ((HnswGraphProvider) initReader).getGraph(fieldInfo.name);
BitSet initializedNodes = new FixedBitSet(maxOrd);
- int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorIterator, initializedNodes);
+ int[] oldToNewOrdinalMap = getNewOrdMapping(mergedVectorValues, initializedNodes);
return InitializedHnswGraphBuilder.fromGraph(
scorerSupplier,
M,
@@ -137,8 +137,8 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
@Override
public OnHeapHnswGraph merge(
- DocIdSetIterator mergedVectorIterator, InfoStream infoStream, int maxOrd) throws IOException {
- HnswBuilder builder = createBuilder(mergedVectorIterator, maxOrd);
+ KnnVectorValues mergedVectorValues, InfoStream infoStream, int maxOrd) throws IOException {
+ HnswBuilder builder = createBuilder(mergedVectorValues, maxOrd);
builder.setInfoStream(infoStream);
return builder.build(maxOrd);
}
@@ -147,46 +147,45 @@ public class IncrementalHnswGraphMerger implements HnswGraphMerger {
* Creates a new mapping from old ordinals to new ordinals and returns the total number of vectors
* in the newly merged segment.
*
- * @param mergedVectorIterator iterator over the vectors in the merged segment
+ * @param mergedVectorValues vector values in the merged segment
* @param initializedNodes track what nodes have been initialized
* @return the mapping from old ordinals to new ordinals
* @throws IOException If an error occurs while reading from the merge state
*/
protected final int[] getNewOrdMapping(
- DocIdSetIterator mergedVectorIterator, BitSet initializedNodes) throws IOException {
- DocIdSetIterator initializerIterator = null;
+ KnnVectorValues mergedVectorValues, BitSet initializedNodes) throws IOException {
+ KnnVectorValues.DocIndexIterator initializerIterator = null;
switch (fieldInfo.getVectorEncoding()) {
- case BYTE -> initializerIterator = initReader.getByteVectorValues(fieldInfo.name);
- case FLOAT32 -> initializerIterator = initReader.getFloatVectorValues(fieldInfo.name);
+ case BYTE -> initializerIterator = initReader.getByteVectorValues(fieldInfo.name).iterator();
+ case FLOAT32 ->
+ initializerIterator = initReader.getFloatVectorValues(fieldInfo.name).iterator();
}
IntIntHashMap newIdToOldOrdinal = new IntIntHashMap(initGraphSize);
- int oldOrd = 0;
int maxNewDocID = -1;
- for (int oldId = initializerIterator.nextDoc();
- oldId != NO_MORE_DOCS;
- oldId = initializerIterator.nextDoc()) {
- int newId = initDocMap.get(oldId);
+ for (int docId = initializerIterator.nextDoc();
+ docId != NO_MORE_DOCS;
+ docId = initializerIterator.nextDoc()) {
+ int newId = initDocMap.get(docId);
maxNewDocID = Math.max(newId, maxNewDocID);
- newIdToOldOrdinal.put(newId, oldOrd);
- oldOrd++;
+ newIdToOldOrdinal.put(newId, initializerIterator.index());
}
if (maxNewDocID == -1) {
return new int[0];
}
final int[] oldToNewOrdinalMap = new int[initGraphSize];
- int newOrd = 0;
+ KnnVectorValues.DocIndexIterator mergedVectorIterator = mergedVectorValues.iterator();
for (int newDocId = mergedVectorIterator.nextDoc();
newDocId <= maxNewDocID;
newDocId = mergedVectorIterator.nextDoc()) {
int hashDocIndex = newIdToOldOrdinal.indexOf(newDocId);
if (newIdToOldOrdinal.indexExists(hashDocIndex)) {
+ int newOrd = mergedVectorIterator.index();
initializedNodes.set(newOrd);
oldToNewOrdinalMap[newIdToOldOrdinal.indexGet(hashDocIndex)] = newOrd;
}
- newOrd++;
}
return oldToNewOrdinalMap;
}
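// The merge path now consumes KnnVectorValues directly: its DocIndexIterator exposes both the
// doc id (nextDoc/docID) and the dense vector ordinal (index), which previously had to be
// tracked with a manual counter alongside a DocIdSetIterator. An iteration sketch:
import java.io.IOException;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.search.DocIdSetIterator;

final class VectorIterationSketch {
  static void visitAll(FloatVectorValues values) throws IOException {
    KnnVectorValues.DocIndexIterator it = values.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      float[] vector = values.vectorValue(it.index()); // ordinal-based access
      // ... consume doc + vector ...
    }
  }
}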
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java
deleted file mode 100644
index e2c7372b667..00000000000
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomAccessVectorValues.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.util.hnsw;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Bits;
-
-/**
- * Provides random access to vectors by dense ordinal. This interface is used by HNSW-based
- * implementations of KNN search.
- *
- * @lucene.experimental
- */
-public interface RandomAccessVectorValues {
-
- /** Return the number of vector values */
- int size();
-
- /** Return the dimension of the returned vector values */
- int dimension();
-
- /**
- * Creates a new copy of this {@link RandomAccessVectorValues}. This is helpful when you need to
- * access different values at once, to avoid overwriting the underlying vector returned.
- */
- RandomAccessVectorValues copy() throws IOException;
-
- /**
- * Returns a slice of the underlying {@link IndexInput} that contains the vector values if
- * available
- */
- default IndexInput getSlice() {
- return null;
- }
-
- /** Returns the byte length of the vector values. */
- int getVectorByteLength();
-
- /**
- * Translates vector ordinal to the correct document ID. By default, this is an identity function.
- *
- * @param ord the vector ordinal
- * @return the document Id for that vector ordinal
- */
- default int ordToDoc(int ord) {
- return ord;
- }
-
- /**
- * Returns the {@link Bits} representing live documents. By default, this is an identity function.
- *
- * @param acceptDocs the accept docs
- * @return the accept docs
- */
- default Bits getAcceptOrds(Bits acceptDocs) {
- return acceptDocs;
- }
-
- /** Float vector values. */
- interface Floats extends RandomAccessVectorValues {
- @Override
- RandomAccessVectorValues.Floats copy() throws IOException;
-
- /**
- * Return the vector value indexed at the given ordinal.
- *
- * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
- */
- float[] vectorValue(int targetOrd) throws IOException;
-
- /** Returns the vector byte length, defaults to dimension multiplied by float byte size */
- @Override
- default int getVectorByteLength() {
- return dimension() * Float.BYTES;
- }
- }
-
- /** Byte vector values. */
- interface Bytes extends RandomAccessVectorValues {
- @Override
- RandomAccessVectorValues.Bytes copy() throws IOException;
-
- /**
- * Return the vector value indexed at the given ordinal.
- *
- * @param targetOrd a valid ordinal, ≥ 0 and < {@link #size()}.
- */
- byte[] vectorValue(int targetOrd) throws IOException;
-
- /** Returns the vector byte length, defaults to dimension multiplied by byte size */
- @Override
- default int getVectorByteLength() {
- return dimension() * Byte.BYTES;
- }
- }
-
- /**
- * Creates a {@link RandomAccessVectorValues.Floats} from a list of float arrays.
- *
- * @param vectors the list of float arrays
- * @param dim the dimension of the vectors
- * @return a {@link RandomAccessVectorValues.Floats} instance
- */
- static RandomAccessVectorValues.Floats fromFloats(List<float[]> vectors, int dim) {
- return new RandomAccessVectorValues.Floats() {
- @Override
- public int size() {
- return vectors.size();
- }
-
- @Override
- public int dimension() {
- return dim;
- }
-
- @Override
- public float[] vectorValue(int targetOrd) {
- return vectors.get(targetOrd);
- }
-
- @Override
- public RandomAccessVectorValues.Floats copy() {
- return this;
- }
- };
- }
-
- /**
- * Creates a {@link RandomAccessVectorValues.Bytes} from a list of byte arrays.
- *
- * @param vectors the list of byte arrays
- * @param dim the dimension of the vectors
- * @return a {@link RandomAccessVectorValues.Bytes} instance
- */
- static RandomAccessVectorValues.Bytes fromBytes(List<byte[]> vectors, int dim) {
- return new RandomAccessVectorValues.Bytes() {
- @Override
- public int size() {
- return vectors.size();
- }
-
- @Override
- public int dimension() {
- return dim;
- }
-
- @Override
- public byte[] vectorValue(int targetOrd) {
- return vectors.get(targetOrd);
- }
-
- @Override
- public RandomAccessVectorValues.Bytes copy() {
- return this;
- }
- };
- }
-}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomVectorScorer.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomVectorScorer.java
index fc8ed3d004a..a135df43699 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomVectorScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/RandomVectorScorer.java
@@ -18,6 +18,7 @@
package org.apache.lucene.util.hnsw;
import java.io.IOException;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.util.Bits;
/** A {@link RandomVectorScorer} for scoring random nodes in batches against an abstract query. */
@@ -57,14 +58,14 @@ public interface RandomVectorScorer {
/** Creates a default scorer for random access vectors. */
abstract class AbstractRandomVectorScorer implements RandomVectorScorer {
- private final RandomAccessVectorValues values;
+ private final KnnVectorValues values;
/**
* Creates a new scorer for the given vector values.
*
* @param values the vector values
*/
- public AbstractRandomVectorScorer(RandomAccessVectorValues values) {
+ public AbstractRandomVectorScorer(KnnVectorValues values) {
this.values = values;
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/QuantizedByteVectorValues.java b/lucene/core/src/java/org/apache/lucene/util/quantization/QuantizedByteVectorValues.java
index a0fe957fecb..b90ab8276dd 100644
--- a/lucene/core/src/java/org/apache/lucene/util/quantization/QuantizedByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/util/quantization/QuantizedByteVectorValues.java
@@ -17,9 +17,10 @@
package org.apache.lucene.util.quantization;
import java.io.IOException;
+import org.apache.lucene.codecs.lucene95.HasIndexSlice;
import org.apache.lucene.index.ByteVectorValues;
-import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.VectorScorer;
+import org.apache.lucene.store.IndexInput;
/**
* A version of {@link ByteVectorValues}, but additionally retrieving score correction offset for
@@ -27,31 +28,31 @@ import org.apache.lucene.search.VectorScorer;
*
* @lucene.experimental
*/
-public abstract class QuantizedByteVectorValues extends DocIdSetIterator {
- public abstract float getScoreCorrectionConstant() throws IOException;
+public abstract class QuantizedByteVectorValues extends ByteVectorValues implements HasIndexSlice {
- public abstract byte[] vectorValue() throws IOException;
-
- /** Return the dimension of the vectors */
- public abstract int dimension();
-
- /**
- * Return the number of vectors for this field.
- *
- * @return the number of vectors returned by this iterator
- */
- public abstract int size();
-
- @Override
- public final long cost() {
- return size();
+ public ScalarQuantizer getScalarQuantizer() {
+ throw new UnsupportedOperationException();
}
+ public abstract float getScoreCorrectionConstant(int ord) throws IOException;
+
/**
* Return a {@link VectorScorer} for the given query vector.
*
* @param query the query vector
* @return a {@link VectorScorer} instance or null
*/
- public abstract VectorScorer scorer(float[] query) throws IOException;
+ public VectorScorer scorer(float[] query) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public QuantizedByteVectorValues copy() throws IOException {
+ return this;
+ }
+
+ @Override
+ public IndexInput getSlice() {
+ return null;
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/quantization/ScalarQuantizer.java b/lucene/core/src/java/org/apache/lucene/util/quantization/ScalarQuantizer.java
index ab8a911ddfa..3f7bcf6c5c4 100644
--- a/lucene/core/src/java/org/apache/lucene/util/quantization/ScalarQuantizer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/quantization/ScalarQuantizer.java
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Random;
import java.util.stream.IntStream;
import org.apache.lucene.index.FloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.HitQueue;
import org.apache.lucene.search.ScoreDoc;
@@ -269,11 +270,12 @@ public class ScalarQuantizer {
if (totalVectorCount == 0) {
return new ScalarQuantizer(0f, 0f, bits);
}
+ KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator();
if (confidenceInterval == 1f) {
float min = Float.POSITIVE_INFINITY;
float max = Float.NEGATIVE_INFINITY;
- while (floatVectorValues.nextDoc() != NO_MORE_DOCS) {
- for (float v : floatVectorValues.vectorValue()) {
+ while (iterator.nextDoc() != NO_MORE_DOCS) {
+ for (float v : floatVectorValues.vectorValue(iterator.index())) {
min = Math.min(min, v);
max = Math.max(max, v);
}
@@ -289,8 +291,8 @@ public class ScalarQuantizer {
if (totalVectorCount <= quantizationSampleSize) {
int scratchSize = Math.min(SCRATCH_SIZE, totalVectorCount);
int i = 0;
- while (floatVectorValues.nextDoc() != NO_MORE_DOCS) {
- float[] vectorValue = floatVectorValues.vectorValue();
+ while (iterator.nextDoc() != NO_MORE_DOCS) {
+ float[] vectorValue = floatVectorValues.vectorValue(iterator.index());
System.arraycopy(
vectorValue, 0, quantileGatheringScratch, i * vectorValue.length, vectorValue.length);
i++;
@@ -311,11 +313,11 @@ public class ScalarQuantizer {
for (int i : vectorsToTake) {
while (index <= i) {
// We cannot use `advance(docId)` as MergedVectorValues does not support it
- floatVectorValues.nextDoc();
+ iterator.nextDoc();
index++;
}
- assert floatVectorValues.docID() != NO_MORE_DOCS;
- float[] vectorValue = floatVectorValues.vectorValue();
+ assert iterator.docID() != NO_MORE_DOCS;
+ float[] vectorValue = floatVectorValues.vectorValue(iterator.index());
System.arraycopy(
vectorValue, 0, quantileGatheringScratch, idx * vectorValue.length, vectorValue.length);
idx++;
@@ -353,11 +355,16 @@ public class ScalarQuantizer {
/ (floatVectorValues.dimension() + 1),
1 - 1f / (floatVectorValues.dimension() + 1)
};
+ KnnVectorValues.DocIndexIterator iterator = floatVectorValues.iterator();
if (totalVectorCount <= sampleSize) {
int scratchSize = Math.min(SCRATCH_SIZE, totalVectorCount);
int i = 0;
- while (floatVectorValues.nextDoc() != NO_MORE_DOCS) {
- gatherSample(floatVectorValues, quantileGatheringScratch, sampledDocs, i);
+ while (iterator.nextDoc() != NO_MORE_DOCS) {
+ gatherSample(
+ floatVectorValues.vectorValue(iterator.index()),
+ quantileGatheringScratch,
+ sampledDocs,
+ i);
i++;
if (i == scratchSize) {
extractQuantiles(confidenceIntervals, quantileGatheringScratch, upperSum, lowerSum);
@@ -374,11 +381,15 @@ public class ScalarQuantizer {
for (int i : vectorsToTake) {
while (index <= i) {
// We cannot use `advance(docId)` as MergedVectorValues does not support it
- floatVectorValues.nextDoc();
+ iterator.nextDoc();
index++;
}
- assert floatVectorValues.docID() != NO_MORE_DOCS;
- gatherSample(floatVectorValues, quantileGatheringScratch, sampledDocs, idx);
+ assert iterator.docID() != NO_MORE_DOCS;
+ gatherSample(
+ floatVectorValues.vectorValue(iterator.index()),
+ quantileGatheringScratch,
+ sampledDocs,
+ idx);
idx++;
if (idx == SCRATCH_SIZE) {
extractQuantiles(confidenceIntervals, quantileGatheringScratch, upperSum, lowerSum);
@@ -437,12 +448,7 @@ public class ScalarQuantizer {
}
private static void gatherSample(
- FloatVectorValues floatVectorValues,
- float[] quantileGatheringScratch,
- List<float[]> sampledDocs,
- int i)
- throws IOException {
- float[] vectorValue = floatVectorValues.vectorValue();
+ float[] vectorValue, float[] quantileGatheringScratch, List<float[]> sampledDocs, int i) {
float[] copy = new float[vectorValue.length];
System.arraycopy(vectorValue, 0, copy, 0, vectorValue.length);
sampledDocs.add(copy);
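Reviewer note (not part of the patch): the changes above consistently swap DocIdSetIterator-style access on FloatVectorValues for an explicit iterator that separates doc IDs from dense ordinals. A minimal sketch of the new idiom, assuming any FloatVectorValues instance:

KnnVectorValues.DocIndexIterator it = floatVectorValues.iterator();
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
  // docID() walks documents; index() yields the dense ordinal expected by vectorValue(...)
  float[] v = floatVectorValues.vectorValue(it.index());
  // ... consume v ...
}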
diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java
index 0798885c906..b65f1e57092 100644
--- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java
+++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java
@@ -19,11 +19,12 @@ package org.apache.lucene.internal.vectorization;
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.util.Optional;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.FilterIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MemorySegmentAccessInput;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
abstract sealed class Lucene99MemorySegmentByteVectorScorer
@@ -39,10 +40,8 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
* returned.
*/
public static Optional create(
- VectorSimilarityFunction type,
- IndexInput input,
- RandomAccessVectorValues values,
- byte[] queryVector) {
+ VectorSimilarityFunction type, IndexInput input, KnnVectorValues values, byte[] queryVector) {
+ assert values instanceof ByteVectorValues;
input = FilterIndexInput.unwrapOnlyTest(input);
if (!(input instanceof MemorySegmentAccessInput msInput)) {
return Optional.empty();
@@ -58,7 +57,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
}
Lucene99MemorySegmentByteVectorScorer(
- MemorySegmentAccessInput input, RandomAccessVectorValues values, byte[] queryVector) {
+ MemorySegmentAccessInput input, KnnVectorValues values, byte[] queryVector) {
super(values);
this.input = input;
this.vectorByteSize = values.getVectorByteLength();
@@ -92,7 +91,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
}
static final class CosineScorer extends Lucene99MemorySegmentByteVectorScorer {
- CosineScorer(MemorySegmentAccessInput input, RandomAccessVectorValues values, byte[] query) {
+ CosineScorer(MemorySegmentAccessInput input, KnnVectorValues values, byte[] query) {
super(input, values, query);
}
@@ -105,8 +104,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
}
static final class DotProductScorer extends Lucene99MemorySegmentByteVectorScorer {
- DotProductScorer(
- MemorySegmentAccessInput input, RandomAccessVectorValues values, byte[] query) {
+ DotProductScorer(MemorySegmentAccessInput input, KnnVectorValues values, byte[] query) {
super(input, values, query);
}
@@ -120,7 +118,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
}
static final class EuclideanScorer extends Lucene99MemorySegmentByteVectorScorer {
- EuclideanScorer(MemorySegmentAccessInput input, RandomAccessVectorValues values, byte[] query) {
+ EuclideanScorer(MemorySegmentAccessInput input, KnnVectorValues values, byte[] query) {
super(input, values, query);
}
@@ -133,8 +131,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
}
static final class MaxInnerProductScorer extends Lucene99MemorySegmentByteVectorScorer {
- MaxInnerProductScorer(
- MemorySegmentAccessInput input, RandomAccessVectorValues values, byte[] query) {
+ MaxInnerProductScorer(MemorySegmentAccessInput input, KnnVectorValues values, byte[] query) {
super(input, values, query);
}
diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java
index 90b3bfb014c..02c71561122 100644
--- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java
+++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java
@@ -19,11 +19,12 @@ package org.apache.lucene.internal.vectorization;
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.util.Optional;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.FilterIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.MemorySegmentAccessInput;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
@@ -33,7 +34,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
final int vectorByteSize;
final int maxOrd;
final MemorySegmentAccessInput input;
- final RandomAccessVectorValues values; // to support ordToDoc/getAcceptOrds
+ final KnnVectorValues values; // to support ordToDoc/getAcceptOrds
byte[] scratch1, scratch2;
/**
@@ -41,7 +42,8 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
* optional is returned.
*/
static Optional create(
- VectorSimilarityFunction type, IndexInput input, RandomAccessVectorValues values) {
+ VectorSimilarityFunction type, IndexInput input, KnnVectorValues values) {
+ assert values instanceof ByteVectorValues;
input = FilterIndexInput.unwrapOnlyTest(input);
if (!(input instanceof MemorySegmentAccessInput msInput)) {
return Optional.empty();
@@ -56,7 +58,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
}
Lucene99MemorySegmentByteVectorScorerSupplier(
- MemorySegmentAccessInput input, RandomAccessVectorValues values) {
+ MemorySegmentAccessInput input, KnnVectorValues values) {
this.input = input;
this.values = values;
this.vectorByteSize = values.getVectorByteLength();
@@ -103,7 +105,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
static final class CosineSupplier extends Lucene99MemorySegmentByteVectorScorerSupplier {
- CosineSupplier(MemorySegmentAccessInput input, RandomAccessVectorValues values) {
+ CosineSupplier(MemorySegmentAccessInput input, KnnVectorValues values) {
super(input, values);
}
@@ -128,7 +130,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
static final class DotProductSupplier extends Lucene99MemorySegmentByteVectorScorerSupplier {
- DotProductSupplier(MemorySegmentAccessInput input, RandomAccessVectorValues values) {
+ DotProductSupplier(MemorySegmentAccessInput input, KnnVectorValues values) {
super(input, values);
}
@@ -155,7 +157,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
static final class EuclideanSupplier extends Lucene99MemorySegmentByteVectorScorerSupplier {
- EuclideanSupplier(MemorySegmentAccessInput input, RandomAccessVectorValues values) {
+ EuclideanSupplier(MemorySegmentAccessInput input, KnnVectorValues values) {
super(input, values);
}
@@ -181,7 +183,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
static final class MaxInnerProductSupplier extends Lucene99MemorySegmentByteVectorScorerSupplier {
- MaxInnerProductSupplier(MemorySegmentAccessInput input, RandomAccessVectorValues values) {
+ MaxInnerProductSupplier(MemorySegmentAccessInput input, KnnVectorValues values) {
super(input, values);
}
diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java
index b085185fb11..bd8cbb2c388 100644
--- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java
+++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java
@@ -19,11 +19,13 @@ package org.apache.lucene.internal.vectorization;
import java.io.IOException;
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
+import org.apache.lucene.codecs.lucene95.HasIndexSlice;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
public class Lucene99MemorySegmentFlatVectorsScorer implements FlatVectorsScorer {
@@ -38,15 +40,16 @@ public class Lucene99MemorySegmentFlatVectorsScorer implements FlatVectorsScorer
@Override
public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
- VectorSimilarityFunction similarityType, RandomAccessVectorValues vectorValues)
- throws IOException {
+ VectorSimilarityFunction similarityType, KnnVectorValues vectorValues) throws IOException {
// quantized values here indicate a wrapping or delegation issue
- assert !(vectorValues instanceof RandomAccessQuantizedByteVectorValues);
+ assert !(vectorValues instanceof QuantizedByteVectorValues);
// currently only supports binary vectors
- if (vectorValues instanceof RandomAccessVectorValues.Bytes && vectorValues.getSlice() != null) {
+ if (vectorValues instanceof ByteVectorValues bvv
+ && bvv instanceof HasIndexSlice byteVectorValues
+ && byteVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentByteVectorScorerSupplier.create(
- similarityType, vectorValues.getSlice(), vectorValues);
+ similarityType, byteVectorValues.getSlice(), vectorValues);
if (scorer.isPresent()) {
return scorer.get();
}
@@ -56,9 +59,7 @@ public class Lucene99MemorySegmentFlatVectorsScorer implements FlatVectorsScorer
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityType,
- RandomAccessVectorValues vectorValues,
- float[] target)
+ VectorSimilarityFunction similarityType, KnnVectorValues vectorValues, float[] target)
throws IOException {
// currently only supports binary vectors, so always delegate
return delegate.getRandomVectorScorer(similarityType, vectorValues, target);
@@ -66,17 +67,17 @@ public class Lucene99MemorySegmentFlatVectorsScorer implements FlatVectorsScorer
@Override
public RandomVectorScorer getRandomVectorScorer(
- VectorSimilarityFunction similarityType,
- RandomAccessVectorValues vectorValues,
- byte[] queryVector)
+ VectorSimilarityFunction similarityType, KnnVectorValues vectorValues, byte[] queryVector)
throws IOException {
checkDimensions(queryVector.length, vectorValues.dimension());
// quantized values here indicate a wrapping or delegation issue
- assert !(vectorValues instanceof RandomAccessQuantizedByteVectorValues);
- if (vectorValues instanceof RandomAccessVectorValues.Bytes && vectorValues.getSlice() != null) {
+ assert !(vectorValues instanceof QuantizedByteVectorValues);
+ if (vectorValues instanceof ByteVectorValues bvv
+ && bvv instanceof HasIndexSlice byteVectorValues
+ && byteVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentByteVectorScorer.create(
- similarityType, vectorValues.getSlice(), vectorValues, queryVector);
+ similarityType, byteVectorValues.getSlice(), vectorValues, queryVector);
if (scorer.isPresent()) {
return scorer.get();
}
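Reviewer note (not part of the patch): KnnVectorValues does not expose getSlice(), so off-heap implementations now advertise their backing IndexInput via the HasIndexSlice interface. A minimal sketch of the capability check used above; the fallback branch is an assumption:

if (vectorValues instanceof ByteVectorValues
    && vectorValues instanceof HasIndexSlice sliced
    && sliced.getSlice() != null) {
  IndexInput slice = sliced.getSlice(); // off-heap bytes: score against the mapped slice
  // ... build a memory-segment scorer over `slice` ...
} else {
  // on-heap or float vectors: delegate to the default scorer
}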
diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
index c6ac3d23a12..832fa5f98e6 100644
--- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java
@@ -419,7 +419,7 @@ abstract class MemorySegmentIndexInput extends IndexInput
}
@Override
- protected void readGroupVInt(long[] dst, int offset) throws IOException {
+ public void readGroupVInt(long[] dst, int offset) throws IOException {
try {
final int len =
GroupVIntUtil.readGroupVInt(
@@ -530,7 +530,29 @@ abstract class MemorySegmentIndexInput extends IndexInput
@Override
public final MemorySegmentIndexInput clone() {
- final MemorySegmentIndexInput clone = buildSlice((String) null, 0L, this.length);
+ ensureOpen();
+ ensureAccessible();
+ final MemorySegmentIndexInput clone;
+ if (segments.length == 1) {
+ clone =
+ new SingleSegmentImpl(
+ toString(),
+ null, // clones don't have an Arena, as they can't close
+ segments[0],
+ length,
+ chunkSizePower,
+ confined);
+ } else {
+ clone =
+ new MultiSegmentImpl(
+ toString(),
+ null, // clones don't have an Arena, as they can't close
+ segments,
+ ((MultiSegmentImpl) this).offset,
+ length,
+ chunkSizePower,
+ confined);
+ }
try {
clone.seek(getFilePointer());
} catch (IOException ioe) {
@@ -567,14 +589,23 @@ abstract class MemorySegmentIndexInput extends IndexInput
public final MemorySegmentIndexInput slice(
String sliceDescription, long offset, long length, ReadAdvice advice) throws IOException {
MemorySegmentIndexInput slice = slice(sliceDescription, offset, length);
- if (NATIVE_ACCESS.isPresent()) {
+ if (NATIVE_ACCESS.isPresent() && advice != ReadAdvice.NORMAL) {
+ // No need to madvise with the NORMAL advice, since it's the OS' default.
final NativeAccess nativeAccess = NATIVE_ACCESS.get();
- slice.advise(
- 0,
- slice.length,
- segment -> {
- nativeAccess.madvise(segment, advice);
- });
+ if (length >= nativeAccess.getPageSize()) {
+ // Only set the read advice if the inner file is large enough. Otherwise the cons likely
+ // outweigh the pros, as we're:
+ // - potentially overriding the advice of other files that share the same pages,
+ // - paying the cost of a madvise system call for little value.
+ // We could align inner files with the page size to avoid the first issue, but again the
+ // pros don't clearly outweigh the cons.
+ slice.advise(
+ 0,
+ slice.length,
+ segment -> {
+ nativeAccess.madvise(segment, advice);
+ });
+ }
}
return slice;
}
@@ -583,26 +614,30 @@ abstract class MemorySegmentIndexInput extends IndexInput
MemorySegmentIndexInput buildSlice(String sliceDescription, long offset, long length) {
ensureOpen();
ensureAccessible();
+ final MemorySegment[] slices;
+ final boolean isClone = offset == 0 && length == this.length;
+ if (isClone) {
+ slices = segments;
+ } else {
+ final long sliceEnd = offset + length;
+ final int startIndex = (int) (offset >>> chunkSizePower);
+ final int endIndex = (int) (sliceEnd >>> chunkSizePower);
+ // we always allocate one more slice, the last one may be a 0 byte one after truncating with
+ // asSlice():
+ slices = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
- final long sliceEnd = offset + length;
- final int startIndex = (int) (offset >>> chunkSizePower);
- final int endIndex = (int) (sliceEnd >>> chunkSizePower);
+ // set the last segment's limit for the sliced view.
+ slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
- // we always allocate one more slice, the last one may be a 0 byte one after truncating with
- // asSlice():
- final MemorySegment slices[] = ArrayUtil.copyOfSubArray(segments, startIndex, endIndex + 1);
-
- // set the last segment's limit for the sliced view.
- slices[slices.length - 1] = slices[slices.length - 1].asSlice(0L, sliceEnd & chunkSizeMask);
-
- offset = offset & chunkSizeMask;
+ offset = offset & chunkSizeMask;
+ }
final String newResourceDescription = getFullSliceDescription(sliceDescription);
if (slices.length == 1) {
return new SingleSegmentImpl(
newResourceDescription,
null, // clones don't have an Arena, as they can't close
- slices[0].asSlice(offset, length),
+ isClone ? slices[0] : slices[0].asSlice(offset, length),
length,
chunkSizePower,
confined);
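Reviewer note (not part of the patch): the slice/advise change amounts to two guards before issuing madvise. A sketch of the decision in isolation; `nativeAccessPresent`, `sliceLength` and `pageSize` are illustrative stand-ins for NATIVE_ACCESS and NativeAccess#getPageSize:

boolean shouldMadvise =
    nativeAccessPresent                // native access is available on this platform
        && advice != ReadAdvice.NORMAL // NORMAL is already the OS default; skip the syscall
        && sliceLength >= pageSize;    // tiny slices share pages with neighbors, so advising
                                       // them risks overriding a neighbor's advice for no gain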
diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
index 1e5a305219b..7cbe376678b 100644
--- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
@@ -129,7 +129,9 @@ final class MemorySegmentIndexInputProvider
// internal FileChannel logic)
if (preload) {
segment.load();
- } else if (nativeAccess.filter(na -> segment.address() % na.getPageSize() == 0).isPresent()) {
+ } else if (readAdvice != ReadAdvice.NORMAL
+ && nativeAccess.filter(na -> segment.address() % na.getPageSize() == 0).isPresent()) {
+ // No need to madvise with ReadAdvice.NORMAL since it is the OS' default read advice.
nativeAccess.get().madvise(segment, readAdvice);
}
segments[segNr] = segment;
diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
index 80c1665cdd1..05eb6157118 100644
--- a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
@@ -122,10 +122,7 @@ final class PosixNativeAccess extends NativeAccess {
@Override
public void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException {
- final Integer advice = mapReadAdvice(readAdvice);
- if (advice == null) {
- return; // do nothing
- }
+ final int advice = mapReadAdvice(readAdvice);
madvise(segment, advice);
}
@@ -156,7 +153,7 @@ final class PosixNativeAccess extends NativeAccess {
}
}
- private Integer mapReadAdvice(ReadAdvice readAdvice) {
+ private int mapReadAdvice(ReadAdvice readAdvice) {
return switch (readAdvice) {
case NORMAL -> POSIX_MADV_NORMAL;
case RANDOM -> POSIX_MADV_RANDOM;
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 8b672496601..bd950aeaebd 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene912.Lucene912Codec
+org.apache.lucene.codecs.lucene100.Lucene100Codec
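Reviewer note (not part of the patch): with this SPI entry swapped, codec lookup by name resolves to the new default. A minimal usage sketch; the SPI name "Lucene100" is assumed to follow the established naming convention:

Codec codec = Codec.forName("Lucene100"); // resolved through this META-INF/services file
IndexWriterConfig iwc = new IndexWriterConfig().setCodec(codec);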
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/hnsw/TestFlatVectorScorer.java b/lucene/core/src/test/org/apache/lucene/codecs/hnsw/TestFlatVectorScorer.java
index 9bce1f10a43..6fe9a685e1b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/hnsw/TestFlatVectorScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/hnsw/TestFlatVectorScorer.java
@@ -35,6 +35,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer;
+import org.apache.lucene.index.ByteVectorValues;
+import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -42,7 +44,6 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.tests.util.LuceneTestCase;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.hamcrest.Matcher;
import org.hamcrest.MatcherAssert;
@@ -174,13 +175,13 @@ public class TestFlatVectorScorer extends LuceneTestCase {
}
}
- RandomAccessVectorValues byteVectorValues(
- int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
+ ByteVectorValues byteVectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim)
+ throws IOException {
return new OffHeapByteVectorValues.DenseOffHeapVectorValues(
dims, size, in.slice("byteValues", 0, in.length()), dims, flatVectorsScorer, sim);
}
- RandomAccessVectorValues floatVectorValues(
+ FloatVectorValues floatVectorValues(
int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
return new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
dims,
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
index c72bcfeea46..fe6c82e73bb 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
@@ -18,7 +18,7 @@ package org.apache.lucene.codecs.lucene90;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
@@ -31,7 +31,7 @@ import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase;
public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene912Codec(Lucene912Codec.Mode.BEST_COMPRESSION);
+ return new Lucene100Codec(Lucene100Codec.Mode.BEST_COMPRESSION);
}
/**
@@ -42,7 +42,7 @@ public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFie
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
iwc.setCodec(
- new Lucene912Codec(RandomPicks.randomFrom(random(), Lucene912Codec.Mode.values())));
+ new Lucene100Codec(RandomPicks.randomFrom(random(), Lucene100Codec.Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -72,7 +72,7 @@ public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFie
expectThrows(
NullPointerException.class,
() -> {
- new Lucene912Codec(null);
+ new Lucene100Codec(null);
});
expectThrows(
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestPForUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestPForUtil.java
index 0740a8a708b..ccc786293a1 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestPForUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene912/TestPForUtil.java
@@ -46,7 +46,7 @@ public class TestPForUtil extends LuceneTestCase {
final PForUtil pforUtil = new PForUtil(forUtil);
for (int i = 0; i < iterations; ++i) {
if (random().nextInt(5) == 0) {
- pforUtil.skip(in);
+ PForUtil.skip(in);
continue;
}
final long[] restored = new long[ForUtil.BLOCK_SIZE];
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
index 825de3ab725..ed70b2df002 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
@@ -17,7 +17,6 @@
package org.apache.lucene.codecs.lucene99;
import static java.lang.String.format;
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.oneOf;
@@ -29,7 +28,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
@@ -75,7 +74,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
@Override
protected Codec getCodec() {
- return new Lucene912Codec() {
+ return new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return format;
@@ -107,7 +106,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
dir,
newIndexWriterConfig()
.setCodec(
- new Lucene912Codec() {
+ new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return getKnnFormat(4);
@@ -127,7 +126,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
dir,
newIndexWriterConfig()
.setCodec(
- new Lucene912Codec() {
+ new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return getKnnFormat(7);
@@ -164,7 +163,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
dir,
newIndexWriterConfig()
.setCodec(
- new Lucene912Codec() {
+ new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene99HnswVectorsFormat();
@@ -184,7 +183,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
dir,
newIndexWriterConfig()
.setCodec(
- new Lucene912Codec() {
+ new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return getKnnFormat(7);
@@ -217,7 +216,7 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
dir,
newIndexWriterConfig()
.setCodec(
- new Lucene912Codec() {
+ new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene99HnswScalarQuantizedVectorsFormat(
@@ -312,14 +311,13 @@ public class TestLucene99HnswQuantizedVectorsFormat extends BaseKnnVectorsFormat
assertNotNull(hnswReader.getQuantizationState("f"));
QuantizedByteVectorValues quantizedByteVectorValues =
hnswReader.getQuantizedVectorValues("f");
- int docId = -1;
- while ((docId = quantizedByteVectorValues.nextDoc()) != NO_MORE_DOCS) {
- byte[] vector = quantizedByteVectorValues.vectorValue();
- float offset = quantizedByteVectorValues.getScoreCorrectionConstant();
+ for (int ord = 0; ord < quantizedByteVectorValues.size(); ord++) {
+ byte[] vector = quantizedByteVectorValues.vectorValue(ord);
+ float offset = quantizedByteVectorValues.getScoreCorrectionConstant(ord);
for (int i = 0; i < dim; i++) {
- assertEquals(vector[i], expectedVectors[docId][i]);
+ assertEquals(vector[i], expectedVectors[ord][i]);
}
- assertEquals(offset, expectedCorrections[docId], 0.00001f);
+ assertEquals(offset, expectedCorrections[ord], 0.00001f);
}
} else {
fail("reader is not Lucene99HnswVectorsReader");
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorScorer.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorScorer.java
index a0f640fa650..3b758de6ce6 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorScorer.java
@@ -27,7 +27,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -46,13 +46,13 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.VectorUtil;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
-import org.apache.lucene.util.quantization.RandomAccessQuantizedByteVectorValues;
+import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.ScalarQuantizer;
public class TestLucene99ScalarQuantizedVectorScorer extends LuceneTestCase {
private static Codec getCodec(int bits, boolean compress) {
- return new Lucene912Codec() {
+ return new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return new Lucene99HnswScalarQuantizedVectorsFormat(
@@ -100,8 +100,8 @@ public class TestLucene99ScalarQuantizedVectorScorer extends LuceneTestCase {
try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) {
Lucene99ScalarQuantizedVectorScorer scorer =
new Lucene99ScalarQuantizedVectorScorer(new DefaultFlatVectorScorer());
- RandomAccessQuantizedByteVectorValues values =
- new RandomAccessQuantizedByteVectorValues() {
+ QuantizedByteVectorValues values =
+ new QuantizedByteVectorValues() {
@Override
public int dimension() {
return 32;
@@ -128,7 +128,7 @@ public class TestLucene99ScalarQuantizedVectorScorer extends LuceneTestCase {
}
@Override
- public RandomAccessQuantizedByteVectorValues copy() throws IOException {
+ public QuantizedByteVectorValues copy() throws IOException {
return this;
}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java
index 64df927c765..c7f4515c25c 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99ScalarQuantizedVectorsFormat.java
@@ -28,7 +28,7 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.codecs.lucene912.Lucene912Codec;
+import org.apache.lucene.codecs.lucene100.Lucene100Codec;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
@@ -37,6 +37,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorSimilarityFunction;
@@ -69,7 +70,7 @@ public class TestLucene99ScalarQuantizedVectorsFormat extends BaseKnnVectorsForm
@Override
protected Codec getCodec() {
- return new Lucene912Codec() {
+ return new Lucene100Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return format;
@@ -101,6 +102,11 @@ public class TestLucene99ScalarQuantizedVectorsFormat extends BaseKnnVectorsForm
}
}
+ @Override
+ public void testRecall() {
+ // ignore this test since this class always returns no results from search
+ }
+
public void testQuantizedVectorsWriteAndRead() throws Exception {
// create lucene directory with codec
int numVectors = 1 + random().nextInt(50);
@@ -173,9 +179,10 @@ public class TestLucene99ScalarQuantizedVectorsFormat extends BaseKnnVectorsForm
QuantizedByteVectorValues quantizedByteVectorValues =
quantizedReader.getQuantizedVectorValues("f");
int docId = -1;
- while ((docId = quantizedByteVectorValues.nextDoc()) != NO_MORE_DOCS) {
- byte[] vector = quantizedByteVectorValues.vectorValue();
- float offset = quantizedByteVectorValues.getScoreCorrectionConstant();
+ KnnVectorValues.DocIndexIterator iter = quantizedByteVectorValues.iterator();
+ for (docId = iter.nextDoc(); docId != NO_MORE_DOCS; docId = iter.nextDoc()) {
+ byte[] vector = quantizedByteVectorValues.vectorValue(iter.index());
+ float offset = quantizedByteVectorValues.getScoreCorrectionConstant(iter.index());
for (int i = 0; i < dim; i++) {
assertEquals(vector[i], expectedVectors[docId][i]);
}
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestField.java b/lucene/core/src/test/org/apache/lucene/document/TestField.java
index 6e3a855a0df..5c1b8f17294 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestField.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestField.java
@@ -18,6 +18,7 @@ package org.apache.lucene.document;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.codecs.Codec;
@@ -27,6 +28,7 @@ import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.IndexSearcher;
@@ -713,17 +715,21 @@ public class TestField extends LuceneTestCase {
try (IndexReader r = DirectoryReader.open(w)) {
ByteVectorValues binary = r.leaves().get(0).reader().getByteVectorValues("binary");
assertEquals(1, binary.size());
- assertNotEquals(NO_MORE_DOCS, binary.nextDoc());
- assertNotNull(binary.vectorValue());
- assertArrayEquals(b, binary.vectorValue());
- assertEquals(NO_MORE_DOCS, binary.nextDoc());
+ KnnVectorValues.DocIndexIterator iterator = binary.iterator();
+ assertNotEquals(NO_MORE_DOCS, iterator.nextDoc());
+ assertNotNull(binary.vectorValue(0));
+ assertArrayEquals(b, binary.vectorValue(0));
+ assertEquals(NO_MORE_DOCS, iterator.nextDoc());
+ expectThrows(IOException.class, () -> binary.vectorValue(1));
FloatVectorValues floatValues = r.leaves().get(0).reader().getFloatVectorValues("float");
assertEquals(1, floatValues.size());
- assertNotEquals(NO_MORE_DOCS, floatValues.nextDoc());
- assertEquals(vector.length, floatValues.vectorValue().length);
- assertEquals(vector[0], floatValues.vectorValue()[0], 0);
- assertEquals(NO_MORE_DOCS, floatValues.nextDoc());
+ KnnVectorValues.DocIndexIterator iterator1 = floatValues.iterator();
+ assertNotEquals(NO_MORE_DOCS, iterator1.nextDoc());
+ assertEquals(vector.length, floatValues.vectorValue(0).length);
+ assertEquals(vector[0], floatValues.vectorValue(0)[0], 0);
+ assertEquals(NO_MORE_DOCS, iterator1.nextDoc());
+ expectThrows(IOException.class, () -> floatValues.vectorValue(1));
}
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
index 3826962779a..9db1d305a74 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java
@@ -65,10 +65,7 @@ public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
}
if (si.info.getUseCompoundFile()) {
try (Directory cfsDir =
- si.info
- .getCodec()
- .compoundFormat()
- .getCompoundReader(dir, si.info, newIOContext(random()))) {
+ si.info.getCodec().compoundFormat().getCompoundReader(dir, si.info)) {
for (String cfsFile : cfsDir.listAll()) {
checkFooter(cfsDir, cfsFile);
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
index 76c3ee75f25..e8857791c3a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java
@@ -70,10 +70,7 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
}
if (si.info.getUseCompoundFile()) {
try (Directory cfsDir =
- si.info
- .getCodec()
- .compoundFormat()
- .getCompoundReader(dir, si.info, newIOContext(random()))) {
+ si.info.getCodec().compoundFormat().getCompoundReader(dir, si.info)) {
for (String cfsFile : cfsDir.listAll()) {
checkHeader(cfsDir, cfsFile, namesToExtensions, si.info.getId());
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDefaultCodecParallelizesIO.java b/lucene/core/src/test/org/apache/lucene/index/TestDefaultCodecParallelizesIO.java
index 2c4351fa170..58579ab93a4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDefaultCodecParallelizesIO.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDefaultCodecParallelizesIO.java
@@ -40,7 +40,14 @@ public class TestDefaultCodecParallelizesIO extends LuceneTestCase {
Directory bbDir = new ByteBuffersDirectory();
try (LineFileDocs docs = new LineFileDocs(random());
IndexWriter w =
- new IndexWriter(bbDir, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) {
+ new IndexWriter(
+ bbDir,
+ new IndexWriterConfig()
+ // Disable CFS, this test needs to know about files that are open with the
+ // RANDOM_PRELOAD advice, which CFS doesn't allow us to detect.
+ .setUseCompoundFile(false)
+ .setMergePolicy(newLogMergePolicy(false))
+ .setCodec(TestUtil.getDefaultCodec()))) {
final int numDocs = atLeast(10_000);
for (int d = 0; d < numDocs; ++d) {
Document doc = docs.nextDoc();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
index 3c82cd6b33e..d03c8cf42b5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestExitableDirectoryReader.java
@@ -459,8 +459,8 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
expectThrows(
ExitingReaderException.class,
() -> {
- DocIdSetIterator iter = leaf.getFloatVectorValues("vector");
- scanAndRetrieve(leaf, iter);
+ KnnVectorValues values = leaf.getFloatVectorValues("vector");
+ scanAndRetrieve(leaf, values);
});
expectThrows(
@@ -473,8 +473,8 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
leaf.getLiveDocs(),
Integer.MAX_VALUE));
} else {
- DocIdSetIterator iter = leaf.getFloatVectorValues("vector");
- scanAndRetrieve(leaf, iter);
+ KnnVectorValues values = leaf.getFloatVectorValues("vector");
+ scanAndRetrieve(leaf, values);
leaf.searchNearestVectors(
"vector",
@@ -534,8 +534,8 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
expectThrows(
ExitingReaderException.class,
() -> {
- DocIdSetIterator iter = leaf.getByteVectorValues("vector");
- scanAndRetrieve(leaf, iter);
+ KnnVectorValues values = leaf.getByteVectorValues("vector");
+ scanAndRetrieve(leaf, values);
});
expectThrows(
@@ -549,8 +549,8 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
Integer.MAX_VALUE));
} else {
- DocIdSetIterator iter = leaf.getByteVectorValues("vector");
- scanAndRetrieve(leaf, iter);
+ KnnVectorValues values = leaf.getByteVectorValues("vector");
+ scanAndRetrieve(leaf, values);
leaf.searchNearestVectors(
"vector",
@@ -564,20 +564,24 @@ public class TestExitableDirectoryReader extends LuceneTestCase {
directory.close();
}
- private static void scanAndRetrieve(LeafReader leaf, DocIdSetIterator iter) throws IOException {
+ private static void scanAndRetrieve(LeafReader leaf, KnnVectorValues values) throws IOException {
+ KnnVectorValues.DocIndexIterator iter = values.iterator();
for (iter.nextDoc();
iter.docID() != DocIdSetIterator.NO_MORE_DOCS && iter.docID() < leaf.maxDoc(); ) {
- final int nextDocId = iter.docID() + 1;
+ int docId = iter.docID();
+ if (docId >= leaf.maxDoc()) {
+ break;
+ }
+ final int nextDocId = docId + 1;
if (random().nextBoolean() && nextDocId < leaf.maxDoc()) {
iter.advance(nextDocId);
} else {
iter.nextDoc();
}
-
if (random().nextBoolean()
&& iter.docID() != DocIdSetIterator.NO_MORE_DOCS
- && iter instanceof FloatVectorValues) {
- ((FloatVectorValues) iter).vectorValue();
+ && values instanceof FloatVectorValues) {
+ ((FloatVectorValues) values).vectorValue(iter.index());
}
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
index 8186eda8462..52cd21630bc 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterForceMerge.java
@@ -40,7 +40,6 @@ import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.store.MockDirectoryWrapper;
@@ -244,10 +243,7 @@ public class TestIndexWriterForceMerge extends LuceneTestCase {
}
if (info.info.getUseCompoundFile()) {
try (Directory cfs =
- info.info
- .getCodec()
- .compoundFormat()
- .getCompoundReader(dir, info.info, IOContext.DEFAULT)) {
+ info.info.getCodec().compoundFormat().getCompoundReader(dir, info.info)) {
for (String file : cfs.listAll()) {
sb.append(
String.format(
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
index 72be0bd929f..5def0a26d84 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
@@ -413,11 +413,13 @@ public class TestKnnGraph extends LuceneTestCase {
// stored vector values are the same as original
int nextDocWithVectors = 0;
StoredFields storedFields = reader.storedFields();
+ KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator();
for (int i = 0; i < reader.maxDoc(); i++) {
- nextDocWithVectors = vectorValues.advance(i);
+ nextDocWithVectors = iterator.advance(i);
while (i < nextDocWithVectors && i < reader.maxDoc()) {
int id = Integer.parseInt(storedFields.document(i).get("id"));
- assertNull("document " + id + " has no vector, but was expected to", values[id]);
+ assertNull(
+ "document " + id + ", expected to have no vector, does have one", values[id]);
++i;
}
if (nextDocWithVectors == NO_MORE_DOCS) {
@@ -425,7 +427,7 @@ public class TestKnnGraph extends LuceneTestCase {
}
int id = Integer.parseInt(storedFields.document(i).get("id"));
// documents with KnnGraphValues have the expected vectors
- float[] scratch = vectorValues.vectorValue();
+ float[] scratch = vectorValues.vectorValue(iterator.index());
assertArrayEquals(
"vector did not match for doc " + i + ", id=" + id + ": " + Arrays.toString(scratch),
values[id],
@@ -435,9 +437,9 @@ public class TestKnnGraph extends LuceneTestCase {
}
// if IndexDisi.doc == NO_MORE_DOCS, we should not call IndexDisi.nextDoc()
if (nextDocWithVectors != NO_MORE_DOCS) {
- assertEquals(NO_MORE_DOCS, vectorValues.nextDoc());
+ assertEquals(NO_MORE_DOCS, iterator.nextDoc());
} else {
- assertEquals(NO_MORE_DOCS, vectorValues.docID());
+ assertEquals(NO_MORE_DOCS, iterator.docID());
}
// assert graph values:
@@ -560,7 +562,6 @@ public class TestKnnGraph extends LuceneTestCase {
String idString = Integer.toString(id);
doc.add(new StringField("id", idString, Field.Store.YES));
doc.add(new SortedDocValuesField("id", new BytesRef(idString)));
- // XSSystem.out.println("add " + idString + " " + Arrays.toString(vector));
iw.updateDocument(new Term("id", idString), doc);
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
index 90b0a07aa34..e222c20d639 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentInfos.java
@@ -71,8 +71,8 @@ public class TestSegmentInfos extends LuceneTestCase {
SegmentInfo info =
new SegmentInfo(
dir,
- Version.LUCENE_10_0_0,
- Version.LUCENE_10_0_0,
+ Version.LUCENE_11_0_0,
+ Version.LUCENE_11_0_0,
"_0",
1,
false,
@@ -90,7 +90,7 @@ public class TestSegmentInfos extends LuceneTestCase {
sis.add(commitInfo);
sis.commit(dir);
sis = SegmentInfos.readLatestCommit(dir);
- assertEquals(Version.LUCENE_10_0_0, sis.getMinSegmentLuceneVersion());
+ assertEquals(Version.LUCENE_11_0_0, sis.getMinSegmentLuceneVersion());
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
dir.close();
}
@@ -106,8 +106,8 @@ public class TestSegmentInfos extends LuceneTestCase {
SegmentInfo info =
new SegmentInfo(
dir,
- Version.LUCENE_10_0_0,
- Version.LUCENE_10_0_0,
+ Version.LUCENE_11_0_0,
+ Version.LUCENE_11_0_0,
"_0",
1,
false,
@@ -126,8 +126,8 @@ public class TestSegmentInfos extends LuceneTestCase {
info =
new SegmentInfo(
dir,
- Version.LUCENE_10_0_0,
- Version.LUCENE_10_0_0,
+ Version.LUCENE_11_0_0,
+ Version.LUCENE_11_0_0,
"_1",
1,
false,
@@ -146,7 +146,7 @@ public class TestSegmentInfos extends LuceneTestCase {
byte[] commitInfoId0 = sis.info(0).getId();
byte[] commitInfoId1 = sis.info(1).getId();
sis = SegmentInfos.readLatestCommit(dir);
- assertEquals(Version.LUCENE_10_0_0, sis.getMinSegmentLuceneVersion());
+ assertEquals(Version.LUCENE_11_0_0, sis.getMinSegmentLuceneVersion());
assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
assertEquals(
StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId()));
@@ -277,8 +277,8 @@ public class TestSegmentInfos extends LuceneTestCase {
SegmentInfo info =
new SegmentInfo(
dir,
- Version.LUCENE_9_0_0,
- Version.LUCENE_9_0_0,
+ Version.LUCENE_10_0_0,
+ Version.LUCENE_10_0_0,
"_0",
1,
false,
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesDirectoryReaderWrapper.java b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesDirectoryReaderWrapper.java
index 2098f57910d..5214b97fdc5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesDirectoryReaderWrapper.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSoftDeletesDirectoryReaderWrapper.java
@@ -306,6 +306,12 @@ public class TestSoftDeletesDirectoryReaderWrapper extends LuceneTestCase {
softDeletesField, MatchNoDocsQuery::new, mergePolicy));
writer.forceMerge(1);
try (DirectoryReader reader = DirectoryReader.open(writer)) {
+ for (LeafReaderContext leafContext : reader.leaves()) {
+ assertThat(leafContext.reader(), instanceOf(SegmentReader.class));
+ SegmentReader segmentReader = (SegmentReader) leafContext.reader();
+ assertNull(segmentReader.getLiveDocs());
+ assertNull(segmentReader.getHardLiveDocs());
+ }
SoftDeletesDirectoryReaderWrapper wrapped =
new SoftDeletesDirectoryReaderWrapper(reader, softDeletesField);
assertEquals(numDocs, wrapped.numDocs());
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
index 241fc0a5fe5..9663d676255 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
@@ -242,6 +242,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
NumericDocValues ids = leaf.getNumericDocValues("id");
long prevValue = -1;
boolean usingAltIds = false;
+ KnnVectorValues.DocIndexIterator valuesIterator = vectorValues.iterator();
for (int i = 0; i < actualNumDocs; i++) {
int idNext = ids.nextDoc();
if (idNext == DocIdSetIterator.NO_MORE_DOCS) {
@@ -254,6 +255,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
sorted_set_dv = leaf.getSortedSetDocValues("sorted_set_dv");
binary_sorted_dv = leaf.getSortedDocValues("binary_sorted_dv");
vectorValues = leaf.getFloatVectorValues("vector");
+ valuesIterator = vectorValues.iterator();
prevValue = -1;
}
assertTrue(prevValue + " < " + ids.longValue(), prevValue < ids.longValue());
@@ -262,7 +264,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
assertTrue(sorted_numeric_dv.advanceExact(idNext));
assertTrue(sorted_set_dv.advanceExact(idNext));
assertTrue(binary_sorted_dv.advanceExact(idNext));
- assertEquals(idNext, vectorValues.advance(idNext));
+ assertEquals(idNext, valuesIterator.advance(idNext));
assertEquals(new BytesRef(ids.longValue() + ""), binary_dv.binaryValue());
assertEquals(
new BytesRef(ids.longValue() + ""),
@@ -274,7 +276,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
assertEquals(1, sorted_numeric_dv.docValueCount());
assertEquals(ids.longValue(), sorted_numeric_dv.nextValue());
- float[] vectorValue = vectorValues.vectorValue();
+ float[] vectorValue = vectorValues.vectorValue(valuesIterator.index());
assertEquals(1, vectorValue.length);
assertEquals((float) ids.longValue(), vectorValue[0], 0.001f);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
index eb24d964702..a2d678a3ec0 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTieredMergePolicy.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -39,6 +40,8 @@ import org.apache.lucene.util.Version;
public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
+ private record DocCountAndSizeInBytes(int docCount, long sizeInBytes) {}
+
@Override
public TieredMergePolicy mergePolicy() {
return newTieredMergePolicy();
@@ -54,7 +57,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
int totalDelCount = 0;
int totalMaxDoc = 0;
long totalBytes = 0;
- List<Long> segmentSizes = new ArrayList<>();
+ List<DocCountAndSizeInBytes> segmentSizes = new ArrayList<>();
for (SegmentCommitInfo sci : infos) {
totalDelCount += sci.getDelCount();
totalMaxDoc += sci.info.maxDoc();
@@ -62,10 +65,11 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
double liveRatio = 1 - (double) sci.getDelCount() / sci.info.maxDoc();
long weightedByteSize = (long) (liveRatio * byteSize);
totalBytes += weightedByteSize;
- segmentSizes.add(weightedByteSize);
+ segmentSizes.add(
+ new DocCountAndSizeInBytes(sci.info.maxDoc() - sci.getDelCount(), weightedByteSize));
minSegmentBytes = Math.min(minSegmentBytes, weightedByteSize);
}
- Collections.sort(segmentSizes);
+ Collections.sort(segmentSizes, Comparator.comparingLong(DocCountAndSizeInBytes::sizeInBytes));
final double delPercentage = 100.0 * totalDelCount / totalMaxDoc;
assertTrue(
@@ -78,7 +82,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
long levelSizeBytes = Math.max(minSegmentBytes, (long) (tmp.getFloorSegmentMB() * 1024 * 1024));
long bytesLeft = totalBytes;
double allowedSegCount = 0;
- List<Long> biggestSegments = segmentSizes;
+ List<DocCountAndSizeInBytes> biggestSegments = segmentSizes;
if (biggestSegments.size() > tmp.getTargetSearchConcurrency() - 1) {
biggestSegments =
biggestSegments.subList(
@@ -86,11 +90,18 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
biggestSegments.size());
}
// Allow whole segments for the targetSearchConcurrency-1 biggest segments
- for (long size : biggestSegments) {
- bytesLeft -= size;
+ for (DocCountAndSizeInBytes size : biggestSegments) {
+ bytesLeft -= size.sizeInBytes();
allowedSegCount++;
}
+ int tooBigCount = 0;
+ for (DocCountAndSizeInBytes size : segmentSizes) {
+ if (size.sizeInBytes() >= maxMergedSegmentBytes / 2) {
+ tooBigCount++;
+ }
+ }
+
// below we make the assumption that segments that reached the max segment
// size divided by 2 don't need merging anymore
int mergeFactor = (int) Math.min(tmp.getSegmentsPerTier(), tmp.getMaxMergeAtOnce());
@@ -105,39 +116,31 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
bytesLeft -= tmp.getSegmentsPerTier() * levelSizeBytes;
levelSizeBytes = Math.min(levelSizeBytes * mergeFactor, maxMergedSegmentBytes / 2);
}
- allowedSegCount = Math.max(allowedSegCount, tmp.getSegmentsPerTier());
+ // Allow at least a full tier in addition of the too big segments.
+ allowedSegCount = Math.max(allowedSegCount, tooBigCount + tmp.getSegmentsPerTier());
+ // Allow at least `targetSearchConcurrency` segments.
+ allowedSegCount = Math.max(allowedSegCount, tmp.getTargetSearchConcurrency());
- // It's ok to be over the allowed segment count if none of the most balanced merges are balanced
- // enough
- boolean hasBalancedMerges = false;
- for (int i = 0; i < segmentSizes.size() - mergeFactor; ++i) {
- long maxMergeSegmentSize = segmentSizes.get(i + mergeFactor - 1);
- if (maxMergeSegmentSize >= maxMergedSegmentBytes / 2) {
- break;
- }
- long totalMergeSize = 0;
- for (int j = 0; j < i + mergeFactor; ++j) {
- totalMergeSize += segmentSizes.get(j);
- }
- if (maxMergedSegmentBytes * 1.5 <= totalMergeSize) {
- hasBalancedMerges = true;
+ // It's ok to be over the allowed segment count if none of the merges are legal, because they
+ // are either not balanced or exceed the max merged segment doc count.
+ // We only check pairwise merges instead of every possible merge to keep things simple. If none
+ // of the pairwise merges are legal, chances are high that no merge is legal.
+ int maxDocsPerSegment = tmp.getMaxAllowedDocs(infos.totalMaxDoc(), totalDelCount);
+ boolean hasLegalMerges = false;
+ for (int i = 0; i < segmentSizes.size() - 1; ++i) {
+ DocCountAndSizeInBytes size1 = segmentSizes.get(i);
+ DocCountAndSizeInBytes size2 = segmentSizes.get(i + 1);
+ long mergedSegmentSizeInBytes = size1.sizeInBytes() + size2.sizeInBytes();
+ int mergedSegmentDocCount = size1.docCount() + size2.docCount();
+
+ if (mergedSegmentSizeInBytes <= maxMergedSegmentBytes
+ && size2.sizeInBytes() * 1.5 <= mergedSegmentSizeInBytes
+ && mergedSegmentDocCount <= maxDocsPerSegment) {
+ hasLegalMerges = true;
break;
}
}
- // There can be more segments if we can't merge docs because they are balanced between segments.
- // At least the
- // 2 smallest segments should be mergeable.
- // should be 2 segments to merge
- int maxDocsPerSegment = tmp.getMaxAllowedDocs(infos.totalMaxDoc(), totalDelCount);
- List<Integer> segmentDocs =
- infos.asList().stream()
- .map(info -> info.info.maxDoc() - info.getDelCount())
- .sorted()
- .toList();
- boolean eligibleDocsMerge =
- segmentDocs.size() >= 2 && segmentDocs.get(0) + segmentDocs.get(1) < maxDocsPerSegment;
-
int numSegments = infos.asList().size();
assertTrue(
String.format(
@@ -154,7 +157,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
delPercentage,
tmp.getDeletesPctAllowed(),
tmp.getTargetSearchConcurrency()),
- numSegments <= allowedSegCount || hasBalancedMerges == false || eligibleDocsMerge == false);
+ numSegments <= allowedSegCount || hasLegalMerges == false);
}
@Override
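
The new check above only considers pairwise merges of size-adjacent segments: a merge is legal when the merged size stays under the max merged segment size, the merge is balanced (the larger input contributes at most about two thirds of the result, i.e. `size2 * 1.5 <= mergedSize`), and the merged doc count stays under the allowed maximum. A standalone sketch of that predicate, using a record of the same shape as the one added above (the constants in `main` are illustrative, not values from the test):

```java
import java.util.List;

public class PairwiseMergeCheck {
  // Same shape as the record added to TestTieredMergePolicy above.
  record DocCountAndSizeInBytes(int docCount, long sizeInBytes) {}

  // True if at least one merge of size-adjacent segments satisfies all three
  // constraints the updated test checks. Input must be sorted by size.
  static boolean hasLegalMerges(
      List<DocCountAndSizeInBytes> sortedBySize,
      long maxMergedSegmentBytes,
      int maxDocsPerSegment) {
    for (int i = 0; i < sortedBySize.size() - 1; i++) {
      DocCountAndSizeInBytes s1 = sortedBySize.get(i);
      DocCountAndSizeInBytes s2 = sortedBySize.get(i + 1);
      long mergedBytes = s1.sizeInBytes() + s2.sizeInBytes();
      int mergedDocs = s1.docCount() + s2.docCount();
      // Balanced: the larger input (s2) contributes at most ~2/3 of the result.
      boolean balanced = s2.sizeInBytes() * 1.5 <= mergedBytes;
      if (mergedBytes <= maxMergedSegmentBytes && balanced && mergedDocs <= maxDocsPerSegment) {
        return true;
      }
    }
    return false;
  }

  public static void main(String[] args) {
    List<DocCountAndSizeInBytes> segments =
        List.of(
            new DocCountAndSizeInBytes(100, 1L << 20),
            new DocCountAndSizeInBytes(150, 2L << 20),
            new DocCountAndSizeInBytes(5_000, 512L << 20));
    // The two 1-2 MB segments merge legally under a 5 GB cap and 10k doc limit.
    System.out.println(hasLegalMerges(segments, 5L << 30, 10_000)); // true
  }
}
```

Checking only adjacent pairs in the sorted list keeps the test simple; as the new comment notes, if no adjacent pair merges legally, chances are high no larger merge would either.
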
diff --git a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestCharObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestCharObjectHashMap.java
index 4cc036dcfe6..f9a1a259ce8 100644
--- a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestCharObjectHashMap.java
+++ b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestCharObjectHashMap.java
@@ -17,6 +17,8 @@
package org.apache.lucene.internal.hppc;
+import static org.apache.lucene.internal.hppc.TestIntObjectHashMap.toList;
+
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
@@ -24,6 +26,8 @@ import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
import org.junit.After;
import org.junit.Test;
@@ -66,13 +70,6 @@ public class TestCharObjectHashMap extends LuceneTestCase {
assertArrayEquals(elements, array);
}
- /** Check if the array's content is identical to a given sequence of elements. */
- private static void assertSortedListEquals(Object[] array, Object... elements) {
- assertEquals(elements.length, array.length);
- Arrays.sort(array);
- assertArrayEquals(elements, array);
- }
-
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
@@ -603,13 +600,15 @@ public class TestCharObjectHashMap extends LuceneTestCase {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
- assertSortedListEquals(map.values().toArray(), value1, value2, value3);
+ MatcherAssert.assertThat(
+ toList(map.values()), Matchers.containsInAnyOrder(value1, value2, value3));
map.clear();
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value2);
- assertSortedListEquals(map.values().toArray(), value1, value2, value2);
+ MatcherAssert.assertThat(
+ toList(map.values()), Matchers.containsInAnyOrder(value1, value2, value2));
}
/* */
diff --git a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestIntObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestIntObjectHashMap.java
index 6c6c0872ede..4144300ba55 100644
--- a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestIntObjectHashMap.java
+++ b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestIntObjectHashMap.java
@@ -18,12 +18,15 @@
package org.apache.lucene.internal.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.junit.Assert;
import org.junit.Test;
/**
@@ -66,10 +69,8 @@ public class TestIntObjectHashMap extends LuceneTestCase {
}
/** Check if the array's content is identical to a given sequence of elements. */
- private static void assertSortedListEquals(Object[] array, Object... elements) {
- assertEquals(elements.length, array.length);
- Arrays.sort(array);
- assertArrayEquals(elements, array);
+ private static void assertSortedListEquals(List<Object> array, Object... elements) {
+ Assert.assertEquals(Arrays.asList(elements), array.stream().sorted().toList());
}
private final int value0 = vcast(0);
@@ -584,13 +585,21 @@ public class TestIntObjectHashMap extends LuceneTestCase {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
- assertSortedListEquals(map.values().toArray(), value1, value2, value3);
+ assertSortedListEquals(toList(map.values()), value1, value2, value3);
map.clear();
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value2);
- assertSortedListEquals(map.values().toArray(), value1, value2, value2);
+ assertSortedListEquals(toList(map.values()), value1, value2, value2);
+ }
+
+ static List<Object> toList(Iterable<ObjectCursor<Object>> values) {
+ ArrayList<Object> list = new ArrayList<>();
+ for (var c : values) {
+ list.add(c.value);
+ }
+ return list;
}
/* */
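
The `toList` helper these tests now share simply drains hppc-style value cursors into a plain `List`, so Hamcrest's `containsInAnyOrder` can compare unordered contents. A self-contained sketch of the same idiom, with a stand-in cursor type since the real `ObjectCursor` lives in `org.apache.lucene.internal.hppc`:

```java
import java.util.ArrayList;
import java.util.List;

public class CursorToList {
  // Stand-in for org.apache.lucene.internal.hppc.ObjectCursor: hppc-style
  // iterators expose a cursor object rather than the values themselves.
  static class ObjectCursor<T> {
    T value;
  }

  static <T> List<T> toList(Iterable<ObjectCursor<T>> values) {
    ArrayList<T> list = new ArrayList<>();
    for (ObjectCursor<T> c : values) {
      list.add(c.value); // copy the payload out; the cursor may be reused
    }
    return list;
  }

  public static void main(String[] args) {
    ObjectCursor<String> a = new ObjectCursor<>();
    a.value = "x";
    ObjectCursor<String> b = new ObjectCursor<>();
    b.value = "y";
    System.out.println(toList(List.of(a, b))); // [x, y]
  }
}
```
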
diff --git a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestLongObjectHashMap.java b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestLongObjectHashMap.java
index f5d6176e24b..df66561197d 100644
--- a/lucene/core/src/test/org/apache/lucene/internal/hppc/TestLongObjectHashMap.java
+++ b/lucene/core/src/test/org/apache/lucene/internal/hppc/TestLongObjectHashMap.java
@@ -17,6 +17,8 @@
package org.apache.lucene.internal.hppc;
+import static org.apache.lucene.internal.hppc.TestIntObjectHashMap.toList;
+
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
@@ -24,6 +26,8 @@ import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
import org.junit.Test;
/**
@@ -65,13 +69,6 @@ public class TestLongObjectHashMap extends LuceneTestCase {
assertArrayEquals(elements, array);
}
- /** Check if the array's content is identical to a given sequence of elements. */
- private static void assertSortedListEquals(Object[] array, Object... elements) {
- assertEquals(elements.length, array.length);
- Arrays.sort(array);
- assertArrayEquals(elements, array);
- }
-
private final int value0 = vcast(0);
private final int value1 = vcast(1);
private final int value2 = vcast(2);
@@ -585,13 +582,15 @@ public class TestLongObjectHashMap extends LuceneTestCase {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
- assertSortedListEquals(map.values().toArray(), value1, value2, value3);
+ MatcherAssert.assertThat(
+ toList(map.values()), Matchers.containsInAnyOrder(value1, value2, value3));
map.clear();
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value2);
- assertSortedListEquals(map.values().toArray(), value1, value2, value2);
+ MatcherAssert.assertThat(
+ toList(map.values()), Matchers.containsInAnyOrder(value1, value2, value2));
}
/* */
diff --git a/lucene/core/src/test/org/apache/lucene/internal/vectorization/TestVectorScorer.java b/lucene/core/src/test/org/apache/lucene/internal/vectorization/TestVectorScorer.java
index da9c312ef96..bc3b6813a5b 100644
--- a/lucene/core/src/test/org/apache/lucene/internal/vectorization/TestVectorScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/internal/vectorization/TestVectorScorer.java
@@ -16,6 +16,7 @@
*/
package org.apache.lucene.internal.vectorization;
+import static java.util.Locale.ROOT;
import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN;
@@ -24,6 +25,8 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRO
import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
@@ -39,6 +42,8 @@ import java.util.stream.IntStream;
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
+import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
@@ -47,7 +52,6 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
-import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.junit.BeforeClass;
@@ -329,12 +333,63 @@ public class TestVectorScorer extends LuceneTestCase {
}
}
- RandomAccessVectorValues vectorValues(
- int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
+ // Tests that the FlatVectorsScorer handles float vectors correctly.
+ public void testWithFloatValues() throws IOException {
+ try (Directory dir = new MMapDirectory(createTempDir("testWithFloatValues"))) {
+ final String fileName = "floatvalues";
+ try (IndexOutput out = dir.createOutput(fileName, IOContext.DEFAULT)) {
+ var vec = floatToByteArray(1f); // single vector, with one dimension
+ out.writeBytes(vec, 0, vec.length);
+ }
+
+ try (IndexInput in = dir.openInput(fileName, IOContext.DEFAULT)) {
+ for (int times = 0; times < TIMES; times++) {
+ for (var sim : List.of(COSINE, EUCLIDEAN, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT)) {
+ var vectorValues = floatVectorValues(1, 1, in, sim);
+ assert vectorValues.getEncoding().byteSize == 4;
+
+ var supplier1 = DEFAULT_SCORER.getRandomVectorScorerSupplier(sim, vectorValues);
+ var supplier2 = MEMSEG_SCORER.getRandomVectorScorerSupplier(sim, vectorValues);
+ // these assertions assume that the suppliers' and scorers' toString contains "float",
+ // since they are based on float vectors.
+ assertTrue(supplier1.toString().toLowerCase(ROOT).contains("float"));
+ assertTrue(supplier2.toString().toLowerCase(ROOT).contains("float"));
+ assertTrue(supplier1.scorer(0).toString().toLowerCase(ROOT).contains("float"));
+ assertTrue(supplier2.scorer(0).toString().toLowerCase(ROOT).contains("float"));
+ float expected = supplier1.scorer(0).score(0);
+ assertEquals(supplier2.scorer(0).score(0), expected, DELTA);
+
+ var scorer1 = DEFAULT_SCORER.getRandomVectorScorer(sim, vectorValues, new float[] {1f});
+ var scorer2 = MEMSEG_SCORER.getRandomVectorScorer(sim, vectorValues, new float[] {1f});
+ assertTrue(scorer1.toString().toLowerCase(ROOT).contains("float"));
+ assertTrue(scorer2.toString().toLowerCase(ROOT).contains("float"));
+ expected = scorer1.score(0);
+ assertEquals(scorer2.score(0), expected, DELTA);
+
+ expectThrows(
+ Throwable.class,
+ () -> DEFAULT_SCORER.getRandomVectorScorer(sim, vectorValues, new byte[] {1}));
+ expectThrows(
+ Throwable.class,
+ () -> MEMSEG_SCORER.getRandomVectorScorer(sim, vectorValues, new byte[] {1}));
+ }
+ }
+ }
+ }
+ }
+
+ KnnVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim)
+ throws IOException {
return new OffHeapByteVectorValues.DenseOffHeapVectorValues(
dims, size, in.slice("byteValues", 0, in.length()), dims, MEMSEG_SCORER, sim);
}
+ KnnVectorValues floatVectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim)
+ throws IOException {
+ return new OffHeapFloatVectorValues.DenseOffHeapVectorValues(
+ dims, size, in.slice("floatValues", 0, in.length()), dims, MEMSEG_SCORER, sim);
+ }
+
// creates the vector based on the given ordinal, which is reproducible given the ord and dims
static byte[] vector(int ord, int dims) {
var random = new Random(Objects.hash(ord, dims));
@@ -355,6 +410,11 @@ public class TestVectorScorer extends LuceneTestCase {
}
}
+ /** Converts a float value to a byte array. */
+ public static byte[] floatToByteArray(float value) {
+ return ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN).putFloat(value).array();
+ }
+
static int randomIntBetween(int minInclusive, int maxInclusive) {
return RandomNumbers.randomIntBetween(random(), minInclusive, maxInclusive);
}
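
The `floatToByteArray` helper writes the float in little-endian order, matching the byte order Lucene's index format uses for primitives, so the off-heap reader recovers exactly the value written. A round-trip sketch (the `byteArrayToFloat` helper is added here for illustration and is not part of the test):

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

public class FloatBytesRoundTrip {
  // Same idea as the test's floatToByteArray helper: serialize one float in
  // little-endian byte order.
  static byte[] floatToByteArray(float value) {
    return ByteBuffer.allocate(Float.BYTES).order(ByteOrder.LITTLE_ENDIAN).putFloat(value).array();
  }

  static float byteArrayToFloat(byte[] bytes) {
    return ByteBuffer.wrap(bytes).order(ByteOrder.LITTLE_ENDIAN).getFloat();
  }

  public static void main(String[] args) {
    byte[] bytes = floatToByteArray(1f);
    // 1f == 0x3F800000, so the little-endian layout is [00 00 80 3F]
    for (byte b : bytes) {
      System.out.printf("%02X ", b & 0xFF);
    }
    System.out.println();
    System.out.println(byteArrayToFloat(bytes)); // 1.0
  }
}
```
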
diff --git a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
index 21a33f9ca3e..afa150e387f 100644
--- a/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/search/BaseKnnVectorQueryTestCase.java
@@ -38,6 +38,7 @@ import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.QueryTimeout;
@@ -740,7 +741,7 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
LeafReader leafReader = getOnlyLeafReader(reader);
FieldInfo fi = leafReader.getFieldInfos().fieldInfo("field");
assertNotNull(fi);
- DocIdSetIterator vectorValues;
+ KnnVectorValues vectorValues;
switch (fi.getVectorEncoding()) {
case BYTE:
vectorValues = leafReader.getByteVectorValues("field");
@@ -752,7 +753,7 @@ abstract class BaseKnnVectorQueryTestCase extends LuceneTestCase {
throw new AssertionError();
}
assertNotNull(vectorValues);
- assertEquals(NO_MORE_DOCS, vectorValues.nextDoc());
+ assertEquals(NO_MORE_DOCS, vectorValues.iterator().nextDoc());
}
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
index d816d419c4c..56160971931 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
@@ -23,44 +23,28 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
public void testSimple() {
MaxScoreAccumulator acc = new MaxScoreAccumulator();
acc.accumulate(0, 0f);
- assertEquals(0f, acc.get().score(), 0);
- assertEquals(0, acc.get().docId(), 0);
+ assertEquals(0f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(0, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(10, 0f);
- assertEquals(0f, acc.get().score(), 0);
- assertEquals(0, acc.get().docId(), 0);
+ assertEquals(0f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(0, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(100, 1000f);
- assertEquals(1000f, acc.get().score(), 0);
- assertEquals(100, acc.get().docId(), 0);
+ assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(100, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(1000, 5f);
- assertEquals(1000f, acc.get().score(), 0);
- assertEquals(100, acc.get().docId(), 0);
+ assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(100, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(99, 1000f);
- assertEquals(1000f, acc.get().score(), 0);
- assertEquals(99, acc.get().docId(), 0);
+ assertEquals(1000f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(99, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(1000, 1001f);
- assertEquals(1001f, acc.get().score(), 0);
- assertEquals(1000, acc.get().docId(), 0);
+ assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(1000, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(10, 1001f);
- assertEquals(1001f, acc.get().score(), 0);
- assertEquals(10, acc.get().docId(), 0);
+ assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(10, MaxScoreAccumulator.docId(acc.getRaw()), 0);
acc.accumulate(100, 1001f);
- assertEquals(1001f, acc.get().score(), 0);
- assertEquals(10, acc.get().docId(), 0);
- }
-
- public void testRandom() {
- MaxScoreAccumulator acc = new MaxScoreAccumulator();
- int numDocs = atLeast(100);
- int maxDocs = atLeast(10000);
- MaxScoreAccumulator.DocAndScore max = new MaxScoreAccumulator.DocAndScore(-1, -1);
- for (int i = 0; i < numDocs; i++) {
- MaxScoreAccumulator.DocAndScore res =
- new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
- acc.accumulate(res.docId(), res.score());
- if (res.compareTo(max) > 0) {
- max = res;
- }
- }
- assertEquals(max, acc.get());
+ assertEquals(1001f, MaxScoreAccumulator.toScore(acc.getRaw()), 0);
+ assertEquals(10, MaxScoreAccumulator.docId(acc.getRaw()), 0);
}
}
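
The test now reads the accumulator through `getRaw()` plus the static `toScore`/`docId` decoders instead of a `DocAndScore` object, implying the maximum is kept as one packed `long`. A hypothetical sketch of such an encoding (illustrative bit layout, not necessarily Lucene's exact one): score bits in the high half so a plain numeric max orders by score first, and an inverted doc id in the low half so score ties prefer the smaller doc, matching the assertions above.

```java
public class PackedDocAndScore {
  // Hypothetical packing: high 32 bits = score bits (non-negative floats sort
  // correctly as ints), low 32 bits = Integer.MAX_VALUE - docId, so equal
  // scores favor smaller docIds under Math.max.
  static long encode(int docId, float score) {
    assert docId >= 0 && score >= 0;
    return (((long) Float.floatToIntBits(score)) << 32) | (Integer.MAX_VALUE - docId);
  }

  static float toScore(long packed) {
    return Float.intBitsToFloat((int) (packed >>> 32));
  }

  static int docId(long packed) {
    return Integer.MAX_VALUE - (int) packed;
  }

  public static void main(String[] args) {
    long max = Long.MIN_VALUE; // sentinel for "nothing accumulated yet"
    max = Math.max(max, encode(100, 1000f));
    max = Math.max(max, encode(1000, 5f)); // lower score, ignored
    max = Math.max(max, encode(99, 1000f)); // same score, smaller doc wins
    System.out.println(toScore(max) + " @ doc " + docId(max)); // 1000.0 @ doc 99
  }
}
```

The `Long.MIN_VALUE` sentinel in the sketch is consistent with the updated collector tests below, which assert `getRaw()` returns `Long.MIN_VALUE` before anything has been accumulated.
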
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java
index c6920403c91..6973cc0025a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreBulkScorer.java
@@ -38,23 +38,6 @@ import org.apache.lucene.util.Bits;
// These basic tests are similar to some of the tests in TestWANDScorer, and may not need to be kept
public class TestMaxScoreBulkScorer extends LuceneTestCase {
- private static class CapMaxScoreWindowAt2048Scorer extends FilterScorer {
-
- public CapMaxScoreWindowAt2048Scorer(Scorer in) {
- super(in);
- }
-
- @Override
- public int advanceShallow(int target) throws IOException {
- return Math.min(target | 0x7FF, in.advanceShallow(target));
- }
-
- @Override
- public float getMaxScore(int upTo) throws IOException {
- return in.getMaxScore(upTo);
- }
- }
-
private void writeDocuments(Directory dir) throws IOException {
try (IndexWriter w =
new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()))) {
@@ -96,12 +79,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -168,12 +149,10 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
BulkScorer scorer =
new MaxScoreBulkScorer(context.reader().maxDoc(), Arrays.asList(scorer1, scorer2));
@@ -237,17 +216,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(
@@ -317,17 +293,14 @@ public class TestMaxScoreBulkScorer extends LuceneTestCase {
searcher
.createWeight(searcher.rewrite(clause1), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer1 = new CapMaxScoreWindowAt2048Scorer(scorer1);
Scorer scorer2 =
searcher
.createWeight(searcher.rewrite(clause2), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer2 = new CapMaxScoreWindowAt2048Scorer(scorer2);
Scorer scorer3 =
searcher
.createWeight(searcher.rewrite(clause3), ScoreMode.TOP_SCORES, 1f)
.scorer(context);
- scorer3 = new CapMaxScoreWindowAt2048Scorer(scorer3);
BulkScorer scorer =
new MaxScoreBulkScorer(
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java
index 7cfd0c5adde..b6503021617 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTermInSetQuery.java
@@ -52,6 +52,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
@@ -527,4 +528,19 @@ public class TestTermInSetQuery extends LuceneTestCase {
}
});
}
+
+ public void testTermsIterator() throws IOException {
+ TermInSetQuery empty = new TermInSetQuery("field", Collections.emptyList());
+ BytesRefIterator it = empty.getBytesRefIterator();
+ assertNull(it.next());
+
+ TermInSetQuery query =
+ new TermInSetQuery(
+ "field", List.of(newBytesRef("term1"), newBytesRef("term2"), newBytesRef("term3")));
+ it = query.getBytesRefIterator();
+ assertEquals(newBytesRef("term1"), it.next());
+ assertEquals(newBytesRef("term2"), it.next());
+ assertEquals(newBytesRef("term3"), it.next());
+ assertNull(it.next());
+ }
}
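
`getBytesRefIterator` follows the usual `BytesRefIterator` contract: `next()` returns each term in order and signals exhaustion with `null` rather than a `hasNext()` check. A minimal sketch of that consumption idiom, with `String` standing in for `BytesRef` to keep it self-contained:

```java
import java.io.IOException;
import java.util.Iterator;
import java.util.List;

public class DrainBytesRefs {
  // Stand-in mirroring org.apache.lucene.util.BytesRefIterator: next() yields
  // each term once and returns null when exhausted.
  interface BytesRefIterator {
    String next() throws IOException;
  }

  static BytesRefIterator fromSortedTerms(List<String> terms) {
    Iterator<String> it = terms.iterator();
    return () -> it.hasNext() ? it.next() : null;
  }

  public static void main(String[] args) throws IOException {
    BytesRefIterator it = fromSortedTerms(List.of("term1", "term2", "term3"));
    for (String term = it.next(); term != null; term = it.next()) {
      System.out.println(term);
    }
  }
}
```
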
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTimeLimitingBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestTimeLimitingBulkScorer.java
index cce82cd34ac..de5512a904a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTimeLimitingBulkScorer.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTimeLimitingBulkScorer.java
@@ -113,7 +113,7 @@ public class TestTimeLimitingBulkScorer extends LuceneTestCase {
private static QueryTimeout countingQueryTimeout(int timeallowed) {
return new QueryTimeout() {
- static int counter = 0;
+ int counter = 0;
@Override
public boolean shouldExit() {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
index d4df59f2f72..14b51ca214e 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
@@ -519,47 +519,47 @@ public class TestTopDocsCollector extends LuceneTestCase {
scorer.score = 3;
leafCollector.collect(0);
- assertNull(minValueChecker.get());
+ assertEquals(Long.MIN_VALUE, minValueChecker.getRaw());
assertNull(scorer.minCompetitiveScore);
scorer2.score = 6;
leafCollector2.collect(0);
- assertNull(minValueChecker.get());
+ assertEquals(Long.MIN_VALUE, minValueChecker.getRaw());
assertNull(scorer2.minCompetitiveScore);
scorer.score = 2;
leafCollector.collect(1);
- assertEquals(2f, minValueChecker.get().score(), 0f);
+ assertEquals(2f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertNull(scorer2.minCompetitiveScore);
scorer2.score = 9;
leafCollector2.collect(1);
- assertEquals(6f, minValueChecker.get().score(), 0f);
+ assertEquals(6f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(6f), scorer2.minCompetitiveScore, 0f);
scorer2.score = 7;
leafCollector2.collect(2);
- assertEquals(minValueChecker.get().score(), 7f, 0f);
+ assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
scorer2.score = 1;
leafCollector2.collect(3);
- assertEquals(minValueChecker.get().score(), 7f, 0f);
+ assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f);
assertEquals(Math.nextUp(2f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
scorer.score = 10;
leafCollector.collect(2);
- assertEquals(minValueChecker.get().score(), 7f, 0f);
+ assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 7f, 0f);
assertEquals(7f, scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
scorer.score = 11;
leafCollector.collect(3);
- assertEquals(minValueChecker.get().score(), 10, 0f);
+ assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 10, 0f);
assertEquals(Math.nextUp(10f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
@@ -571,19 +571,19 @@ public class TestTopDocsCollector extends LuceneTestCase {
scorer3.score = 1f;
leafCollector3.collect(0);
- assertEquals(10f, minValueChecker.get().score(), 0f);
+ assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);
scorer.score = 11;
leafCollector.collect(4);
- assertEquals(11f, minValueChecker.get().score(), 0f);
+ assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(10f), scorer3.minCompetitiveScore, 0f);
scorer3.score = 2f;
leafCollector3.collect(1);
- assertEquals(minValueChecker.get().score(), 11f, 0f);
+ assertEquals(MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 11f, 0f);
assertEquals(Math.nextUp(11f), scorer.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(7f), scorer2.minCompetitiveScore, 0f);
assertEquals(Math.nextUp(11f), scorer3.minCompetitiveScore, 0f);
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
index cd6f0ac079d..c507eb0f647 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
@@ -577,47 +577,47 @@ public class TestTopFieldCollector extends LuceneTestCase {
scorer.score = 3;
leafCollector.collect(0);
- assertNull(minValueChecker.get());
+ assertEquals(Long.MIN_VALUE, minValueChecker.getRaw());
assertNull(scorer.minCompetitiveScore);
scorer2.score = 6;
leafCollector2.collect(0);
- assertNull(minValueChecker.get());
+ assertEquals(Long.MIN_VALUE, minValueChecker.getRaw());
assertNull(scorer2.minCompetitiveScore);
scorer.score = 2;
leafCollector.collect(1);
- assertEquals(2f, minValueChecker.get().score(), 0f);
+ assertEquals(2f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertNull(scorer2.minCompetitiveScore);
scorer2.score = 9;
leafCollector2.collect(1);
- assertEquals(6f, minValueChecker.get().score(), 0f);
+ assertEquals(6f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertEquals(6f, scorer2.minCompetitiveScore, 0f);
scorer2.score = 7;
leafCollector2.collect(2);
- assertEquals(7f, minValueChecker.get().score(), 0f);
+ assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
scorer2.score = 1;
leafCollector2.collect(3);
- assertEquals(7f, minValueChecker.get().score(), 0f);
+ assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(2f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
scorer.score = 10;
leafCollector.collect(2);
- assertEquals(7f, minValueChecker.get().score(), 0f);
+ assertEquals(7f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(7f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
scorer.score = 11;
leafCollector.collect(3);
- assertEquals(10f, minValueChecker.get().score(), 0f);
+ assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(10f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
@@ -629,19 +629,19 @@ public class TestTopFieldCollector extends LuceneTestCase {
scorer3.score = 1f;
leafCollector3.collect(0);
- assertEquals(10f, minValueChecker.get().score(), 0f);
+ assertEquals(10f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(10f, scorer3.minCompetitiveScore, 0f);
scorer.score = 11;
leafCollector.collect(4);
- assertEquals(11f, minValueChecker.get().score(), 0f);
+ assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(11f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
assertEquals(10f, scorer3.minCompetitiveScore, 0f);
scorer3.score = 2f;
leafCollector3.collect(1);
- assertEquals(11f, minValueChecker.get().score(), 0f);
+ assertEquals(11f, MaxScoreAccumulator.toScore(minValueChecker.getRaw()), 0f);
assertEquals(11f, scorer.minCompetitiveScore, 0f);
assertEquals(7f, scorer2.minCompetitiveScore, 0f);
assertEquals(11f, scorer3.minCompetitiveScore, 0f);
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestVersion.java b/lucene/core/src/test/org/apache/lucene/util/TestVersion.java
index d34ee2f78db..b3a69b48fa4 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestVersion.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestVersion.java
@@ -33,24 +33,27 @@ public class TestVersion extends LuceneTestCase {
assertTrue("LATEST must be always onOrAfter(" + v + ")", Version.LATEST.onOrAfter(v));
}
}
- assertTrue(Version.LUCENE_10_0_0.onOrAfter(Version.LUCENE_9_0_0));
+ assertTrue(Version.LUCENE_11_0_0.onOrAfter(Version.fromBits(9, 0, 0)));
+ assertTrue(Version.LUCENE_11_0_0.onOrAfter(Version.LUCENE_10_0_0));
+ assertTrue(Version.LUCENE_11_0_0.onOrAfter(Version.LUCENE_10_1_0));
}
public void testToString() {
- assertEquals("9.0.0", Version.LUCENE_9_0_0.toString());
+ assertEquals("9.0.0", Version.fromBits(9, 0, 0).toString());
assertEquals("10.0.0", Version.LUCENE_10_0_0.toString());
+ assertEquals("10.1.0", Version.LUCENE_10_1_0.toString());
+ assertEquals("11.0.0", Version.LUCENE_11_0_0.toString());
}
public void testParseLeniently() throws Exception {
+ assertEquals(Version.LUCENE_11_0_0, Version.parseLeniently("11.0"));
+ assertEquals(Version.LUCENE_11_0_0, Version.parseLeniently("11.0.0"));
+ assertEquals(Version.LUCENE_11_0_0, Version.parseLeniently("LUCENE_11_0"));
+ assertEquals(Version.LUCENE_11_0_0, Version.parseLeniently("LUCENE_11_0_0"));
assertEquals(Version.LUCENE_10_0_0, Version.parseLeniently("10.0"));
assertEquals(Version.LUCENE_10_0_0, Version.parseLeniently("10.0.0"));
assertEquals(Version.LUCENE_10_0_0, Version.parseLeniently("LUCENE_10_0"));
assertEquals(Version.LUCENE_10_0_0, Version.parseLeniently("LUCENE_10_0_0"));
- assertEquals(Version.LUCENE_9_0_0, Version.parseLeniently("9.0"));
- assertEquals(Version.LUCENE_9_0_0, Version.parseLeniently("9.0.0"));
- assertEquals(Version.LUCENE_9_0_0, Version.parseLeniently("LUCENE_90"));
- assertEquals(Version.LUCENE_9_0_0, Version.parseLeniently("LUCENE_9_0"));
- assertEquals(Version.LUCENE_9_0_0, Version.parseLeniently("LUCENE_9_0_0"));
assertEquals(Version.LATEST, Version.parseLeniently("LATEST"));
assertEquals(Version.LATEST, Version.parseLeniently("latest"));
@@ -108,7 +111,7 @@ public class TestVersion extends LuceneTestCase {
public void testParse() throws Exception {
assertEquals(Version.LUCENE_10_0_0, Version.parse("10.0.0"));
- assertEquals(Version.LUCENE_9_0_0, Version.parse("9.0.0"));
+ assertEquals(Version.LUCENE_11_0_0, Version.parse("11.0.0"));
// Version does not pass judgement on the major version:
assertEquals(1, Version.parse("1.0").major);
@@ -116,7 +119,9 @@ public class TestVersion extends LuceneTestCase {
}
public void testForwardsCompatibility() throws Exception {
- assertTrue(Version.parse("9.10.20").onOrAfter(Version.LUCENE_9_0_0));
+ assertTrue(Version.parse("11.10.20").onOrAfter(Version.LUCENE_11_0_0));
+ assertTrue(Version.parse("10.10.20").onOrAfter(Version.LUCENE_10_0_0));
+ assertTrue(Version.parse("9.10.20").onOrAfter(Version.fromBits(9, 0, 0)));
}
public void testParseExceptions() {
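
With the `LUCENE_9_0_0` constant gone, the tests build that version via `Version.fromBits(9, 0, 0)`, a factory that assembles a `Version` from major/minor/bugfix components without needing a named constant. A sketch of the idea behind such a factory (the packed encoding below is illustrative, not Lucene's actual layout):

```java
public class VersionBits {
  // Illustrative encoding: one byte each for major/minor/bugfix, packed so
  // numeric comparison of the packed value matches version ordering.
  record Version(int major, int minor, int bugfix) implements Comparable<Version> {
    static Version fromBits(int major, int minor, int bugfix) {
      return new Version(major, minor, bugfix);
    }

    int encode() {
      return (major << 16) | (minor << 8) | bugfix;
    }

    boolean onOrAfter(Version other) {
      return encode() >= other.encode();
    }

    @Override
    public int compareTo(Version other) {
      return Integer.compare(encode(), other.encode());
    }

    @Override
    public String toString() {
      return major + "." + minor + "." + bugfix;
    }
  }

  public static void main(String[] args) {
    Version v900 = Version.fromBits(9, 0, 0);
    Version v1100 = Version.fromBits(11, 0, 0);
    System.out.println(v1100.onOrAfter(v900)); // true
    System.out.println(v900); // 9.0.0
  }
}
```
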
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/AbstractMockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/AbstractMockVectorValues.java
deleted file mode 100644
index 54de3919b51..00000000000
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/AbstractMockVectorValues.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.util.hnsw;
-
-import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
-
-import java.io.IOException;
-import org.apache.lucene.util.BytesRef;
-
-abstract class AbstractMockVectorValues<T> implements RandomAccessVectorValues {
-
- protected final int dimension;
- protected final T[] denseValues;
- protected final T[] values;
- protected final int numVectors;
- protected final BytesRef binaryValue;
-
- protected int pos = -1;
-
- AbstractMockVectorValues(T[] values, int dimension, T[] denseValues, int numVectors) {
- this.dimension = dimension;
- this.values = values;
- this.denseValues = denseValues;
- // used by tests that build a graph from bytes rather than floats
- binaryValue = new BytesRef(dimension);
- binaryValue.length = dimension;
- this.numVectors = numVectors;
- }
-
- @Override
- public int size() {
- return numVectors;
- }
-
- @Override
- public int dimension() {
- return dimension;
- }
-
- public T vectorValue(int targetOrd) {
- return denseValues[targetOrd];
- }
-
- @Override
- public abstract AbstractMockVectorValues<T> copy();
-
- public abstract T vectorValue() throws IOException;
-
- private boolean seek(int target) {
- if (target >= 0 && target < values.length && values[target] != null) {
- pos = target;
- return true;
- } else {
- return false;
- }
- }
-
- public int docID() {
- return pos;
- }
-
- public int nextDoc() {
- return advance(pos + 1);
- }
-
- public int advance(int target) {
- while (++pos < values.length) {
- if (seek(pos)) {
- return pos;
- }
- }
- return NO_MORE_DOCS;
- }
-}
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/HnswGraphTestCase.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/HnswGraphTestCase.java
index 4a6794b4994..41aeef2e5c8 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/HnswGraphTestCase.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/HnswGraphTestCase.java
@@ -56,6 +56,7 @@ import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredFields;
@@ -97,33 +98,28 @@ abstract class HnswGraphTestCase<T> extends LuceneTestCase {
abstract T randomVector(int dim);
- abstract AbstractMockVectorValues<T> vectorValues(int size, int dimension);
+ abstract KnnVectorValues vectorValues(int size, int dimension);
- abstract AbstractMockVectorValues<T> vectorValues(float[][] values);
+ abstract KnnVectorValues vectorValues(float[][] values);
- abstract AbstractMockVectorValues<T> vectorValues(LeafReader reader, String fieldName)
- throws IOException;
+ abstract KnnVectorValues vectorValues(LeafReader reader, String fieldName) throws IOException;
- abstract AbstractMockVectorValues<T> vectorValues(
- int size,
- int dimension,
- AbstractMockVectorValues<T> pregeneratedVectorValues,
- int pregeneratedOffset);
+ abstract KnnVectorValues vectorValues(
+ int size, int dimension, KnnVectorValues pregeneratedVectorValues, int pregeneratedOffset);
abstract Field knnVectorField(String name, T vector, VectorSimilarityFunction similarityFunction);
- abstract RandomAccessVectorValues circularVectorValues(int nDoc);
+ abstract KnnVectorValues circularVectorValues(int nDoc);
abstract T getTargetVector();
- protected RandomVectorScorerSupplier buildScorerSupplier(RandomAccessVectorValues vectors)
+ protected RandomVectorScorerSupplier buildScorerSupplier(KnnVectorValues vectors)
throws IOException {
return flatVectorScorer.getRandomVectorScorerSupplier(similarityFunction, vectors);
}
- protected RandomVectorScorer buildScorer(RandomAccessVectorValues vectors, T query)
- throws IOException {
- RandomAccessVectorValues vectorsCopy = vectors.copy();
+ protected RandomVectorScorer buildScorer(KnnVectorValues vectors, T query) throws IOException {
+ KnnVectorValues vectorsCopy = vectors.copy();
return switch (getVectorEncoding()) {
case BYTE ->
flatVectorScorer.getRandomVectorScorer(similarityFunction, vectorsCopy, (byte[]) query);
@@ -134,6 +130,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
// Tests writing segments of various sizes and merging to ensure there are no errors
// in the HNSW graph merging logic.
+ @SuppressWarnings("unchecked")
public void testRandomReadWriteAndMerge() throws IOException {
int dim = random().nextInt(100) + 1;
int[] segmentSizes =
@@ -148,7 +145,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int M = random().nextInt(4) + 2;
int beamWidth = random().nextInt(10) + 5;
long seed = random().nextLong();
- AbstractMockVectorValues<T> vectors = vectorValues(numVectors, dim);
+ KnnVectorValues vectors = vectorValues(numVectors, dim);
HnswGraphBuilder.randSeed = seed;
try (Directory dir = newDirectory()) {
@@ -173,7 +170,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
for (int i = 0; i < segmentSizes.length; i++) {
int size = segmentSizes[i];
- while (vectors.nextDoc() < size) {
+ for (int ord = 0; ord < size; ord++) {
if (isSparse[i] && random().nextBoolean()) {
int d = random().nextInt(10) + 1;
for (int j = 0; j < d; j++) {
@@ -182,8 +179,24 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
}
Document doc = new Document();
- doc.add(knnVectorField("field", vectors.vectorValue(), similarityFunction));
- doc.add(new StringField("id", Integer.toString(vectors.docID()), Field.Store.NO));
+ switch (vectors.getEncoding()) {
+ case BYTE -> {
+ doc.add(
+ knnVectorField(
+ "field",
+ (T) ((ByteVectorValues) vectors).vectorValue(ord),
+ similarityFunction));
+ }
+ case FLOAT32 -> {
+ doc.add(
+ knnVectorField(
+ "field",
+ (T) ((FloatVectorValues) vectors).vectorValue(ord),
+ similarityFunction));
+ }
+ }
+ doc.add(new StringField("id", Integer.toString(vectors.ordToDoc(ord)), Field.Store.NO));
iw.addDocument(doc);
}
iw.commit();
@@ -199,13 +212,26 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
try (IndexReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
- AbstractMockVectorValues<T> values = vectorValues(ctx.reader(), "field");
+ KnnVectorValues values = vectorValues(ctx.reader(), "field");
assertEquals(dim, values.dimension());
}
}
}
}
+ @SuppressWarnings("unchecked")
+ private T vectorValue(KnnVectorValues vectors, int ord) throws IOException {
+ switch (vectors.getEncoding()) {
+ case BYTE -> {
+ return (T) ((ByteVectorValues) vectors).vectorValue(ord);
+ }
+ case FLOAT32 -> {
+ return (T) ((FloatVectorValues) vectors).vectorValue(ord);
+ }
+ }
+ throw new AssertionError("unknown encoding " + vectors.getEncoding());
+ }
+
// test writing out and reading in a graph gives the expected graph
public void testReadWrite() throws IOException {
int dim = random().nextInt(100) + 1;
@@ -213,8 +239,8 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int M = random().nextInt(4) + 2;
int beamWidth = random().nextInt(10) + 5;
long seed = random().nextLong();
- AbstractMockVectorValues<T> vectors = vectorValues(nDoc, dim);
- AbstractMockVectorValues<T> v2 = vectors.copy(), v3 = vectors.copy();
+ KnnVectorValues vectors = vectorValues(nDoc, dim);
+ KnnVectorValues v2 = vectors.copy(), v3 = vectors.copy();
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, M, beamWidth, seed);
HnswGraph hnsw = builder.build(vectors.size());
@@ -242,15 +268,16 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
});
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
- while (v2.nextDoc() != NO_MORE_DOCS) {
- while (indexedDoc < v2.docID()) {
+ KnnVectorValues.DocIndexIterator it2 = v2.iterator();
+ while (it2.nextDoc() != NO_MORE_DOCS) {
+ while (indexedDoc < it2.docID()) {
// increment docId in the index by adding empty documents
iw.addDocument(new Document());
indexedDoc++;
}
Document doc = new Document();
- doc.add(knnVectorField("field", v2.vectorValue(), similarityFunction));
- doc.add(new StoredField("id", v2.docID()));
+ doc.add(knnVectorField("field", vectorValue(v2, it2.index()), similarityFunction));
+ doc.add(new StoredField("id", it2.docID()));
iw.addDocument(doc);
nVec++;
indexedDoc++;
@@ -258,7 +285,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
try (IndexReader reader = DirectoryReader.open(dir)) {
for (LeafReaderContext ctx : reader.leaves()) {
- AbstractMockVectorValues<T> values = vectorValues(ctx.reader(), "field");
+ KnnVectorValues values = vectorValues(ctx.reader(), "field");
assertEquals(dim, values.dimension());
assertEquals(nVec, values.size());
assertEquals(indexedDoc, ctx.reader().maxDoc());
@@ -280,7 +307,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
public void testSortedAndUnsortedIndicesReturnSameResults() throws IOException {
int dim = random().nextInt(10) + 3;
int nDoc = random().nextInt(200) + 100;
- AbstractMockVectorValues<T> vectors = vectorValues(nDoc, dim);
+ KnnVectorValues vectors = vectorValues(nDoc, dim);
int M = random().nextInt(10) + 5;
int beamWidth = random().nextInt(10) + 10;
@@ -323,15 +350,15 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int indexedDoc = 0;
try (IndexWriter iw = new IndexWriter(dir, iwc);
IndexWriter iw2 = new IndexWriter(dir2, iwc2)) {
- while (vectors.nextDoc() != NO_MORE_DOCS) {
- while (indexedDoc < vectors.docID()) {
+ for (int ord = 0; ord < vectors.size(); ord++) {
+ while (indexedDoc < vectors.ordToDoc(ord)) {
// increment docId in the index by adding empty documents
iw.addDocument(new Document());
indexedDoc++;
}
Document doc = new Document();
- doc.add(knnVectorField("vector", vectors.vectorValue(), similarityFunction));
- doc.add(new StoredField("id", vectors.docID()));
+ doc.add(knnVectorField("vector", vectorValue(vectors, ord), similarityFunction));
+ doc.add(new StoredField("id", vectors.ordToDoc(ord)));
doc.add(new NumericDocValuesField("sortkey", random().nextLong()));
iw.addDocument(doc);
iw2.addDocument(doc);
@@ -461,7 +488,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
public void testAknnDiverse() throws IOException {
int nDoc = 100;
similarityFunction = VectorSimilarityFunction.DOT_PRODUCT;
- RandomAccessVectorValues vectors = circularVectorValues(nDoc);
+ KnnVectorValues vectors = circularVectorValues(nDoc);
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 10, 100, random().nextInt());
OnHeapHnswGraph hnsw = builder.build(vectors.size());
@@ -493,7 +520,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
@SuppressWarnings("unchecked")
public void testSearchWithAcceptOrds() throws IOException {
int nDoc = 100;
- RandomAccessVectorValues vectors = circularVectorValues(nDoc);
+ KnnVectorValues vectors = circularVectorValues(nDoc);
similarityFunction = VectorSimilarityFunction.DOT_PRODUCT;
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 16, 100, random().nextInt());
@@ -518,7 +545,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
@SuppressWarnings("unchecked")
public void testSearchWithSelectiveAcceptOrds() throws IOException {
int nDoc = 100;
- RandomAccessVectorValues vectors = circularVectorValues(nDoc);
+ KnnVectorValues vectors = circularVectorValues(nDoc);
similarityFunction = VectorSimilarityFunction.DOT_PRODUCT;
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 16, 100, random().nextInt());
@@ -552,13 +579,13 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int dim = atLeast(10);
long seed = random().nextLong();
- AbstractMockVectorValues<T> initializerVectors = vectorValues(initializerSize, dim);
+ KnnVectorValues initializerVectors = vectorValues(initializerSize, dim);
RandomVectorScorerSupplier initialscorerSupplier = buildScorerSupplier(initializerVectors);
HnswGraphBuilder initializerBuilder =
HnswGraphBuilder.create(initialscorerSupplier, 10, 30, seed);
OnHeapHnswGraph initializerGraph = initializerBuilder.build(initializerVectors.size());
- AbstractMockVectorValues<T> finalVectorValues =
+ KnnVectorValues finalVectorValues =
vectorValues(totalSize, dim, initializerVectors, docIdOffset);
int[] initializerOrdMap =
createOffsetOrdinalMap(initializerSize, finalVectorValues, docIdOffset);
@@ -598,13 +625,13 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int dim = atLeast(10);
long seed = random().nextLong();
- AbstractMockVectorValues<T> initializerVectors = vectorValues(initializerSize, dim);
+ KnnVectorValues initializerVectors = vectorValues(initializerSize, dim);
RandomVectorScorerSupplier initialscorerSupplier = buildScorerSupplier(initializerVectors);
HnswGraphBuilder initializerBuilder =
HnswGraphBuilder.create(initialscorerSupplier, 10, 30, seed);
OnHeapHnswGraph initializerGraph = initializerBuilder.build(initializerVectors.size());
- AbstractMockVectorValues<T> finalVectorValues =
+ KnnVectorValues finalVectorValues =
vectorValues(totalSize, dim, initializerVectors.copy(), docIdOffset);
int[] initializerOrdMap =
createOffsetOrdinalMap(initializerSize, finalVectorValues, docIdOffset);
@@ -688,19 +715,17 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
private int[] createOffsetOrdinalMap(
- int docIdSize, AbstractMockVectorValues<T> totalVectorValues, int docIdOffset) {
+ int docIdSize, KnnVectorValues totalVectorValues, int docIdOffset) throws IOException {
// Compute the offset for the ordinal map to be the number of non-null vectors in the total
- // vector values
- // before the docIdOffset
+ // vector values before the docIdOffset
int ordinalOffset = 0;
- while (totalVectorValues.nextDoc() < docIdOffset) {
+ KnnVectorValues.DocIndexIterator it = totalVectorValues.iterator();
+ while (it.nextDoc() < docIdOffset) {
ordinalOffset++;
}
int[] offsetOrdinalMap = new int[docIdSize];
- for (int curr = 0;
- totalVectorValues.docID() < docIdOffset + docIdSize;
- totalVectorValues.nextDoc()) {
+ for (int curr = 0; it.docID() < docIdOffset + docIdSize; it.nextDoc()) {
offsetOrdinalMap[curr] = ordinalOffset + curr++;
}
@@ -711,7 +736,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
public void testVisitedLimit() throws IOException {
int nDoc = 500;
similarityFunction = VectorSimilarityFunction.DOT_PRODUCT;
- RandomAccessVectorValues vectors = circularVectorValues(nDoc);
+ KnnVectorValues vectors = circularVectorValues(nDoc);
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 16, 100, random().nextInt());
OnHeapHnswGraph hnsw = builder.build(vectors.size());
@@ -746,7 +771,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
int M = randomIntBetween(4, 96);
similarityFunction = RandomizedTest.randomFrom(VectorSimilarityFunction.values());
- RandomAccessVectorValues vectors = vectorValues(size, dim);
+ KnnVectorValues vectors = vectorValues(size, dim);
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder =
@@ -771,7 +796,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
unitVector2d(0.77),
unitVector2d(0.6)
};
- AbstractMockVectorValues<T> vectors = vectorValues(values);
+ KnnVectorValues vectors = vectorValues(values);
// First add nodes until everybody gets a full neighbor list
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 2, 10, random().nextInt());
@@ -825,7 +850,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
{10, 0, 0},
{0, 4, 0}
};
- AbstractMockVectorValues<T> vectors = vectorValues(values);
+ KnnVectorValues vectors = vectorValues(values);
// First add nodes until everybody gets a full neighbor list
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 1, 10, random().nextInt());
@@ -855,7 +880,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
{0, 0, 20},
{0, 9, 0}
};
- AbstractMockVectorValues<T> vectors = vectorValues(values);
+ KnnVectorValues vectors = vectorValues(values);
// First add nodes until everybody gets a full neighbor list
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 1, 10, random().nextInt());
@@ -891,7 +916,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
public void testRandom() throws IOException {
int size = atLeast(100);
int dim = atLeast(10);
- AbstractMockVectorValues<T> vectors = vectorValues(size, dim);
+ KnnVectorValues vectors = vectorValues(size, dim);
int topK = 5;
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 10, 30, random().nextLong());
@@ -908,15 +933,13 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
TopDocs topDocs = actual.topDocs();
NeighborQueue expected = new NeighborQueue(topK, false);
for (int j = 0; j < size; j++) {
- if (vectors.vectorValue(j) != null && (acceptOrds == null || acceptOrds.get(j))) {
+ if (vectorValue(vectors, j) != null && (acceptOrds == null || acceptOrds.get(j))) {
if (getVectorEncoding() == VectorEncoding.BYTE) {
- assert query instanceof byte[];
expected.add(
- j, similarityFunction.compare((byte[]) query, (byte[]) vectors.vectorValue(j)));
+ j, similarityFunction.compare((byte[]) query, (byte[]) vectorValue(vectors, j)));
} else {
- assert query instanceof float[];
expected.add(
- j, similarityFunction.compare((float[]) query, (float[]) vectors.vectorValue(j)));
+ j, similarityFunction.compare((float[]) query, (float[]) vectorValue(vectors, j)));
}
if (expected.size() > topK) {
expected.pop();
@@ -940,7 +963,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
throws IOException, ExecutionException, InterruptedException, TimeoutException {
int size = atLeast(100);
int dim = atLeast(10);
- AbstractMockVectorValues<T> vectors = vectorValues(size, dim);
+ KnnVectorValues vectors = vectorValues(size, dim);
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
HnswGraphBuilder builder = HnswGraphBuilder.create(scorerSupplier, 10, 30, random().nextLong());
OnHeapHnswGraph hnsw = builder.build(vectors.size());
@@ -1004,7 +1027,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
public void testConcurrentMergeBuilder() throws IOException {
int size = atLeast(1000);
int dim = atLeast(10);
- AbstractMockVectorValues<T> vectors = vectorValues(size, dim);
+ KnnVectorValues vectors = vectorValues(size, dim);
RandomVectorScorerSupplier scorerSupplier = buildScorerSupplier(vectors);
ExecutorService exec = Executors.newFixedThreadPool(4, new NamedThreadFactory("hnswMerge"));
TaskExecutor taskExecutor = new TaskExecutor(exec);
@@ -1033,7 +1056,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
// Search for a large number of results
int topK = size - 1;
- AbstractMockVectorValues<T> docVectors = vectorValues(size, dim);
+ KnnVectorValues docVectors = vectorValues(size, dim);
HnswGraph graph =
HnswGraphBuilder.create(buildScorerSupplier(docVectors), 10, 30, random().nextLong())
.build(size);
@@ -1047,8 +1070,8 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
};
- AbstractMockVectorValues<T> queryVectors = vectorValues(1, dim);
- RandomVectorScorer queryScorer = buildScorer(docVectors, queryVectors.vectorValue(0));
+ KnnVectorValues queryVectors = vectorValues(1, dim);
+ RandomVectorScorer queryScorer = buildScorer(docVectors, vectorValue(queryVectors, 0));
KnnCollector collector = new TopKnnCollector(topK, Integer.MAX_VALUE);
HnswGraphSearcher.search(queryScorer, collector, singleLevelGraph, null);
@@ -1076,8 +1099,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
/** Returns vectors evenly distributed around the upper unit semicircle. */
- static class CircularFloatVectorValues extends FloatVectorValues
- implements RandomAccessVectorValues.Floats {
+ static class CircularFloatVectorValues extends FloatVectorValues {
private final int size;
private final float[] value;
@@ -1103,22 +1125,18 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
return size;
}
- @Override
public float[] vectorValue() {
return vectorValue(doc);
}
- @Override
public int docID() {
return doc;
}
- @Override
public int nextDoc() {
return advance(doc + 1);
}
- @Override
public int advance(int target) {
if (target >= 0 && target < size) {
doc = target;
@@ -1140,8 +1158,7 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
}
/** Returns vectors evenly distributed around the upper unit semicircle. */
- static class CircularByteVectorValues extends ByteVectorValues
- implements RandomAccessVectorValues.Bytes {
+ static class CircularByteVectorValues extends ByteVectorValues {
private final int size;
private final float[] value;
private final byte[] bValue;
@@ -1169,22 +1186,18 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
return size;
}
- @Override
public byte[] vectorValue() {
return vectorValue(doc);
}
- @Override
public int docID() {
return doc;
}
- @Override
public int nextDoc() {
return advance(doc + 1);
}
- @Override
public int advance(int target) {
if (target >= 0 && target < size) {
doc = target;
@@ -1227,27 +1240,25 @@ abstract class HnswGraphTestCase extends LuceneTestCase {
return neighbors;
}
- void assertVectorsEqual(AbstractMockVectorValues<T> u, AbstractMockVectorValues<T> v)
- throws IOException {
+ void assertVectorsEqual(KnnVectorValues u, KnnVectorValues v) throws IOException {
int uDoc, vDoc;
- while (true) {
- uDoc = u.nextDoc();
- vDoc = v.nextDoc();
+ assertEquals(u.size(), v.size());
+ for (int ord = 0; ord < u.size(); ord++) {
+ uDoc = u.ordToDoc(ord);
+ vDoc = v.ordToDoc(ord);
assertEquals(uDoc, vDoc);
- if (uDoc == NO_MORE_DOCS) {
- break;
- }
+ assertNotEquals(NO_MORE_DOCS, uDoc);
switch (getVectorEncoding()) {
case BYTE ->
assertArrayEquals(
"vectors do not match for doc=" + uDoc,
- (byte[]) u.vectorValue(),
- (byte[]) v.vectorValue());
+ (byte[]) vectorValue(u, ord),
+ (byte[]) vectorValue(v, ord));
case FLOAT32 ->
assertArrayEquals(
"vectors do not match for doc=" + uDoc,
- (float[]) u.vectorValue(),
- (float[]) v.vectorValue(),
+ (float[]) vectorValue(u, ord),
+ (float[]) vectorValue(v, ord),
1e-4f);
default ->
throw new IllegalArgumentException("unknown vector encoding: " + getVectorEncoding());
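For context, the assertions above fetch vectors by ordinal through a vectorValue(values, ord) helper that is defined elsewhere in HnswGraphTestCase and is not visible in this hunk. A minimal sketch of what such a helper plausibly looks like, assuming it simply dispatches on the concrete KnnVectorValues subclass (the class name OrdinalAccessSketch is hypothetical; FloatVectorValues#vectorValue(int) and ByteVectorValues#vectorValue(int) are the real ordinal-based accessors in Lucene 10):

import java.io.IOException;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;

final class OrdinalAccessSketch {
  private OrdinalAccessSketch() {}

  // Dispatch on the concrete subclass and fetch the vector by ordinal,
  // mirroring the vectorValue(u, ord) / vectorValue(v, ord) calls above.
  static Object vectorValue(KnnVectorValues values, int ord) throws IOException {
    if (values instanceof FloatVectorValues floats) {
      return floats.vectorValue(ord);
    }
    if (values instanceof ByteVectorValues bytes) {
      return bytes.vectorValue(ord);
    }
    throw new IllegalArgumentException("unknown vector values: " + values.getClass());
  }
}

This matches the shape of the new assertVectorsEqual loop: iterate ordinals 0..size(), map each ordinal to a doc with ordToDoc(ord), and compare the per-ordinal vectors, rather than co-iterating two doc-ID cursors with nextDoc().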
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockByteVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockByteVectorValues.java
index a3b17b9a621..4ab86c70781 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockByteVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockByteVectorValues.java
@@ -17,11 +17,17 @@
package org.apache.lucene.util.hnsw;
+import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
-class MockByteVectorValues extends AbstractMockVectorValues
- implements RandomAccessVectorValues.Bytes {
+class MockByteVectorValues extends ByteVectorValues {
+ private final int dimension;
+ private final byte[][] denseValues;
+ protected final byte[][] values;
+ private final int numVectors;
+ private final BytesRef binaryValue;
private final byte[] scratch;
static MockByteVectorValues fromValues(byte[][] values) {
@@ -43,10 +49,26 @@ class MockByteVectorValues extends AbstractMockVectorValues
}
MockByteVectorValues(byte[][] values, int dimension, byte[][] denseValues, int numVectors) {
- super(values, dimension, denseValues, numVectors);
+ this.dimension = dimension;
+ this.values = values;
+ this.denseValues = denseValues;
+ this.numVectors = numVectors;
+ // used by tests that build a graph from bytes rather than floats
+ binaryValue = new BytesRef(dimension);
+ binaryValue.length = dimension;
scratch = new byte[dimension];
}
+ @Override
+ public int size() {
+ return values.length;
+ }
+
+ @Override
+ public int dimension() {
+ return dimension;
+ }
+
@Override
public MockByteVectorValues copy() {
return new MockByteVectorValues(
@@ -55,20 +77,20 @@ class MockByteVectorValues extends AbstractMockVectorValues
@Override
public byte[] vectorValue(int ord) {
- return values[ord];
- }
-
- @Override
- public byte[] vectorValue() {
if (LuceneTestCase.random().nextBoolean()) {
- return values[pos];
+ return values[ord];
} else {
// Sometimes use the same scratch array repeatedly, mimicking what the codec will do.
// This should help us catch cases of aliasing where the same ByteVectorValues source
// is used twice in a single computation.
- System.arraycopy(values[pos], 0, scratch, 0, dimension);
+ System.arraycopy(values[ord], 0, scratch, 0, dimension);
return scratch;
}
}
+
+ @Override
+ public DocIndexIterator iterator() {
+ return createDenseIterator();
+ }
}
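The randomized scratch-buffer reuse above means vectorValue(ord) may return an array that the next call overwrites. A short sketch of how a caller has to defend against that aliasing (the dotProduct helper and AliasingSketch class are hypothetical; any ByteVectorValues with this reuse behavior would exercise the same hazard):

import java.io.IOException;
import org.apache.lucene.index.ByteVectorValues;

final class AliasingSketch {
  private AliasingSketch() {}

  // Dot product over two ordinals; the defensive copy keeps the first vector
  // valid in case the second vectorValue(ord) call reuses the scratch array.
  static int dotProduct(ByteVectorValues values, int ordA, int ordB) throws IOException {
    byte[] a = values.vectorValue(ordA).clone(); // copy before the next access
    byte[] b = values.vectorValue(ordB);         // may point at the shared scratch
    int sum = 0;
    for (int i = 0; i < a.length; i++) {
      sum += a[i] * b[i];
    }
    return sum;
  }
}

A caller that skipped the clone() would silently compute the dot product of b with itself whenever the scratch path is taken, which is exactly the class of bug this mock is designed to surface.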
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
index f183f6c99a6..5411f2418de 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
@@ -17,11 +17,15 @@
package org.apache.lucene.util.hnsw;
+import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.ArrayUtil;
-class MockVectorValues extends AbstractMockVectorValues
- implements RandomAccessVectorValues.Floats {
+class MockVectorValues extends FloatVectorValues {
+ private final int dimension;
+ private final float[][] denseValues;
+ protected final float[][] values;
+ private final int numVectors;
private final float[] scratch;
static MockVectorValues fromValues(float[][] values) {
@@ -43,10 +47,23 @@ class MockVectorValues extends AbstractMockVectorValues
}
MockVectorValues(float[][] values, int dimension, float[][] denseValues, int numVectors) {
- super(values, dimension, denseValues, numVectors);
+ this.dimension = dimension;
+ this.values = values;
+ this.denseValues = denseValues;
+ this.numVectors = numVectors;
this.scratch = new float[dimension];
}
+ @Override
+ public int size() {
+ return values.length;
+ }
+
+ @Override
+ public int dimension() {
+ return dimension;
+ }
+
@Override
public MockVectorValues copy() {
return new MockVectorValues(
@@ -54,20 +71,20 @@ class MockVectorValues extends AbstractMockVectorValues