From 9f8e886702dff3f12811bfaca7e0430e8de4387e Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Fri, 7 Jun 2024 07:29:15 +0200 Subject: [PATCH 01/29] Move entry in CHANGES.txt --- lucene/CHANGES.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index dbff31d8c18..31c3d68f59b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -204,6 +204,10 @@ Changes in Backwards Compatibility Policy Other --------------------- +* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving + memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes + were cleared. (Ignacio Vera) + * LUCENE-10376: Roll up the loop in VInt/VLong in DataInput. (Guo Feng) * LUCENE-10253: The @BadApple annotation has been removed from the test @@ -267,10 +271,8 @@ Bug Fixes (No changes) Other ---------------------- -* GITHUB#13459: Merges all immutable attributes in FieldInfos.FieldNumbers into one Hashmap saving - memory when writing big indices. Fixes an exotic bug when calling clear where not all attributes - were cleared. (Ignacio Vera) +-------------------- +(No changes) ======================== Lucene 9.11.0 ======================= From c7a7d48d657f29ba43ea87d539d05cff07dc67ad Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 7 Jun 2024 13:27:10 +0200 Subject: [PATCH 02/29] Reduce the heap use of BKDReader instances (#13464) We consume a lot of memory for the `indexIn` slices. If `indexIn` is of type `MemorySegmentIndexInput` the overhead of keeping loads of slices around just for cloning is far higher than the extra 12b per reader this adds (the slice description alone often costs a lot). In a number of Elasticsearch example uses with high segment counts I investigated, this change would save up to O(GB) of heap. 
--- .../java/org/apache/lucene/util/bkd/BKDReader.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index d2d326b3a15..b2b109769f5 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -43,7 +43,9 @@ public class BKDReader extends PointValues { final int version; final long minLeafBlockFP; - final IndexInput packedIndex; + private final long indexStartPointer; + private final int numIndexBytes; + private final IndexInput indexIn; // if true, the tree is a legacy balanced tree private final boolean isTreeBalanced; @@ -95,8 +97,7 @@ public class BKDReader extends PointValues { pointCount = metaIn.readVLong(); docCount = metaIn.readVInt(); - int numIndexBytes = metaIn.readVInt(); - long indexStartPointer; + numIndexBytes = metaIn.readVInt(); if (version >= BKDWriter.VERSION_META_FILE) { minLeafBlockFP = metaIn.readLong(); indexStartPointer = metaIn.readLong(); @@ -105,7 +106,7 @@ public class BKDReader extends PointValues { minLeafBlockFP = indexIn.readVLong(); indexIn.seek(indexStartPointer); } - this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes); + this.indexIn = indexIn; this.in = dataIn; // for only one leaf, balanced and unbalanced trees can be handled the same way // we set it to unbalanced. 
@@ -158,7 +159,7 @@ public class BKDReader extends PointValues { @Override public PointTree getPointTree() throws IOException { return new BKDPointTree( - packedIndex.clone(), + indexIn.slice("packedIndex", indexStartPointer, numIndexBytes), this.in.clone(), config, numLeaves, From a5b4b8c8b0ddda28f5757e950c8d2a0da57d3bfa Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Fri, 7 Jun 2024 08:47:16 -0400 Subject: [PATCH 03/29] Document how to make tests run faster in IntelliJ (#13466) also make links to CONTRIBUTING.md more prominent, and demote link to dev-docs --- CONTRIBUTING.md | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d568b18def4..ac755475ee2 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -58,7 +58,7 @@ In case your contribution fixes a bug, please create a new test case that fails ### IDE support -- *IntelliJ* - IntelliJ idea can import and build gradle-based projects out of the box. +- *IntelliJ* - IntelliJ idea can import and build gradle-based projects out of the box. However please note that it will default to running tests by calling the gradle wrapper, and while this works, it is for some reason quite slow. Instead we recommend configuring IntelliJ to use its own built-in test runner. You can modify this config (in 2024 version) by navigating to File/Settings/Build Execution & Deployment/Build Tools/Gradle and selecting "Build and Run using: IntelliJ IDEA" and "Run Tests using: IntelliJ IDEA". - *Eclipse* - Basic support ([help/IDEs.txt](https://github.com/apache/lucene/blob/main/help/IDEs.txt#L7)). - *Netbeans* - Not tested. 
diff --git a/README.md b/README.md index 7a167e7455d..fe523af81b2 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,8 @@ comprehensive documentation, visit: - Latest Releases: - Nightly: +- New contributors should start by reading [Contributing Guide](./CONTRIBUTING.md) - Build System Documentation: [help/](./help/) -- Developer Documentation: [dev-docs/](./dev-docs/) - Migration Guide: [lucene/MIGRATE.md](./lucene/MIGRATE.md) ## Building @@ -45,8 +45,6 @@ comprehensive documentation, visit: We'll assume that you know how to get and set up the JDK - if you don't, then we suggest starting at https://jdk.java.net/ and learning more about Java, before returning to this README. -See [Contributing Guide](./CONTRIBUTING.md) for details. - ## Contributing Bug fixes, improvements and new features are always welcome! @@ -54,6 +52,8 @@ Please review the [Contributing to Lucene Guide](./CONTRIBUTING.md) for information on contributing. +- Additional Developer Documentation: [dev-docs/](./dev-docs/) + ## Discussion and Support - [Users Mailing List](https://lucene.apache.org/core/discussion.html#java-user-list-java-userluceneapacheorg) From 2d62faa4bbea5bfc8496013e8389f070ae65778b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Fri, 7 Jun 2024 09:24:20 -0400 Subject: [PATCH 04/29] Add int8_hnsw backcompat index creation to dev tools scripts (#13465) --- dev-tools/scripts/addBackcompatIndexes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev-tools/scripts/addBackcompatIndexes.py b/dev-tools/scripts/addBackcompatIndexes.py index 7faacb8b8e3..80272ec0f0c 100755 --- a/dev-tools/scripts/addBackcompatIndexes.py +++ b/dev-tools/scripts/addBackcompatIndexes.py @@ -40,6 +40,7 @@ def create_and_add_index(source, indextype, index_version, current_version, temp 'cfs': 'index', 'nocfs': 'index', 'sorted': 'sorted', + 'int8_hnsw': 'int8_hnsw', 'moreterms': 'moreterms', 'dvupdates': 'dvupdates', 'emptyIndex': 'empty' @@ -60,6 +61,7 @@ def create_and_add_index(source, 
indextype, index_version, current_version, temp 'cfs': 'testCreateCFS', 'nocfs': 'testCreateNoCFS', 'sorted': 'testCreateSortedIndex', + 'int8_hnsw': 'testCreateInt8HNSWIndices', 'moreterms': 'testCreateMoreTermsIndex', 'dvupdates': 'testCreateIndexWithDocValuesUpdates', 'emptyIndex': 'testCreateEmptyIndex' @@ -204,6 +206,7 @@ def main(): current_version = scriptutil.Version.parse(scriptutil.find_current_version()) create_and_add_index(source, 'cfs', c.version, current_version, c.temp_dir) create_and_add_index(source, 'nocfs', c.version, current_version, c.temp_dir) + create_and_add_index(source, 'int8_hnsw', c.version, current_version, c.temp_dir) should_make_sorted = current_version.is_back_compat_with(c.version) \ and (c.version.major > 6 or (c.version.major == 6 and c.version.minor >= 2)) if should_make_sorted: From 262341b75399292814a161024c7f0287568cd84d Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Fri, 7 Jun 2024 08:29:22 -0400 Subject: [PATCH 05/29] on README.md, make links to CONTRIBUTING.md more prominent, and demote link to dev-docs --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fe523af81b2..f5e9ae7adb9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -