Report the time it took for building the FST in Test2BFST (#12847)

* Report the time it took for building the FST

* Update CHANGES

* Change ramBytesUsed to numBytes

* Report the verification time

* Rename to fstSizeInBytes
This commit is contained in:
Dzung Bui 2023-11-29 20:17:54 +09:00 committed by GitHub
parent 8703b541a5
commit bbf56f9419
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 11 deletions

View File

@ -89,6 +89,8 @@ Improvements
* GITHUB#12447: Hunspell: speed up the dictionary enumeration on suggestion (Peter Gromov)
* GITHUB#12847: Test2BFST now reports the time it took to build the FST and the real FST size (Anh Dung Bui)
Optimizations
---------------------
@ -178,7 +180,8 @@ API Changes
* GITHUB#12799: Make TaskExecutor constructor public and use TaskExecutor for concurrent
HNSW graph build. (Shubham Chaudhary)
*
* GITHUB#12758, GITHUB#12803: Remove FST constructor with DataInput for metadata. Please
use the constructor with FSTMetadata instead. (Anh Dung Bui)
New Features
---------------------
@ -248,6 +251,8 @@ Improvements
minimal FST. Inspired by this Rust FST implementation:
https://blog.burntsushi.net/transducers (Mike McCandless)
* GITHUB#12738: NodeHash now stores the FST nodes data instead of just node addresses (Anh Dung Bui)
Optimizations
---------------------
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)

View File

@ -867,6 +867,10 @@ public class FSTCompiler<T> {
return fst.ramBytesUsed();
}
/**
 * Returns the current position reported by {@code bytes}.
 *
 * <p>NOTE(review): presumably this is the number of bytes written to the FST so far, as opposed
 * to a RAM-usage estimate -- confirm against the declaration of {@code bytes}.
 *
 * @return the value of {@code bytes.getPosition()}
 */
public long fstSizeInBytes() {
  final long currentPosition = bytes.getPosition();
  return currentPosition;
}
static final class CompiledNode implements Node {
long node;

View File

@ -66,6 +66,7 @@ public class Test2BFST extends LuceneTestCase {
Random r = new Random(seed);
int[] ints2 = new int[200];
IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
long startTime = System.nanoTime();
while (true) {
// System.out.println("add: " + input + " -> " + output);
for (int i = 10; i < ints2.length; i++) {
@ -78,9 +79,13 @@ public class Test2BFST extends LuceneTestCase {
count
+ ": "
+ fstCompiler.fstRamBytesUsed()
+ " bytes; "
+ " RAM bytes used; "
+ fstCompiler.fstSizeInBytes()
+ " FST bytes; "
+ fstCompiler.getNodeCount()
+ " nodes");
+ " nodes; took "
+ (long) ((System.nanoTime() - startTime) / 1e9)
+ " seconds");
}
if (fstCompiler.getNodeCount() > Integer.MAX_VALUE + 100L * 1024 * 1024) {
break;
@ -93,7 +98,7 @@ public class Test2BFST extends LuceneTestCase {
for (int verify = 0; verify < 2; verify++) {
System.out.println(
"\nTEST: now verify [fst size="
+ fst.ramBytesUsed()
+ fst.numBytes()
+ "; nodeCount="
+ fstCompiler.getNodeCount()
+ "; arcCount="
@ -103,9 +108,11 @@ public class Test2BFST extends LuceneTestCase {
Arrays.fill(ints2, 0);
r = new Random(seed);
startTime = System.nanoTime();
for (int i = 0; i < count; i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
System.out.println(
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
}
for (int j = 10; j < ints2.length; j++) {
ints2[j] = r.nextInt(256);
@ -168,7 +175,7 @@ public class Test2BFST extends LuceneTestCase {
fstCompiler.add(input, BytesRef.deepCopyOf(output));
count++;
if (count % 10000 == 0) {
long size = fstCompiler.fstRamBytesUsed();
long size = fstCompiler.fstSizeInBytes();
if (count % 1000000 == 0) {
System.out.println(count + "...: " + size + " bytes");
}
@ -184,7 +191,7 @@ public class Test2BFST extends LuceneTestCase {
System.out.println(
"\nTEST: now verify [fst size="
+ fst.ramBytesUsed()
+ fst.numBytes()
+ "; nodeCount="
+ fstCompiler.getNodeCount()
+ "; arcCount="
@ -194,9 +201,12 @@ public class Test2BFST extends LuceneTestCase {
r = new Random(seed);
Arrays.fill(ints, 0);
long startTime = System.nanoTime();
for (int i = 0; i < count; i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
System.out.println(
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
}
r.nextBytes(outputBytes);
assertEquals(output, Util.get(fst, input));
@ -255,7 +265,7 @@ public class Test2BFST extends LuceneTestCase {
output += 1 + r.nextInt(10);
count++;
if (count % 10000 == 0) {
long size = fstCompiler.fstRamBytesUsed();
long size = fstCompiler.fstSizeInBytes();
if (count % 1000000 == 0) {
System.out.println(count + "...: " + size + " bytes");
}
@ -272,7 +282,7 @@ public class Test2BFST extends LuceneTestCase {
System.out.println(
"\nTEST: now verify [fst size="
+ fst.ramBytesUsed()
+ fst.numBytes()
+ "; nodeCount="
+ fstCompiler.getNodeCount()
+ "; arcCount="
@ -283,9 +293,11 @@ public class Test2BFST extends LuceneTestCase {
output = 1;
r = new Random(seed);
long startTime = System.nanoTime();
for (int i = 0; i < count; i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
System.out.println(
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
}
assertEquals(output, Util.get(fst, input).longValue());