mirror of https://github.com/apache/lucene.git
Report the time it took for building the FST in Test2BFST (#12847)
* Report the time it took for building the FST
* Update CHANGES
* Change ramBytesUsed to numBytes
* Report the verification time
* Rename to fstSizeInBytes
This commit is contained in:
parent
8703b541a5
commit
bbf56f9419
|
@ -89,6 +89,8 @@ Improvements
|
|||
|
||||
* GITHUB#12447: Hunspell: speed up the dictionary enumeration on suggestion (Peter Gromov)
|
||||
|
||||
* GITHUB#12847: Test2BFST now reports the time it took to build the FST and the real FST size (Anh Dung Bui)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
|
@ -178,7 +180,8 @@ API Changes
|
|||
* GITHUB#12799: Make TaskExecutor constructor public and use TaskExecutor for concurrent
|
||||
HNSW graph build. (Shubham Chaudhary)
|
||||
|
||||
*
|
||||
* GITHUB#12758, GITHUB#12803: Remove FST constructor with DataInput for metadata. Please
|
||||
use the constructor with FSTMetadata instead. (Anh Dung Bui)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
@ -248,6 +251,8 @@ Improvements
|
|||
minimal FST. Inspired by this Rust FST implementation:
|
||||
https://blog.burntsushi.net/transducers (Mike McCandless)
|
||||
|
||||
* GITHUB#12738: NodeHash now stores the FST nodes data instead of just node addresses (Anh Dung Bui)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
|
||||
|
|
|
@ -867,6 +867,10 @@ public class FSTCompiler<T> {
|
|||
return fst.ramBytesUsed();
|
||||
}
|
||||
|
||||
/**
 * Returns the current position of {@code bytes}, i.e. the value of {@code bytes.getPosition()}.
 *
 * <p>Test2BFST prints this value labelled as "FST bytes", alongside the figure from
 * {@code fstRamBytesUsed()} which it labels "RAM bytes used".
 * NOTE(review): presumably this is the number of bytes written to the FST so far; confirm
 * against the declared type of {@code bytes}, which is not visible here.
 *
 * @return the value reported by {@code bytes.getPosition()}
 */
public long fstSizeInBytes() {
|
||||
return bytes.getPosition();
|
||||
}
|
||||
|
||||
static final class CompiledNode implements Node {
|
||||
long node;
|
||||
|
||||
|
|
|
@ -66,6 +66,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
Random r = new Random(seed);
|
||||
int[] ints2 = new int[200];
|
||||
IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
|
||||
long startTime = System.nanoTime();
|
||||
while (true) {
|
||||
// System.out.println("add: " + input + " -> " + output);
|
||||
for (int i = 10; i < ints2.length; i++) {
|
||||
|
@ -78,9 +79,13 @@ public class Test2BFST extends LuceneTestCase {
|
|||
count
|
||||
+ ": "
|
||||
+ fstCompiler.fstRamBytesUsed()
|
||||
+ " bytes; "
|
||||
+ " RAM bytes used; "
|
||||
+ fstCompiler.fstSizeInBytes()
|
||||
+ " FST bytes; "
|
||||
+ fstCompiler.getNodeCount()
|
||||
+ " nodes");
|
||||
+ " nodes; took "
|
||||
+ (long) ((System.nanoTime() - startTime) / 1e9)
|
||||
+ " seconds");
|
||||
}
|
||||
if (fstCompiler.getNodeCount() > Integer.MAX_VALUE + 100L * 1024 * 1024) {
|
||||
break;
|
||||
|
@ -93,7 +98,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
for (int verify = 0; verify < 2; verify++) {
|
||||
System.out.println(
|
||||
"\nTEST: now verify [fst size="
|
||||
+ fst.ramBytesUsed()
|
||||
+ fst.numBytes()
|
||||
+ "; nodeCount="
|
||||
+ fstCompiler.getNodeCount()
|
||||
+ "; arcCount="
|
||||
|
@ -103,9 +108,11 @@ public class Test2BFST extends LuceneTestCase {
|
|||
Arrays.fill(ints2, 0);
|
||||
r = new Random(seed);
|
||||
|
||||
startTime = System.nanoTime();
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
System.out.println(
|
||||
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
|
||||
}
|
||||
for (int j = 10; j < ints2.length; j++) {
|
||||
ints2[j] = r.nextInt(256);
|
||||
|
@ -168,7 +175,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
fstCompiler.add(input, BytesRef.deepCopyOf(output));
|
||||
count++;
|
||||
if (count % 10000 == 0) {
|
||||
long size = fstCompiler.fstRamBytesUsed();
|
||||
long size = fstCompiler.fstSizeInBytes();
|
||||
if (count % 1000000 == 0) {
|
||||
System.out.println(count + "...: " + size + " bytes");
|
||||
}
|
||||
|
@ -184,7 +191,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
|
||||
System.out.println(
|
||||
"\nTEST: now verify [fst size="
|
||||
+ fst.ramBytesUsed()
|
||||
+ fst.numBytes()
|
||||
+ "; nodeCount="
|
||||
+ fstCompiler.getNodeCount()
|
||||
+ "; arcCount="
|
||||
|
@ -194,9 +201,12 @@ public class Test2BFST extends LuceneTestCase {
|
|||
r = new Random(seed);
|
||||
Arrays.fill(ints, 0);
|
||||
|
||||
long startTime = System.nanoTime();
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
System.out.println(
|
||||
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
|
||||
}
|
||||
r.nextBytes(outputBytes);
|
||||
assertEquals(output, Util.get(fst, input));
|
||||
|
@ -255,7 +265,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
output += 1 + r.nextInt(10);
|
||||
count++;
|
||||
if (count % 10000 == 0) {
|
||||
long size = fstCompiler.fstRamBytesUsed();
|
||||
long size = fstCompiler.fstSizeInBytes();
|
||||
if (count % 1000000 == 0) {
|
||||
System.out.println(count + "...: " + size + " bytes");
|
||||
}
|
||||
|
@ -272,7 +282,7 @@ public class Test2BFST extends LuceneTestCase {
|
|||
|
||||
System.out.println(
|
||||
"\nTEST: now verify [fst size="
|
||||
+ fst.ramBytesUsed()
|
||||
+ fst.numBytes()
|
||||
+ "; nodeCount="
|
||||
+ fstCompiler.getNodeCount()
|
||||
+ "; arcCount="
|
||||
|
@ -283,9 +293,11 @@ public class Test2BFST extends LuceneTestCase {
|
|||
|
||||
output = 1;
|
||||
r = new Random(seed);
|
||||
long startTime = System.nanoTime();
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (i % 1000000 == 0) {
|
||||
System.out.println(i + "...: ");
|
||||
System.out.println(
|
||||
i + "...: took " + (long) ((System.nanoTime() - startTime) / 1e9) + " seconds");
|
||||
}
|
||||
|
||||
assertEquals(output, Util.get(fst, input).longValue());
|
||||
|
|
Loading…
Reference in New Issue