java-tutorials/libraries/src/test/java/com/baeldung/lsh/LocalSensitiveHashingUnitTest.java
Eugen Paraschiv d2a2a65566 cleanup work
2018-03-04 17:39:09 +02:00

47 lines
1.7 KiB
Java

package com.baeldung.lsh;
import info.debatty.java.lsh.LSHMinHash;
import org.junit.Ignore;
import org.junit.Test;
import java.util.Arrays;
import static org.assertj.core.api.Assertions.assertThat;
public class LocalSensitiveHashingUnitTest {
@Ignore("for simplicity of the example number of input vectors is very low, that's why LSH may yield non deterministic results")
@Test()
public void givenNVectors_whenPerformLSH_thenShouldCalculateSameHashForSimilarVectors() {
// given
boolean[] vector1 = new boolean[] { true, true, true, true, true };
boolean[] vector2 = new boolean[] { false, false, false, true, false };
boolean[] vector3 = new boolean[] { false, false, true, true, false };
int sizeOfVectors = 5;
int numberOfBuckets = 10;
int stages = 4;
LSHMinHash lsh = new LSHMinHash(stages, numberOfBuckets, sizeOfVectors);
// when
int[] firstHash = lsh.hash(vector1);
int[] secondHash = lsh.hash(vector2);
int[] thirdHash = lsh.hash(vector3);
System.out.println(Arrays.toString(firstHash));
System.out.println(Arrays.toString(secondHash));
System.out.println(Arrays.toString(thirdHash));
// then
int lastIndexOfResult = stages - 1;
assertThat(firstHash[lastIndexOfResult]).isNotEqualTo(secondHash[lastIndexOfResult]);
assertThat(firstHash[lastIndexOfResult]).isNotEqualTo(thirdHash[lastIndexOfResult]);
assertThat(isCloseOrEqual(secondHash[lastIndexOfResult], thirdHash[lastIndexOfResult], numberOfBuckets)).isTrue();
}
private boolean isCloseOrEqual(int secondHash, int thirdHash, int numberOfBuckets) {
return Math.abs(secondHash - thirdHash) < numberOfBuckets / 2;
}
}