Added rule count for proper scoring implementation

This commit is contained in:
Nick Goupinets 2020-11-02 17:15:39 -05:00
parent 296b193bb5
commit 52e8761701
3 changed files with 35 additions and 16 deletions

View File

@ -26,6 +26,7 @@ import org.apache.commons.lang3.builder.ToStringBuilder;
* This data object captures the final outcome of an EMPI match
*/
public final class EmpiMatchOutcome {
public static final EmpiMatchOutcome POSSIBLE_DUPLICATE = new EmpiMatchOutcome(null, null).setMatchResultEnum(EmpiMatchResultEnum.POSSIBLE_DUPLICATE);
public static final EmpiMatchOutcome NO_MATCH = new EmpiMatchOutcome(null, null).setMatchResultEnum(EmpiMatchResultEnum.NO_MATCH);
public static final EmpiMatchOutcome NEW_PERSON_MATCH = new EmpiMatchOutcome(null, null).setMatchResultEnum(EmpiMatchResultEnum.MATCH).setNewPerson(true);
@ -58,6 +59,11 @@ public final class EmpiMatchOutcome {
*/
private EmpiMatchResultEnum myMatchResultEnum;
/**
* Total number of EMPI rules checked for this outcome
*/
private int myEmpiRuleCount;
public EmpiMatchOutcome(Long theVector, Double theScore) {
vector = theVector;
score = theScore;
@ -99,6 +105,19 @@ public final class EmpiMatchOutcome {
return myEidMatch;
}
/**
 * Records how many EMPI rules were checked to produce this match outcome.
 *
 * @param theEmpiRuleCount the number of EMPI rules that were checked for this match outcome
 * @return this instance
 */
public EmpiMatchOutcome setEmpiRuleCount(int theEmpiRuleCount) {
	this.myEmpiRuleCount = theEmpiRuleCount;
	return this;
}
/** @param theEidMatch the link was established via a shared EID */
public EmpiMatchOutcome setEidMatch(boolean theEidMatch) {
myEidMatch = theEidMatch;
@ -112,11 +131,11 @@ public final class EmpiMatchOutcome {
* Returns the normalized score
*/
public Double getNormalizedScore() {
if (vector == 0) {
if (myEmpiRuleCount == 0) {
return 0.0;
}
double retVal = score / Long.bitCount(vector);
double retVal = score / myEmpiRuleCount;
if (retVal < 0) {
retVal = 0.0;
} else if (retVal > 1.0) {

View File

@ -125,6 +125,9 @@ public class EmpiResourceMatcherSvc {
}
score += matchEvaluation.score;
}
return new EmpiMatchOutcome(vector, score);
EmpiMatchOutcome retVal = new EmpiMatchOutcome(vector, score);
retVal.setEmpiRuleCount(myFieldMatchers.size());
return retVal;
}
}

View File

@ -13,28 +13,25 @@ class EmpiMatchOutcomeTest {
EmpiMatchOutcome outcome = new EmpiMatchOutcome(0l, 0.0);
assertEquals(0.0, outcome.getNormalizedScore());
outcome = new EmpiMatchOutcome(selectBits(10), 10.0);
outcome = new EmpiMatchOutcome(null, 10.0);
outcome.setEmpiRuleCount(10);
assertEquals(1.0, outcome.getNormalizedScore(), DELTA);
outcome = new EmpiMatchOutcome(selectBits(10), -10.0);
outcome = new EmpiMatchOutcome(null, -10.0);
outcome.setEmpiRuleCount(10);
assertEquals(0.0, outcome.getNormalizedScore());
outcome = new EmpiMatchOutcome(selectBits(3), 2.0);
outcome = new EmpiMatchOutcome(null, 2.0);
outcome.setEmpiRuleCount(3);
assertEquals(2.0 / 3.0, outcome.getNormalizedScore(), DELTA);
outcome = new EmpiMatchOutcome(selectBits(8), 4.0);
outcome = new EmpiMatchOutcome(null, 4.0);
outcome.setEmpiRuleCount(8);
assertEquals(4.0 / 8.0, outcome.getNormalizedScore(), DELTA);
outcome = new EmpiMatchOutcome(selectBits(5), 19.0);
outcome = new EmpiMatchOutcome(null, 19.0);
outcome.setEmpiRuleCount(5);
assertEquals(1.0, outcome.getNormalizedScore());
}
/**
 * Builds a bit mask whose {@code theN} lowest-order bits are set.
 *
 * @param theN number of low-order bits to set; zero or a negative value yields {@code 0},
 *             and 64 or more yields a mask with all 64 bits set
 * @return the mask as a {@code long}
 */
private long selectBits(int theN) {
	long retVal = 0;
	for (int i = 0; i < theN; i++) {
		// The shift must be performed on a long: an int shift (1 << i) sign-extends at
		// i == 31 and wraps its shift distance for i >= 32, corrupting the upper bits.
		retVal |= (1L << i);
	}
	return retVal;
}
}