3335 allow alternative character encoding length in searchparam (#3336)

* added fixme

* 2714 added support for specifying max code lengths for supported phonetic encoders

* 3335git status

* 3335 updated changelog

* 3335 review fixes

* review fixes

* fixing test

Co-authored-by: Ken Stevens <khstevens@gmail.com>
Co-authored-by: leif stawnyczy <leifstawnyczy@leifs-MacBook-Pro.local>
This commit is contained in:
TipzCM 2022-01-27 12:29:07 -05:00 committed by GitHub
parent 6506527dd6
commit 41193c60db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 428 additions and 10 deletions

View File

@ -20,6 +20,7 @@ package ca.uhn.fhir.context.phonetic;
* #L%
*/
import ca.uhn.fhir.util.PhoneticEncoderUtil;
import org.apache.commons.codec.language.Caverphone1;
import org.apache.commons.codec.language.Caverphone2;
import org.apache.commons.codec.language.ColognePhonetic;
@ -38,16 +39,30 @@ public enum PhoneticEncoderEnum {
MATCH_RATING_APPROACH(new ApacheEncoder("MATCH_RATING_APPROACH", new MatchRatingApproachEncoder())),
METAPHONE(new ApacheEncoder("METAPHONE", new Metaphone())),
NYSIIS(new ApacheEncoder("NYSIIS", new Nysiis())),
NYSIIS_LONG(new ApacheEncoder("NYSIIS_LONG", new Nysiis(false))),
REFINED_SOUNDEX(new ApacheEncoder("REFINED_SOUNDEX", new RefinedSoundex())),
SOUNDEX(new ApacheEncoder("SOUNDEX", new Soundex())),
NUMERIC(new NumericEncoder());
private final IPhoneticEncoder myPhoneticEncoder;
/**
 * Binds this constant to the encoder instance supplied in its declaration.
 * <p>
 * Enum constructors are never callable from outside the enum, so this is only
 * used by the constant declarations above. Each constant holds one pre-built
 * encoder; code that needs an encoder should obtain it through
 * {@link PhoneticEncoderUtil#getEncoder(String)} instead of through this enum.
 * </p>
 *
 * @param thePhoneticEncoder the encoder instance held by this constant
 */
@Deprecated
PhoneticEncoderEnum(IPhoneticEncoder thePhoneticEncoder) {
myPhoneticEncoder = thePhoneticEncoder;
}
/**
 * Returns the encoder instance this constant was constructed with.
 * <p>
 * The same instance is handed out on every call, so it is shared by all
 * callers and cannot be configured (for example with a max code length).
 * </p>
 *
 * @return the encoder held by this constant
 * @deprecated use {@link PhoneticEncoderUtil#getEncoder(String)}, passing
 * {@link #name()}, optionally followed by a max code length in parentheses
 */
@Deprecated
public IPhoneticEncoder getPhoneticEncoder() {
return myPhoneticEncoder;
}

View File

@ -0,0 +1,171 @@
package ca.uhn.fhir.util;
import ca.uhn.fhir.context.phonetic.ApacheEncoder;
import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
import ca.uhn.fhir.context.phonetic.NumericEncoder;
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
import org.apache.commons.codec.language.Caverphone1;
import org.apache.commons.codec.language.Caverphone2;
import org.apache.commons.codec.language.ColognePhonetic;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.commons.codec.language.MatchRatingApproachEncoder;
import org.apache.commons.codec.language.Metaphone;
import org.apache.commons.codec.language.Nysiis;
import org.apache.commons.codec.language.RefinedSoundex;
import org.apache.commons.codec.language.Soundex;
import org.apache.commons.lang3.EnumUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Factory for {@link IPhoneticEncoder} instances described by a string such as
 * {@code METAPHONE} or {@code METAPHONE(5)}.
 * <p>
 * Every call builds a new encoder, so a max code length applied to one encoder
 * never affects another.
 * </p>
 */
public final class PhoneticEncoderUtil {

	/**
	 * Immutable holder for the two values parsed out of an encoder string:
	 * the encoder name and the optional max code length.
	 */
	private static final class ParsedValues {
		private final Integer maxCodeLength;
		private final String encoderString;

		public ParsedValues(String theString, Integer theMaxCode) {
			maxCodeLength = theMaxCode;
			encoderString = theString;
		}

		/** @return the parsed max code length, or {@code null} if none was parsed */
		public Integer getMaxCodeLength() {
			return maxCodeLength;
		}

		/** @return the encoder name, or {@code null} if the input could not be parsed */
		public String getEncoderString() {
			return encoderString;
		}
	}

	private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderUtil.class);

	private PhoneticEncoderUtil() {
	}

	/**
	 * Creates a phonetic encoder from an input string.
	 *
	 * <p>
	 * The string must be the name of a {@link PhoneticEncoderEnum} constant,
	 * optionally followed by a non-negative max code length in parentheses:
	 * </p>
	 *
	 * <pre>
	 * PhoneticEncoderEnum
	 * PhoneticEncoderEnum(MAX_LENGTH)
	 * </pre>
	 *
	 * <p>
	 * The max code length is applied to METAPHONE and DOUBLE_METAPHONE only; for
	 * every other encoder it is ignored and a warning is logged.
	 * </p>
	 *
	 * @param theString the encoder description; may be {@code null}
	 * @return a new encoder, or {@code null} (after logging a warning) if the
	 * string is {@code null} or does not describe a known encoder
	 */
	public static IPhoneticEncoder getEncoder(String theString) {
		IPhoneticEncoder encoder = null;
		// A null string (e.g. an extension without a value) is treated like any
		// other unrecognised value instead of failing with a NullPointerException.
		if (theString != null) {
			ParsedValues values = parseIntValue(theString);
			encoder = getEncoderFromString(values.getEncoderString(), values.getMaxCodeLength());
		}

		if (encoder == null) {
			// Message is concatenated on purpose: the unit tests compare the raw
			// (unformatted) log message.
			ourLog.warn("Invalid phonetic param string " + theString);
		}
		return encoder;
	}

	/**
	 * Splits an encoder string into its name and optional max code length.
	 * <p>
	 * A string without "(" is returned unchanged as the name. A string with "("
	 * must end in ")" and contain a non-negative integer between the first "("
	 * and that final ")"; otherwise the returned name is {@code null}.
	 * </p>
	 *
	 * @param theString the non-null encoder description
	 * @return the parsed name (possibly {@code null}) and max code length (possibly {@code null})
	 */
	private static ParsedValues parseIntValue(String theString) {
		int braceIndex = theString.indexOf("(");
		if (braceIndex == -1) {
			// plain encoder name, no length given
			return new ParsedValues(theString, null);
		}

		int len = theString.length();
		if (theString.charAt(len - 1) != ')') {
			// opening bracket without a closing one at the end - parse error
			return new ParsedValues(null, null);
		}

		String num = theString.substring(braceIndex + 1, len - 1);
		Integer maxCodeLength = null;
		try {
			maxCodeLength = Integer.parseInt(num);
		} catch (NumberFormatException ignored) {
			// not a number; reported through the null check below
		}

		if (maxCodeLength == null || maxCodeLength < 0) {
			ourLog.error("Invalid encoder max character length: " + num);
			return new ParsedValues(null, maxCodeLength);
		}
		return new ParsedValues(theString.substring(0, braceIndex), maxCodeLength);
	}

	/**
	 * Builds a new encoder for the named {@link PhoneticEncoderEnum} constant.
	 *
	 * @param theName the enum constant name; {@code null} or unknown names yield {@code null}
	 * @param theMax the max code length to apply, or {@code null} for the encoder's default
	 * @return a new encoder, or {@code null} if the name is not a known constant
	 */
	private static IPhoneticEncoder getEncoderFromString(String theName, Integer theMax) {
		PhoneticEncoderEnum enumVal = EnumUtils.getEnum(PhoneticEncoderEnum.class, theName);
		if (enumVal == null) {
			return null;
		}

		// Only the two metaphone variants apply the length below; tell the user
		// when a configured length is going to be dropped.
		if (theMax != null
			&& enumVal != PhoneticEncoderEnum.METAPHONE
			&& enumVal != PhoneticEncoderEnum.DOUBLE_METAPHONE) {
			ourLog.warn("Encoder " + theName + " does not support a max code length; ignoring value " + theMax);
		}

		IPhoneticEncoder encoder = null;
		switch (enumVal) {
			case CAVERPHONE1:
				encoder = new ApacheEncoder(theName, new Caverphone1());
				break;
			case CAVERPHONE2:
				encoder = new ApacheEncoder(theName, new Caverphone2());
				break;
			case COLOGNE:
				encoder = new ApacheEncoder(theName, new ColognePhonetic());
				break;
			case DOUBLE_METAPHONE:
				DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
				if (theMax != null) {
					doubleMetaphone.setMaxCodeLen(theMax);
				}
				encoder = new ApacheEncoder(theName, doubleMetaphone);
				break;
			case MATCH_RATING_APPROACH:
				encoder = new ApacheEncoder(theName, new MatchRatingApproachEncoder());
				break;
			case METAPHONE:
				Metaphone metaphone = new Metaphone();
				if (theMax != null) {
					metaphone.setMaxCodeLen(theMax);
				}
				encoder = new ApacheEncoder(theName, metaphone);
				break;
			case NYSIIS:
				encoder = new ApacheEncoder(theName, new Nysiis());
				break;
			case NYSIIS_LONG:
				encoder = new ApacheEncoder(theName, new Nysiis(false));
				break;
			case REFINED_SOUNDEX:
				encoder = new ApacheEncoder(theName, new RefinedSoundex());
				break;
			case SOUNDEX:
				// soundex has deprecated setting the max size
				encoder = new ApacheEncoder(theName, new Soundex());
				break;
			case NUMERIC:
				encoder = new NumericEncoder();
				break;
			default:
				// we don't ever expect to be here
				// this log message is purely for devs who update this
				// enum, but not this method
				ourLog.error("Unhandled PhoneticParamEnum value " + enumVal.name());
				break;
		}
		return encoder;
	}
}

View File

@ -1,5 +1,7 @@
package ca.uhn.fhir.context.phonetic;
import ca.uhn.fhir.util.PhoneticEncoderUtil;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
import org.slf4j.Logger;
@ -10,7 +12,7 @@ import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.startsWith;
import static org.junit.jupiter.api.Assertions.assertEquals;
class PhoneticEncoderTest {
public class PhoneticEncoderTest {
private static final Logger ourLog = LoggerFactory.getLogger(PhoneticEncoderTest.class);
private static final String NUMBER = "123";
@ -21,7 +23,9 @@ class PhoneticEncoderTest {
@ParameterizedTest
@EnumSource(PhoneticEncoderEnum.class)
public void testEncodeAddress(PhoneticEncoderEnum thePhoneticEncoderEnum) {
String encoded = thePhoneticEncoderEnum.getPhoneticEncoder().encode(ADDRESS_LINE);
IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(thePhoneticEncoderEnum.name());
Assertions.assertNotNull(encoder);
String encoded = encoder.encode(ADDRESS_LINE);
ourLog.info("{}: {}", thePhoneticEncoderEnum.name(), encoded);
if (thePhoneticEncoderEnum == PhoneticEncoderEnum.NUMERIC) {
assertEquals(NUMBER + SUITE, encoded);

View File

@ -0,0 +1,150 @@
package ca.uhn.fhir.util;
import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import ch.qos.logback.classic.spi.ILoggingEvent;
import ch.qos.logback.core.read.ListAppender;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;
import org.slf4j.LoggerFactory;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
/**
 * Unit tests for {@link PhoneticEncoderUtil#getEncoder(String)}.
 * <p>
 * Log output is observed through a mocked appender attached to the
 * PhoneticEncoderUtil logger. The appender is detached and the logger level
 * restored after every test so that no state leaks into other tests.
 * </p>
 */
public class PhoneticEncoderUtilTests {

	private final Logger myLogger = (Logger) LoggerFactory.getLogger(PhoneticEncoderUtil.class);

	private ListAppender<ILoggingEvent> myListAppender;

	// level explicitly set on the logger before the test ran (null = inherited)
	private Level myOriginalLevel;

	@BeforeEach
	@SuppressWarnings("unchecked") // Mockito.mock cannot express the generic appender type
	public void init() {
		myOriginalLevel = myLogger.getLevel();
		myListAppender = Mockito.mock(ListAppender.class);
		myLogger.addAppender(myListAppender);
	}

	@AfterEach
	public void cleanup() {
		// undo everything init() and the individual tests did to the shared logger
		myLogger.detachAppender(myListAppender);
		myLogger.setLevel(myOriginalLevel);
	}

	@Test
	public void getEncoder_withNumberProvided_parsesOutCorrectValue() {
		int num = 5;
		PhoneticEncoderEnum enumVal = PhoneticEncoderEnum.DOUBLE_METAPHONE;
		String enumString = enumVal.name() + "(" + num + ")";

		// test
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(enumString);

		assertNotNull(encoder);
		assertEquals(enumVal.name(), encoder.name());
	}

	@Test
	public void getEncoder_withNoNumber_parsesOutCorrectValue() {
		// test
		for (PhoneticEncoderEnum enumVal : PhoneticEncoderEnum.values()) {
			IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(enumVal.name());

			assertNotNull(encoder);
			assertEquals(enumVal.name(), encoder.name());
		}
	}

	@Test
	public void getEncoder_withInvalidNumber_returnsNullAndLogs() {
		// setup
		myLogger.setLevel(Level.ERROR);
		String num = "A";

		// test
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(
			PhoneticEncoderEnum.METAPHONE.name() + "(" + num + ")"
		);

		// verify
		verifyOutcome_getEncoder_NumberParseFailure(encoder, num);
	}

	@Test
	public void getEncoder_unknownValue_returnsNull() {
		// setup
		myLogger.setLevel(Level.WARN);
		String theString = "Not a valid encoder value";

		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(theString);

		// verify
		assertNull(encoder);
		ArgumentCaptor<ILoggingEvent> captor = ArgumentCaptor.forClass(ILoggingEvent.class);
		Mockito.verify(myListAppender)
			.doAppend(captor.capture());
		assertEquals(1, captor.getAllValues().size());
		ILoggingEvent event = captor.getValue();
		assertEquals("Invalid phonetic param string " + theString,
			event.getMessage());
	}

	@Test
	public void getEncoder_emptyNumberValue_returnsNull() {
		myLogger.setLevel(Level.ERROR);

		// test
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(PhoneticEncoderEnum.METAPHONE.name() + "()");

		verifyOutcome_getEncoder_NumberParseFailure(encoder, "");
	}

	@Test
	public void getEncoder_invalidNumberValue_returnsNull() {
		myLogger.setLevel(Level.ERROR);

		// test
		String num = "-1";
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(PhoneticEncoderEnum.METAPHONE.name() + "(" + num + ")");

		verifyOutcome_getEncoder_NumberParseFailure(encoder, num);
	}

	@Test
	public void getEncoder_incorrectBrackets_returnsNull() {
		myLogger.setLevel(Level.ERROR);

		// test
		String num = "(";
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(PhoneticEncoderEnum.METAPHONE.name() + "(" + num + ")");

		verifyOutcome_getEncoder_NumberParseFailure(encoder, num);
	}

	@Test
	public void getEncoder_maxInt_returnsWrapper() {
		// test
		IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(
			PhoneticEncoderEnum.METAPHONE.name() + "(" + Integer.MAX_VALUE + ")"
		);

		assertNotNull(encoder);
		assertEquals(PhoneticEncoderEnum.METAPHONE.name(), encoder.name());
	}

	/**
	 * Verifies the outcome when a string with an invalid max code length was
	 * passed in: no encoder is returned and exactly one log event carrying the
	 * rejected number text reached the appender.
	 *
	 * @param theEncoder the value returned by getEncoder
	 * @param theNumberParam the text that was between the brackets
	 */
	private void verifyOutcome_getEncoder_NumberParseFailure(IPhoneticEncoder theEncoder, String theNumberParam) {
		assertNull(theEncoder);
		ArgumentCaptor<ILoggingEvent> loggingCaptor = ArgumentCaptor.forClass(ILoggingEvent.class);
		Mockito.verify(myListAppender).doAppend(loggingCaptor.capture());
		assertEquals(1, loggingCaptor.getAllValues().size());
		ILoggingEvent event = loggingCaptor.getValue();
		assertEquals("Invalid encoder max character length: " + theNumberParam,
			event.getMessage());
	}
}

View File

@ -0,0 +1,14 @@
---
type: add
issue: 3335
jira: 3603
title: "Added ability to specify max code lengths for supported
Phonetic Encoders (Metaphone, Double_Metaphone).
To specify a max code length, append it in parentheses after the encoder type in the value of the
searchparameter-phonetic-encoder extension.
e.g.:
{
\"url\": \"http://hapifhir.io/fhir/StructureDefinition/searchparameter-phonetic-encoder\",
\"valueString\": \"METAPHONE(5)\"
}
"

View File

@ -1,6 +1,7 @@
package ca.uhn.fhir.jpa.dao.r4;
import ca.uhn.fhir.context.RuntimeSearchParam;
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
import ca.uhn.fhir.interceptor.api.HookParams;
import ca.uhn.fhir.interceptor.api.IAnonymousInterceptor;
import ca.uhn.fhir.interceptor.api.Pointcut;
@ -10,6 +11,7 @@ import ca.uhn.fhir.jpa.model.entity.NormalizedQuantitySearchLevel;
import ca.uhn.fhir.jpa.model.entity.ResourceIndexedSearchParamToken;
import ca.uhn.fhir.jpa.model.search.StorageProcessingMessage;
import ca.uhn.fhir.jpa.searchparam.SearchParameterMap;
import ca.uhn.fhir.model.api.IQueryParameterType;
import ca.uhn.fhir.model.api.Include;
import ca.uhn.fhir.rest.api.server.IBundleProvider;
import ca.uhn.fhir.rest.param.DateParam;
@ -20,6 +22,7 @@ import ca.uhn.fhir.rest.param.StringParam;
import ca.uhn.fhir.rest.param.TokenParam;
import ca.uhn.fhir.rest.server.exceptions.InvalidRequestException;
import ca.uhn.fhir.rest.server.exceptions.UnprocessableEntityException;
import ca.uhn.fhir.util.HapiExtensions;
import org.hamcrest.Matchers;
import org.hl7.fhir.instance.model.api.IIdType;
import org.hl7.fhir.r4.model.Appointment;
@ -53,6 +56,7 @@ import org.hl7.fhir.r4.model.ServiceRequest;
import org.hl7.fhir.r4.model.Specimen;
import org.hl7.fhir.r4.model.StringType;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
@ -77,6 +81,7 @@ import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.startsWith;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.fail;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
@ -197,6 +202,60 @@ public class FhirResourceDaoR4SearchCustomSearchParamTest extends BaseJpaR4Test
assertEquals(0, search.size());
}
@Test
public void testCreatePhoneticSearchParameterWithOptionalCharacterLength() {
	final String fullName = "Richard Smith";
	final int customMaxLength = 7;
	final String metaphone = PhoneticEncoderEnum.METAPHONE.name();

	// First parameter: METAPHONE with no explicit max code length
	SearchParameter defaultLengthSp = constructSearchParameter();
	defaultLengthSp.setCode("fuzzydefault");
	defaultLengthSp.addExtension()
		.setUrl(HapiExtensions.EXT_SEARCHPARAM_PHONETIC_ENCODER)
		.setValue(new StringType(metaphone));
	mySearchParameterDao.create(defaultLengthSp, mySrd);
	mySearchParamRegistry.forceRefresh();

	// Second parameter: same encoder, but with an explicit max code length
	SearchParameter customLengthSp = constructSearchParameter();
	customLengthSp.setCode("fuzzymodified");
	customLengthSp.addExtension()
		.setUrl(HapiExtensions.EXT_SEARCHPARAM_PHONETIC_ENCODER)
		.setValue(new StringType(metaphone + "(" + customMaxLength + ")"));
	mySearchParameterDao.create(customLengthSp, mySrd);
	mySearchParamRegistry.forceRefresh();

	// Fetch both parameters back from the registry
	RuntimeSearchParam defaultLengthParam = mySearchParamRegistry.getActiveSearchParam("Patient", "fuzzydefault");
	RuntimeSearchParam customLengthParam = mySearchParamRegistry.getActiveSearchParam("Patient", "fuzzymodified");

	// The two runtime parameters must be distinct...
	assertNotEquals(defaultLengthParam, customLengthParam);

	// ...and must encode the same input to codes of different lengths,
	// the custom one being exactly as long as the configured length
	String defaultCode = defaultLengthParam.encode(fullName);
	String customCode = customLengthParam.encode(fullName);
	assertEquals(customMaxLength, customCode.length());
	assertNotEquals(defaultCode.length(), customCode.length());
}
/**
 * Builds an active, string-typed search parameter on Patient whose
 * expression joins the first given name and the family name.
 * Neither a code nor any extensions are set; the calling test is
 * expected to add them.
 */
private SearchParameter constructSearchParameter() {
	SearchParameter patientNameParam = new SearchParameter();
	patientNameParam.setType(Enumerations.SearchParamType.STRING);
	patientNameParam.setStatus(Enumerations.PublicationStatus.ACTIVE);
	patientNameParam.setExpression("Patient.name.given.first() + ' ' + Patient.name.family");
	patientNameParam.addBase("Patient");
	return patientNameParam;
}
/**
* Draft search parameters should be ok even if they aren't completely valid
*/
@ -299,7 +358,6 @@ public class FhirResourceDaoR4SearchCustomSearchParamTest extends BaseJpaR4Test
}
}
@Test
@Disabled
public void testCreateInvalidParamInvalidResourceName() {

View File

@ -23,14 +23,14 @@ package ca.uhn.fhir.jpa.searchparam.registry;
import ca.uhn.fhir.context.ComboSearchParamType;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.context.RuntimeSearchParam;
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
import ca.uhn.fhir.model.api.ExtensionDt;
import ca.uhn.fhir.rest.api.RestSearchParameterTypeEnum;
import ca.uhn.fhir.rest.server.exceptions.InternalErrorException;
import ca.uhn.fhir.util.DatatypeUtil;
import ca.uhn.fhir.util.FhirTerser;
import ca.uhn.fhir.util.HapiExtensions;
import org.apache.commons.lang3.EnumUtils;
import ca.uhn.fhir.util.PhoneticEncoderUtil;
import org.hl7.fhir.dstu3.model.Extension;
import org.hl7.fhir.dstu3.model.SearchParameter;
import org.hl7.fhir.instance.model.api.IBase;
@ -374,10 +374,15 @@ public class SearchParameterCanonicalizer {
private void setEncoder(RuntimeSearchParam theRuntimeSearchParam, IBaseDatatype theValue) {
if (theValue instanceof IPrimitiveType) {
String stringValue = ((IPrimitiveType<?>) theValue).getValueAsString();
PhoneticEncoderEnum encoderEnum = EnumUtils.getEnum(PhoneticEncoderEnum.class, stringValue);
if (encoderEnum != null) {
theRuntimeSearchParam.setPhoneticEncoder(encoderEnum.getPhoneticEncoder());
} else {
// every string creates a completely new encoder wrapper.
// this is fine, because the runtime search parameters are constructed at startup
// for every saved value
IPhoneticEncoder encoder = PhoneticEncoderUtil.getEncoder(stringValue);
if (encoder != null) {
theRuntimeSearchParam.setPhoneticEncoder(encoder);
}
else {
ourLog.error("Invalid PhoneticEncoderEnum value '" + stringValue + "'");
}
}

View File

@ -22,6 +22,7 @@ package ca.uhn.fhir.mdm.rules.matcher;
import ca.uhn.fhir.context.phonetic.IPhoneticEncoder;
import ca.uhn.fhir.context.phonetic.PhoneticEncoderEnum;
import ca.uhn.fhir.util.PhoneticEncoderUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -31,7 +32,7 @@ public class PhoneticEncoderMatcher implements IMdmStringMatcher {
private final IPhoneticEncoder myStringEncoder;
public PhoneticEncoderMatcher(PhoneticEncoderEnum thePhoneticEnum) {
myStringEncoder = thePhoneticEnum.getPhoneticEncoder();
myStringEncoder = PhoneticEncoderUtil.getEncoder(thePhoneticEnum.name());
}
@Override