Fix #832 - Correctly normalize text with Korean characters

This commit is contained in:
James Agnew 2018-01-21 14:55:14 +08:00
parent 3644151a69
commit 4dcce4e582
6 changed files with 122 additions and 47 deletions

View File

@ -1,5 +1,11 @@
package ca.uhn.fhir.i18n;
import ca.uhn.fhir.context.ConfigurationException;
import java.text.MessageFormat;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
/*
@ -22,30 +28,18 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank;
* #L%
*/
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import ca.uhn.fhir.context.ConfigurationException;
/**
* This feature is not yet in its final state and should be considered an internal part of HAPI for now - use with caution
*/
public class HapiLocalizer {
private static boolean ourFailOnMissingMessage;
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(HapiLocalizer.class);
@SuppressWarnings("WeakerAccess")
public static final String UNKNOWN_I18N_KEY_MESSAGE = "!MESSAGE!";
private List<ResourceBundle> myBundle = new ArrayList<ResourceBundle>();
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(HapiLocalizer.class);
private static boolean ourFailOnMissingMessage;
private final Map<String, MessageFormat> myKeyToMessageFormat = new ConcurrentHashMap<>();
private List<ResourceBundle> myBundle = new ArrayList<>();
private String[] myBundleNames;
private final Map<String, MessageFormat> myKeyToMessageFormat = new ConcurrentHashMap<String, MessageFormat>();
public HapiLocalizer() {
this(HapiLocalizer.class.getPackage().getName() + ".hapi-messages");
@ -56,7 +50,22 @@ public class HapiLocalizer {
init();
}
private String findFormatString(String theQualifiedKey) {
/**
* Collects the keys of every loaded resource bundle into a single set.
*
* @return a newly created set holding the union of the keys of all bundles in {@code myBundle}
*/
public Set<String> getAllKeys() {
	Set<String> allKeys = new HashSet<>();
	for (ResourceBundle bundle : myBundle) {
		// ResourceBundle only exposes its keys as an Enumeration, so drain it by hand
		for (Enumeration<String> keys = bundle.getKeys(); keys.hasMoreElements(); ) {
			allKeys.add(keys.nextElement());
		}
	}
	return allKeys;
}
/**
* @return Returns the raw message format string for the given key, or returns {@link #UNKNOWN_I18N_KEY_MESSAGE} if not found
*/
@SuppressWarnings("WeakerAccess")
public String getFormatString(String theQualifiedKey) {
String formatString = null;
for (ResourceBundle nextBundle : myBundle) {
if (nextBundle.containsKey(theQualifiedKey)) {
@ -77,36 +86,24 @@ public class HapiLocalizer {
return formatString;
}
public Set<String> getAllKeys(){
HashSet<String> retVal = new HashSet<String>();
for (ResourceBundle nextBundle : myBundle) {
Enumeration<String> keysEnum = nextBundle.getKeys();
while (keysEnum.hasMoreElements()) {
retVal.add(keysEnum.nextElement());
}
}
return retVal;
}
/**
* Looks up a message whose key is scoped to a type: the qualified key is the type's
* class name, a '.', and then theKey. The lookup itself is delegated to
* {@link #getMessage(String, Object...)}.
*
* @param theType the class whose name prefixes the key
* @param theKey the key suffix within that type
* @param theParameters optional arguments passed through to the qualified-key lookup
* @return whatever {@link #getMessage(String, Object...)} returns for the qualified key
*/
public String getMessage(Class<?> theType, String theKey, Object... theParameters) {
return getMessage(theType.getName() + '.' + theKey, theParameters);
return getMessage(toKey(theType, theKey), theParameters);
}
/**
* Resolves the message for a fully qualified key.
* <p>
* When at least one parameter is supplied, the format string is run through a
* {@link MessageFormat}; compiled formats are cached per key in {@code myKeyToMessageFormat}.
* When no parameters are supplied, the looked-up format string is returned as-is
* (it is neither trimmed nor passed through {@link MessageFormat}).
*
* @param theQualifiedKey the fully qualified message key
* @param theParameters optional arguments substituted into the message format
* @return the formatted message, or the raw format string when there are no parameters
*/
public String getMessage(String theQualifiedKey, Object... theParameters) {
if (theParameters != null && theParameters.length > 0) {
// Reuse a previously compiled format for this key if there is one
MessageFormat format = myKeyToMessageFormat.get(theQualifiedKey);
if (format != null) {
return format.format(theParameters).toString();
return format.format(theParameters);
}
// Cache miss: look up the format string, compile it (trimmed) and remember it
String formatString = findFormatString(theQualifiedKey);
String formatString = getFormatString(theQualifiedKey);
format = new MessageFormat(formatString.trim());
myKeyToMessageFormat.put(theQualifiedKey, format);
return format.format(theParameters).toString();
return format.format(theParameters);
}
// No parameters: return the looked-up string directly
String retVal = findFormatString(theQualifiedKey);
return retVal;
return getFormatString(theQualifiedKey);
}
protected void init() {
@ -124,4 +121,8 @@ public class HapiLocalizer {
HapiLocalizer.ourFailOnMissingMessage = ourFailOnMissingMessage;
}
/**
* Builds the fully qualified message key for a key that is scoped to a type.
*
* @param theType the class whose name (as returned by {@link Class#getName()}) prefixes the key
* @param theKey the key suffix
* @return the type name, a '.', and then theKey
*/
public static String toKey(Class<?> theType, String theKey) {
	StringBuilder qualifiedKey = new StringBuilder(theType.getName());
	qualifiedKey.append('.');
	qualifiedKey.append(theKey);
	return qualifiedKey.toString();
}
}

View File

@ -73,7 +73,10 @@ import javax.persistence.criteria.Predicate;
import javax.persistence.criteria.Root;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
import java.io.CharArrayWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.CharBuffer;
import java.text.Normalizer;
import java.util.*;
import java.util.Map.Entry;
@ -1999,7 +2002,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
}
/**
* Normalizes a string: the input is decomposed to Unicode NFD form, every character
* in the range U+0300 to U+036F (combining diacritical marks) is dropped, and the
* remainder is upper-cased.
*
* @param theString the text to normalize
* @return the upper-cased text with all characters in U+0300 to U+036F removed
*/
public static String normalizeString(String theString) {
// The NFD form can be longer than the input (for example, a Hangul syllable decomposes
// into several jamo), so the output buffer must be able to grow past theString.length()
char[] out = new char[theString.length()];
CharArrayWriter outBuffer = new CharArrayWriter(theString.length());
/*
* The following block of code is used to strip out diacritical marks from latin script
@ -2009,18 +2012,19 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
* behind stripping 0300-036F
*
* See #454 for an issue where we were completely stripping non latin characters
* See #832 for an issue with Korean characters, which NFD decomposes into more
* characters than the input contained, overflowing a buffer sized to the input length
*/
String string = Normalizer.normalize(theString, Normalizer.Form.NFD);
int j = 0;
for (int i = 0, n = string.length(); i < n; ++i) {
char c = string.charAt(i);
if (c >= '\u0300' && c <= '\u036F') {
// Combining diacritical mark: leave it out of the output
continue;
} else {
out[j++] = c;
outBuffer.append(c);
}
}
return new String(out).toUpperCase();
return new String(outBuffer.toCharArray()).toUpperCase();
}
private static String parseNarrativeTextIntoWords(IBaseResource theResource) {

View File

@ -1,6 +1,8 @@
package ca.uhn.fhir.jpa.dao.r4;
import ca.uhn.fhir.jpa.dao.DaoConfig;
import ca.uhn.fhir.jpa.dao.SearchParameterMap;
import ca.uhn.fhir.rest.param.StringParam;
import ca.uhn.fhir.util.TestUtil;
import org.hl7.fhir.instance.model.api.IIdType;
import org.hl7.fhir.r4.model.Bundle;
@ -13,6 +15,10 @@ import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.matchesPattern;
import static org.junit.Assert.*;
@ -24,6 +30,34 @@ public class FhirResourceDaoR4CreateTest extends BaseJpaR4Test {
myDaoConfig.setResourceServerIdStrategy(new DaoConfig().getResourceServerIdStrategy());
}
/**
* Regression test for #832: a Patient whose name contains Korean text must be storable
* and searchable by name.
* <p>
* The fixture has family name "김" and given name "준수". Searches by the family name,
* by the leading character of the given name and by the whole given name are expected
* to find the patient; a search by the trailing character of the given name alone is
* expected to find nothing.
*/
@Test
public void testCreateResourceWithKoreanText() throws IOException {
	String input = loadClasspath("/r4/bug832-korean-text.xml");
	Patient p = myFhirCtx.newXmlParser().parseResource(Patient.class, input);
	String id = myPatientDao.create(p).getId().toUnqualifiedVersionless().getValue();

	// Family name
	SearchParameterMap map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_FAMILY, new StringParam("김"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Given name, leftmost character only
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("준"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Given name, complete
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("준수"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Given name, rightmost character only
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("수"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), empty());
}
@Test
public void testCreateWithUuidResourceStrategy() throws Exception {
myDaoConfig.setResourceServerIdStrategy(DaoConfig.IdStrategyEnum.UUID);

View File

@ -0,0 +1,33 @@
<Patient xmlns="http://hl7.org/fhir">
<identifier>
<use value="usual" />
<type>
<coding>
<system value="http://hl7.org/fhir/v2/0203" />
<code value="MR" />
</coding>
</type>
<system value="urn:oid:1.2.410.100110.10.41308301" />
<value value="88888888" />
<assigner>
<display value="Seoul Samsung" />
</assigner>
</identifier>
<active value="true" />
<name>
<use value="official" />
<family value="김" />
<given value="준수" />
</name>
<gender value="male" />
<birthDate value="1988-01-01" />
<contact>
<telecom>
<system value="phone" />
<value value="010-0000-1000" />
</telecom>
</contact>
<managingOrganization>
<display value="Seoul Samsung" />
</managingOrganization>
</Patient>

View File

@ -5,12 +5,11 @@ import java.util.List;
import javax.servlet.http.HttpServletRequest;
import org.hl7.fhir.dstu3.model.Bundle;
import org.hl7.fhir.dstu3.model.Subscription;
import org.springframework.ui.ModelMap;
import org.springframework.web.bind.annotation.RequestMapping;
import ca.uhn.fhir.model.dstu2.resource.Bundle;
import ca.uhn.fhir.model.dstu2.resource.Bundle.Entry;
import ca.uhn.fhir.model.dstu2.resource.Subscription;
import ca.uhn.fhir.rest.client.impl.GenericClient;
import ca.uhn.fhir.to.BaseController;
import ca.uhn.fhir.to.model.HomeRequest;
@ -43,7 +42,7 @@ public class SubscriptionPlaygroundController extends BaseController {
.execute();
List<Subscription> subscriptions = new ArrayList<Subscription>();
for (Entry next : resp.getEntry()) {
for (Bundle.BundleEntryComponent next : resp.getEntry()) {
if (next.getResource() instanceof Subscription) {
subscriptions.add((Subscription) next.getResource());
}

View File

@ -14,6 +14,10 @@
repeatable primitive. Thanks to Igor Sirkovich for providing a
test case!
</action>
<action type="fix" issue="832">
Fix an issue where the JPA server crashed while attempting to normalize string values
containing Korean text. Thanks to GitHub user @JoonggeonLee for reporting!
</action>
</release>
<release version="3.2.0" date="2018-01-13">
<action type="add">