Fix #832 - Correctly normalize text with Korean characters

This commit is contained in:
James Agnew 2018-01-21 14:55:14 +08:00
parent 3644151a69
commit 4dcce4e582
6 changed files with 122 additions and 47 deletions

View File

@ -1,5 +1,11 @@
package ca.uhn.fhir.i18n; package ca.uhn.fhir.i18n;
import ca.uhn.fhir.context.ConfigurationException;
import java.text.MessageFormat;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import static org.apache.commons.lang3.StringUtils.isNotBlank; import static org.apache.commons.lang3.StringUtils.isNotBlank;
/* /*
@ -11,9 +17,9 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank;
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at * You may obtain a copy of the License at
* *
* http://www.apache.org/licenses/LICENSE-2.0 * http://www.apache.org/licenses/LICENSE-2.0
* *
* Unless required by applicable law or agreed to in writing, software * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, * distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -22,30 +28,18 @@ import static org.apache.commons.lang3.StringUtils.isNotBlank;
* #L% * #L%
*/ */
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import ca.uhn.fhir.context.ConfigurationException;
/** /**
* This feature is not yet in its final state and should be considered an internal part of HAPI for now - use with caution * This feature is not yet in its final state and should be considered an internal part of HAPI for now - use with caution
*/ */
public class HapiLocalizer { public class HapiLocalizer {
private static boolean ourFailOnMissingMessage; @SuppressWarnings("WeakerAccess")
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(HapiLocalizer.class);
public static final String UNKNOWN_I18N_KEY_MESSAGE = "!MESSAGE!"; public static final String UNKNOWN_I18N_KEY_MESSAGE = "!MESSAGE!";
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(HapiLocalizer.class);
private List<ResourceBundle> myBundle = new ArrayList<ResourceBundle>(); private static boolean ourFailOnMissingMessage;
private final Map<String, MessageFormat> myKeyToMessageFormat = new ConcurrentHashMap<>();
private List<ResourceBundle> myBundle = new ArrayList<>();
private String[] myBundleNames; private String[] myBundleNames;
private final Map<String, MessageFormat> myKeyToMessageFormat = new ConcurrentHashMap<String, MessageFormat>();
public HapiLocalizer() { public HapiLocalizer() {
this(HapiLocalizer.class.getPackage().getName() + ".hapi-messages"); this(HapiLocalizer.class.getPackage().getName() + ".hapi-messages");
@ -56,7 +50,22 @@ public class HapiLocalizer {
init(); init();
} }
private String findFormatString(String theQualifiedKey) { public Set<String> getAllKeys() {
HashSet<String> retVal = new HashSet<>();
for (ResourceBundle nextBundle : myBundle) {
Enumeration<String> keysEnum = nextBundle.getKeys();
while (keysEnum.hasMoreElements()) {
retVal.add(keysEnum.nextElement());
}
}
return retVal;
}
/**
* @return Returns the raw message format string for the given key, or returns {@link #UNKNOWN_I18N_KEY_MESSAGE} if not found
*/
@SuppressWarnings("WeakerAccess")
public String getFormatString(String theQualifiedKey) {
String formatString = null; String formatString = null;
for (ResourceBundle nextBundle : myBundle) { for (ResourceBundle nextBundle : myBundle) {
if (nextBundle.containsKey(theQualifiedKey)) { if (nextBundle.containsKey(theQualifiedKey)) {
@ -77,36 +86,24 @@ public class HapiLocalizer {
return formatString; return formatString;
} }
public Set<String> getAllKeys(){
HashSet<String> retVal = new HashSet<String>();
for (ResourceBundle nextBundle : myBundle) {
Enumeration<String> keysEnum = nextBundle.getKeys();
while (keysEnum.hasMoreElements()) {
retVal.add(keysEnum.nextElement());
}
}
return retVal;
}
public String getMessage(Class<?> theType, String theKey, Object... theParameters) { public String getMessage(Class<?> theType, String theKey, Object... theParameters) {
return getMessage(theType.getName() + '.' + theKey, theParameters); return getMessage(toKey(theType, theKey), theParameters);
} }
public String getMessage(String theQualifiedKey, Object... theParameters) { public String getMessage(String theQualifiedKey, Object... theParameters) {
if (theParameters != null && theParameters.length > 0) { if (theParameters != null && theParameters.length > 0) {
MessageFormat format = myKeyToMessageFormat.get(theQualifiedKey); MessageFormat format = myKeyToMessageFormat.get(theQualifiedKey);
if (format != null) { if (format != null) {
return format.format(theParameters).toString(); return format.format(theParameters);
} }
String formatString = findFormatString(theQualifiedKey); String formatString = getFormatString(theQualifiedKey);
format = new MessageFormat(formatString.trim()); format = new MessageFormat(formatString.trim());
myKeyToMessageFormat.put(theQualifiedKey, format); myKeyToMessageFormat.put(theQualifiedKey, format);
return format.format(theParameters).toString(); return format.format(theParameters);
} }
String retVal = findFormatString(theQualifiedKey); return getFormatString(theQualifiedKey);
return retVal;
} }
protected void init() { protected void init() {
@ -114,7 +111,7 @@ public class HapiLocalizer {
myBundle.add(ResourceBundle.getBundle(nextName)); myBundle.add(ResourceBundle.getBundle(nextName));
} }
} }
/** /**
* This <b>global setting</b> causes the localizer to fail if any attempts * This <b>global setting</b> causes the localizer to fail if any attempts
* are made to retrieve a key that does not exist. This method is primarily for * are made to retrieve a key that does not exist. This method is primarily for
@ -123,5 +120,9 @@ public class HapiLocalizer {
public static void setOurFailOnMissingMessage(boolean ourFailOnMissingMessage) { public static void setOurFailOnMissingMessage(boolean ourFailOnMissingMessage) {
HapiLocalizer.ourFailOnMissingMessage = ourFailOnMissingMessage; HapiLocalizer.ourFailOnMissingMessage = ourFailOnMissingMessage;
} }
/**
 * Builds the fully qualified message key for a class-scoped message.
 *
 * @param theType the class whose fully qualified name prefixes the key
 * @param theKey  the key suffix, appended after a '.' separator
 * @return the type name, a '.', and the key, concatenated
 */
public static String toKey(Class<?> theType, String theKey) {
	String typeName = theType.getName();
	StringBuilder qualifiedKey = new StringBuilder(typeName);
	qualifiedKey.append('.');
	qualifiedKey.append(theKey);
	return qualifiedKey.toString();
}
} }

View File

@ -73,7 +73,10 @@ import javax.persistence.criteria.Predicate;
import javax.persistence.criteria.Root; import javax.persistence.criteria.Root;
import javax.xml.stream.events.Characters; import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent; import javax.xml.stream.events.XMLEvent;
import java.io.CharArrayWriter;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.CharBuffer;
import java.text.Normalizer; import java.text.Normalizer;
import java.util.*; import java.util.*;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -1999,7 +2002,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
} }
public static String normalizeString(String theString) { public static String normalizeString(String theString) {
char[] out = new char[theString.length()]; CharArrayWriter outBuffer = new CharArrayWriter(theString.length());
/* /*
* The following block of code is used to strip out diacritical marks from latin script * The following block of code is used to strip out diacritical marks from latin script
@ -2009,18 +2012,19 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
* behind stripping 0300-036F * behind stripping 0300-036F
* *
* See #454 for an issue where we were completely stripping non latin characters * See #454 for an issue where we were completely stripping non latin characters
* See #832 for an issue where Korean characters, which NFD decomposes into more chars than
* the input string contains, overflowed a fixed-size output buffer
*/ */
String string = Normalizer.normalize(theString, Normalizer.Form.NFD); String string = Normalizer.normalize(theString, Normalizer.Form.NFD);
int j = 0;
for (int i = 0, n = string.length(); i < n; ++i) { for (int i = 0, n = string.length(); i < n; ++i) {
char c = string.charAt(i); char c = string.charAt(i);
if (c >= '\u0300' && c <= '\u036F') { if (c >= '\u0300' && c <= '\u036F') {
continue; continue;
} else { } else {
out[j++] = c; outBuffer.append(c);
} }
} }
return new String(out).toUpperCase();
return new String(outBuffer.toCharArray()).toUpperCase();
} }
private static String parseNarrativeTextIntoWords(IBaseResource theResource) { private static String parseNarrativeTextIntoWords(IBaseResource theResource) {

View File

@ -1,6 +1,8 @@
package ca.uhn.fhir.jpa.dao.r4; package ca.uhn.fhir.jpa.dao.r4;
import ca.uhn.fhir.jpa.dao.DaoConfig; import ca.uhn.fhir.jpa.dao.DaoConfig;
import ca.uhn.fhir.jpa.dao.SearchParameterMap;
import ca.uhn.fhir.rest.param.StringParam;
import ca.uhn.fhir.util.TestUtil; import ca.uhn.fhir.util.TestUtil;
import org.hl7.fhir.instance.model.api.IIdType; import org.hl7.fhir.instance.model.api.IIdType;
import org.hl7.fhir.r4.model.Bundle; import org.hl7.fhir.r4.model.Bundle;
@ -13,6 +15,10 @@ import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.matchesPattern; import static org.hamcrest.Matchers.matchesPattern;
import static org.junit.Assert.*; import static org.junit.Assert.*;
@ -24,6 +30,34 @@ public class FhirResourceDaoR4CreateTest extends BaseJpaR4Test {
myDaoConfig.setResourceServerIdStrategy(new DaoConfig().getResourceServerIdStrategy()); myDaoConfig.setResourceServerIdStrategy(new DaoConfig().getResourceServerIdStrategy());
} }
/**
 * Regression test for #832: a Patient whose name contains Korean text can be created
 * and then found through string searches on its name.
 *
 * The fixture /r4/bug832-korean-text.xml declares family "김" and given "준수".
 */
@Test
public void testCreateResourceWithKoreanText() throws IOException {
	String input = loadClasspath("/r4/bug832-korean-text.xml");
	Patient p = myFhirCtx.newXmlParser().parseResource(Patient.class, input);
	String id = myPatientDao.create(p).getId().toUnqualifiedVersionless().getValue();

	// Full family name
	SearchParameterMap map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_FAMILY, new StringParam("김"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Leftmost character of the given name only - expected to match
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("준"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Full given name
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("준수"));
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), contains(id));

	// Rightmost character only - expected NOT to match
	map = new SearchParameterMap();
	map.setLoadSynchronous(true);
	map.add(Patient.SP_GIVEN, new StringParam("수")); // rightmost character only
	assertThat(toUnqualifiedVersionlessIdValues(myPatientDao.search(map)), empty());
}
@Test @Test
public void testCreateWithUuidResourceStrategy() throws Exception { public void testCreateWithUuidResourceStrategy() throws Exception {
myDaoConfig.setResourceServerIdStrategy(DaoConfig.IdStrategyEnum.UUID); myDaoConfig.setResourceServerIdStrategy(DaoConfig.IdStrategyEnum.UUID);

View File

@ -0,0 +1,33 @@
<Patient xmlns="http://hl7.org/fhir">
<identifier>
<use value="usual" />
<type>
<coding>
<system value="http://hl7.org/fhir/v2/0203" />
<code value="MR" />
</coding>
</type>
<system value="urn:oid:1.2.410.100110.10.41308301" />
<value value="88888888" />
<assigner>
<display value="Seoul Samsung" />
</assigner>
</identifier>
<active value="true" />
<name>
<use value="official" />
<family value="김" />
<given value="준수" />
</name>
<gender value="male" />
<birthDate value="1988-01-01" />
<contact>
<telecom>
<system value="phone" />
<value value="010-0000-1000" />
</telecom>
</contact>
<managingOrganization>
<display value="Seoul Samsung" />
</managingOrganization>
</Patient>

View File

@ -5,12 +5,11 @@ import java.util.List;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import org.hl7.fhir.dstu3.model.Bundle;
import org.hl7.fhir.dstu3.model.Subscription;
import org.springframework.ui.ModelMap; import org.springframework.ui.ModelMap;
import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestMapping;
import ca.uhn.fhir.model.dstu2.resource.Bundle;
import ca.uhn.fhir.model.dstu2.resource.Bundle.Entry;
import ca.uhn.fhir.model.dstu2.resource.Subscription;
import ca.uhn.fhir.rest.client.impl.GenericClient; import ca.uhn.fhir.rest.client.impl.GenericClient;
import ca.uhn.fhir.to.BaseController; import ca.uhn.fhir.to.BaseController;
import ca.uhn.fhir.to.model.HomeRequest; import ca.uhn.fhir.to.model.HomeRequest;
@ -43,7 +42,7 @@ public class SubscriptionPlaygroundController extends BaseController {
.execute(); .execute();
List<Subscription> subscriptions = new ArrayList<Subscription>(); List<Subscription> subscriptions = new ArrayList<Subscription>();
for (Entry next : resp.getEntry()) { for (Bundle.BundleEntryComponent next : resp.getEntry()) {
if (next.getResource() instanceof Subscription) { if (next.getResource() instanceof Subscription) {
subscriptions.add((Subscription) next.getResource()); subscriptions.add((Subscription) next.getResource());
} }

View File

@ -14,6 +14,10 @@
repeatable primitive. Thanks to Igor Sirkovich for providing a repeatable primitive. Thanks to Igor Sirkovich for providing a
test case! test case!
</action> </action>
<action type="fix" issue="832">
Fix an issue where the JPA server crashed while attempting to normalize string values
containing Korean text. Thanks to GitHub user @JoonggeonLee for reporting!
</action>
</release> </release>
<release version="3.2.0" date="2018-01-13"> <release version="3.2.0" date="2018-01-13">
<action type="add"> <action type="add">