Merge pull request #1536 from hapifhir/2024-01-gg-extended-unicode

extended unicode support
This commit is contained in:
Grahame Grieve 2024-01-10 13:13:28 +11:00 committed by GitHub
commit 96c1b07211
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 89 additions and 68 deletions

View File

@ -391,7 +391,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -440,7 +440,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1767,7 +1767,7 @@ public class FHIRPathEngine {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -933,7 +933,7 @@ public class Turtle {
}
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc);
b.append(Character.toString(uc));
i = i + l;
break;
default:

View File

@ -482,7 +482,7 @@ public class TurtleTests {
@Test
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundariesNT() throws Exception {
doTest(TestingUtilities.resourceNameToFile("turtle",
"localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.nt"), true);
"localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.nt"), false);
}
@Test
@ -523,7 +523,7 @@ public class TurtleTests {
@Test
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundariesNT() throws Exception {
doTest(TestingUtilities.resourceNameToFile("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.nt"),
true);
false);
}
@Test

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -500,7 +500,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1768,7 +1768,7 @@ public class FHIRPathEngine {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -499,8 +499,8 @@ public class FHIRLexer {
break;
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
// The four characters after the escape introducer are hexadecimal digits, so they
// must be parsed with radix 16 (as every sibling escape handler does); radix 32
// would accept letters g-v and produce the wrong code point.
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1779,7 +1779,7 @@ public class FHIRPathEngine {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -135,7 +135,9 @@ public class TerminologyClientManager {
if (!internalErrors.contains(msg)) {
internalErrors.add(msg);
}
e.printStackTrace();
if (!monitorServiceURL.contains("tx.fhir.org")) {
e.printStackTrace();
}
}
return getMasterClient().getAddress();
@ -211,7 +213,7 @@ public class TerminologyClientManager {
this.cache = cache;
this.cacheFile = null;
if (cache != null) {
if (cache != null && cache.getFolder() != null) {
try {
cacheFile = new File(Utilities.path(cache.getFolder(), "system-map.json"));
if (cacheFile.exists()) {
@ -227,7 +229,7 @@ public class TerminologyClientManager {
}
private void save() {
if (cacheFile != null) {
if (cacheFile != null && cache.getFolder() != null) {
JsonObject json = new JsonObject();
for (String s : Utilities.sorted(resMap.keySet())) {
JsonObject si = new JsonObject();

View File

@ -179,12 +179,12 @@ public class TextFile {
//while (sr.ready()) { Commented out by Claude Nanjo (1/14/2014) - sr.ready() always returns false - please remove if change does not impact other areas of codebase
int i = -1;
while((i = sr.read()) > -1) {
char c = (char) i;
b.append(c);
String s = Character.toString(i);
b.append(s);
}
sr.close();
return b.toString().replace("\uFEFF", "");
return b.toString().replace("\uFEFF", "");
}
public static byte[] streamToBytes(InputStream input) throws IOException {

View File

@ -528,7 +528,6 @@ public class Utilities {
return value.replace("\r\n", "\r").replace("\n", "\r").replace("\r", "\r\n");
}
public static String unescapeXml(String xml) throws FHIRException {
if (xml == null)
return null;
@ -601,7 +600,7 @@ public class Utilities {
break;
case 'u':
String hex = json.substring(i + 1, i + 5);
b.append((char) Integer.parseInt(hex, 16));
b.append(Character.toString(Integer.parseInt(hex, 16)));
break;
default:
throw new FHIRException("Unknown JSON escape \\" + ch);

View File

@ -49,7 +49,7 @@ import org.hl7.fhir.utilities.Utilities;
public class Turtle {
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\\x{00A0}-\\x{10FFFF}";
public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+";
public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
@ -974,15 +974,21 @@ public class Turtle {
case 'u':
i++;
int l = 4;
int uc = Integer.parseInt(s.substring(i, i+l), 16);
String ss = s.substring(i, i+l);
int uc = Integer.parseInt(ss, 16);
if (uc < (isUri ? 33 : 32)) {
l = 8;
uc = Integer.parseInt(s.substring(i, i+8), 16);
ss = s.substring(i, i+l);
uc = Integer.parseInt(ss, 16);
}
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc);
i = i + l;
try {
b.append(Character.toString(uc));
} catch (Exception e) {
throw new FHIRFormatError("Illegal Unicode Sequence: "+ss);
}
i = i + l - 1; // -1 cause we're about to i++
break;
default:
throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));

View File

@ -137,52 +137,64 @@ public class XhtmlComposer {
String src = node.getContent();
int i = 0;
while (i < src.length()) {
char c = src.charAt(i);
if (autoLinks && c == 'h' && Utilities.startsWithInList(src.substring(i), "http://", "https://")) {
int j = i;
while (i < src.length() && isValidUrlChar(src.charAt(i))) {
i++;
}
String url = src.substring(j, i);
if (url.endsWith(".") || url.endsWith(",")) {
i--;
url = url.substring(0, url.length()-1);
}
url = Utilities.escapeXml(url);
dst.append("<a href=\""+url+"\">"+ url +"</a>");
int ci = src.codePointAt(i);
if (ci > 65535) {
dst.append("&#x");
dst.append(Integer.toHexString(ci).toUpperCase());
dst.append(";");
i += Character.charCount(ci);
} else {
i++;
if (c == '&') {
dst.append("&amp;");
} else if (c == '<') {
dst.append("&lt;");
} else if (c == '>') {
dst.append("&gt;");
} else if (xml) {
if (c == '"')
dst.append("&quot;");
else
dst.append(c);
char c = (char) ci;
if (autoLinks && c == 'h' && Utilities.startsWithInList(src.substring(i), "http://", "https://")) {
int j = i;
while (i < src.length() && isValidUrlChar(src.charAt(i))) {
i++;
}
String url = src.substring(j, i);
if (url.endsWith(".") || url.endsWith(",")) {
i--;
url = url.substring(0, url.length()-1);
}
url = Utilities.escapeXml(url);
dst.append("<a href=\""+url+"\">"+ url +"</a>");
} else {
if (c == XhtmlNode.NBSP.charAt(0))
dst.append("&nbsp;");
else if (c == (char) 0xA7)
dst.append("&sect;");
else if (c == (char) 169)
dst.append("&copy;");
else if (c == (char) 8482)
dst.append("&trade;");
else if (c == (char) 956)
dst.append("&mu;");
else if (c == (char) 174)
dst.append("&reg;");
else
dst.append(c);
i++;
if (c == '&') {
dst.append("&amp;");
} else if (c == '<') {
dst.append("&lt;");
} else if (c == '>') {
dst.append("&gt;");
} else if (xml) {
if (c == '"')
dst.append("&quot;");
else
dst.append(c);
} else {
if (c == XhtmlNode.NBSP.charAt(0))
dst.append("&nbsp;");
else if (c == (char) 0xA7)
dst.append("&sect;");
else if (c == (char) 169)
dst.append("&copy;");
else if (c == (char) 8482)
dst.append("&trade;");
else if (c == (char) 956)
dst.append("&mu;");
else if (c == (char) 174)
dst.append("&reg;");
else
dst.append(c);
}
}
}
}
}
/**
 * Stub: ignores both arguments and always returns {@code false}.
 *
 * NOTE(review): the name suggests this was meant to detect two chars that together
 * encode one code point (a surrogate pair); no caller is visible here, so confirm
 * whether it should be implemented or removed.
 *
 * @param c1 first char of the candidate pair (currently unused)
 * @param c2 second char of the candidate pair (currently unused)
 * @return always {@code false}
 */
boolean isTwoCharUnicodeCodePoint(char c1, char c2) {
return false;
}
/**
 * Writes the node's content as a comment: the given indent, then the trimmed
 * content wrapped in comment delimiters, then a CRLF when pretty printing is on
 * and not overridden for this node.
 *
 * @param indent text emitted before the comment opener
 * @param node node whose trimmed content becomes the comment body
 * @param noPrettyOverride when true, suppresses the trailing line break even if pretty printing is enabled
 * @throws IOException if appending to the destination fails
 */
private void writeComment(String indent, XhtmlNode node, boolean noPrettyOverride) throws IOException {
  String commentText = node.getContent().trim();
  String lineEnd;
  if (pretty && !noPrettyOverride) {
    lineEnd = "\r\n";
  } else {
    lineEnd = "";
  }
  // Build the whole string first so the destination receives a single append, as before.
  String rendered = indent + "<!-- " + commentText + " -->" + lineEnd;
  dst.append(rendered);
}

View File

@ -135,7 +135,7 @@ public class TurtleTests {
@Test
public void test_labeled_blank_node_with_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
@Test
@ -478,7 +478,7 @@ public class TurtleTests {
@Test
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
// don't need to support property names with ':'
@ -517,7 +517,7 @@ public class TurtleTests {
@Test
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
@Test

View File

@ -217,6 +217,8 @@ public class XhtmlNodeTest {
/**
 * Round-trips a hexadecimal numeric entity above FFFF: parsing must yield the
 * two-char surrogate pair as text content, and composing must emit the same
 * numeric entity again.
 */
public void testEntityNumberGreaterThanFFFF_Hex() throws IOException {
  final String markup = "<div>&#x1F637;</div>";

  XhtmlNode parsed = new XhtmlParser().parse(markup, "div");
  String textContent = parsed.getFirstElement().getChildNodes().get(0).getContent();
  Assertions.assertEquals("\uD83D\uDE37", textContent);

  // Composing (non-XML mode) must write the supplementary character back as an entity.
  XhtmlComposer composer = new XhtmlComposer(false);
  String html = composer.compose(parsed);
  Assertions.assertEquals("<div>&#x1F637;</div>", html);
}