Merge pull request #1536 from hapifhir/2024-01-gg-extended-unicode
extended unicode support
This commit is contained in:
commit
96c1b07211
|
@ -391,7 +391,7 @@ public class FHIRLexer {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
@ -440,7 +440,7 @@ public class FHIRLexer {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1767,7 +1767,7 @@ public class FHIRPathEngine {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 3;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -933,7 +933,7 @@ public class Turtle {
|
|||
}
|
||||
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
|
||||
throw new FHIRFormatError("Illegal unicode character");
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + l;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -482,7 +482,7 @@ public class TurtleTests {
|
|||
@Test
|
||||
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundariesNT() throws Exception {
|
||||
doTest(TestingUtilities.resourceNameToFile("turtle",
|
||||
"localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.nt"), true);
|
||||
"localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.nt"), false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -523,7 +523,7 @@ public class TurtleTests {
|
|||
@Test
|
||||
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundariesNT() throws Exception {
|
||||
doTest(TestingUtilities.resourceNameToFile("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.nt"),
|
||||
true);
|
||||
false);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -451,7 +451,7 @@ public class FHIRLexer {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
@ -500,7 +500,7 @@ public class FHIRLexer {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1768,7 +1768,7 @@ public class FHIRPathEngine {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 3;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -451,7 +451,7 @@ public class FHIRLexer {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
@ -499,8 +499,8 @@ public class FHIRLexer {
|
|||
break;
|
||||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 16);
|
||||
b.append((char) uc);
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 32);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 4;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1779,7 +1779,7 @@ public class FHIRPathEngine {
|
|||
case 'u':
|
||||
i++;
|
||||
int uc = Integer.parseInt(s.substring(i, i+4), 16);
|
||||
b.append((char) uc);
|
||||
b.append(Character.toString(uc));
|
||||
i = i + 3;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -135,7 +135,9 @@ public class TerminologyClientManager {
|
|||
if (!internalErrors.contains(msg)) {
|
||||
internalErrors.add(msg);
|
||||
}
|
||||
e.printStackTrace();
|
||||
if (!monitorServiceURL.contains("tx.fhir.org")) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
return getMasterClient().getAddress();
|
||||
|
||||
|
@ -211,7 +213,7 @@ public class TerminologyClientManager {
|
|||
this.cache = cache;
|
||||
this.cacheFile = null;
|
||||
|
||||
if (cache != null) {
|
||||
if (cache != null && cache.getFolder() != null) {
|
||||
try {
|
||||
cacheFile = new File(Utilities.path(cache.getFolder(), "system-map.json"));
|
||||
if (cacheFile.exists()) {
|
||||
|
@ -227,7 +229,7 @@ public class TerminologyClientManager {
|
|||
}
|
||||
|
||||
private void save() {
|
||||
if (cacheFile != null) {
|
||||
if (cacheFile != null && cache.getFolder() != null) {
|
||||
JsonObject json = new JsonObject();
|
||||
for (String s : Utilities.sorted(resMap.keySet())) {
|
||||
JsonObject si = new JsonObject();
|
||||
|
|
|
@ -179,12 +179,12 @@ public class TextFile {
|
|||
//while (sr.ready()) { Commented out by Claude Nanjo (1/14/2014) - sr.ready() always returns false - please remove if change does not impact other areas of codebase
|
||||
int i = -1;
|
||||
while((i = sr.read()) > -1) {
|
||||
char c = (char) i;
|
||||
b.append(c);
|
||||
String s = Character.toString(i);
|
||||
b.append(s);
|
||||
}
|
||||
sr.close();
|
||||
|
||||
return b.toString().replace("\uFEFF", "");
|
||||
return b.toString().replace("\uFEFF", "");
|
||||
}
|
||||
|
||||
public static byte[] streamToBytes(InputStream input) throws IOException {
|
||||
|
|
|
@ -528,7 +528,6 @@ public class Utilities {
|
|||
return value.replace("\r\n", "\r").replace("\n", "\r").replace("\r", "\r\n");
|
||||
}
|
||||
|
||||
|
||||
public static String unescapeXml(String xml) throws FHIRException {
|
||||
if (xml == null)
|
||||
return null;
|
||||
|
@ -601,7 +600,7 @@ public class Utilities {
|
|||
break;
|
||||
case 'u':
|
||||
String hex = json.substring(i + 1, i + 5);
|
||||
b.append((char) Integer.parseInt(hex, 16));
|
||||
b.append(Character.toString(Integer.parseInt(hex, 16)));
|
||||
break;
|
||||
default:
|
||||
throw new FHIRException("Unknown JSON escape \\" + ch);
|
||||
|
|
|
@ -49,7 +49,7 @@ import org.hl7.fhir.utilities.Utilities;
|
|||
|
||||
public class Turtle {
|
||||
|
||||
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";
|
||||
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\\x{00A0}-\\x{10FFFF}";
|
||||
|
||||
public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+";
|
||||
public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
|
||||
|
@ -974,15 +974,21 @@ public class Turtle {
|
|||
case 'u':
|
||||
i++;
|
||||
int l = 4;
|
||||
int uc = Integer.parseInt(s.substring(i, i+l), 16);
|
||||
String ss = s.substring(i, i+l);
|
||||
int uc = Integer.parseInt(ss, 16);
|
||||
if (uc < (isUri ? 33 : 32)) {
|
||||
l = 8;
|
||||
uc = Integer.parseInt(s.substring(i, i+8), 16);
|
||||
ss = s.substring(i, i+l);
|
||||
uc = Integer.parseInt(ss, 16);
|
||||
}
|
||||
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
|
||||
throw new FHIRFormatError("Illegal unicode character");
|
||||
b.append((char) uc);
|
||||
i = i + l;
|
||||
try {
|
||||
b.append(Character.toString(uc));
|
||||
} catch (Exception e) {
|
||||
throw new FHIRFormatError("Illegal Unicode Sequence: "+ss);
|
||||
}
|
||||
i = i + l - 1; // -1 cause we're about to i++
|
||||
break;
|
||||
default:
|
||||
throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));
|
||||
|
|
|
@ -137,52 +137,64 @@ public class XhtmlComposer {
|
|||
String src = node.getContent();
|
||||
int i = 0;
|
||||
while (i < src.length()) {
|
||||
char c = src.charAt(i);
|
||||
if (autoLinks && c == 'h' && Utilities.startsWithInList(src.substring(i), "http://", "https://")) {
|
||||
int j = i;
|
||||
while (i < src.length() && isValidUrlChar(src.charAt(i))) {
|
||||
i++;
|
||||
}
|
||||
String url = src.substring(j, i);
|
||||
if (url.endsWith(".") || url.endsWith(",")) {
|
||||
i--;
|
||||
url = url.substring(0, url.length()-1);
|
||||
}
|
||||
url = Utilities.escapeXml(url);
|
||||
dst.append("<a href=\""+url+"\">"+ url +"</a>");
|
||||
int ci = src.codePointAt(i);
|
||||
if (ci > 65535) {
|
||||
dst.append("&#x");
|
||||
dst.append(Integer.toHexString(ci).toUpperCase());
|
||||
dst.append(";");
|
||||
i += Character.charCount(ci);
|
||||
} else {
|
||||
i++;
|
||||
if (c == '&') {
|
||||
dst.append("&amp;");
|
||||
} else if (c == '<') {
|
||||
dst.append("&lt;");
|
||||
} else if (c == '>') {
|
||||
dst.append("&gt;");
|
||||
} else if (xml) {
|
||||
if (c == '"')
|
||||
dst.append("&quot;");
|
||||
else
|
||||
dst.append(c);
|
||||
char c = (char) ci;
|
||||
if (autoLinks && c == 'h' && Utilities.startsWithInList(src.substring(i), "http://", "https://")) {
|
||||
int j = i;
|
||||
while (i < src.length() && isValidUrlChar(src.charAt(i))) {
|
||||
i++;
|
||||
}
|
||||
String url = src.substring(j, i);
|
||||
if (url.endsWith(".") || url.endsWith(",")) {
|
||||
i--;
|
||||
url = url.substring(0, url.length()-1);
|
||||
}
|
||||
url = Utilities.escapeXml(url);
|
||||
dst.append("<a href=\""+url+"\">"+ url +"</a>");
|
||||
} else {
|
||||
if (c == XhtmlNode.NBSP.charAt(0))
|
||||
dst.append("&nbsp;");
|
||||
else if (c == (char) 0xA7)
|
||||
dst.append("&sect;");
|
||||
else if (c == (char) 169)
|
||||
dst.append("&copy;");
|
||||
else if (c == (char) 8482)
|
||||
dst.append("&trade;");
|
||||
else if (c == (char) 956)
|
||||
dst.append("&mu;");
|
||||
else if (c == (char) 174)
|
||||
dst.append("&reg;");
|
||||
else
|
||||
dst.append(c);
|
||||
i++;
|
||||
if (c == '&') {
|
||||
dst.append("&amp;");
|
||||
} else if (c == '<') {
|
||||
dst.append("&lt;");
|
||||
} else if (c == '>') {
|
||||
dst.append("&gt;");
|
||||
} else if (xml) {
|
||||
if (c == '"')
|
||||
dst.append("&quot;");
|
||||
else
|
||||
dst.append(c);
|
||||
} else {
|
||||
if (c == XhtmlNode.NBSP.charAt(0))
|
||||
dst.append("&nbsp;");
|
||||
else if (c == (char) 0xA7)
|
||||
dst.append("&sect;");
|
||||
else if (c == (char) 169)
|
||||
dst.append("&copy;");
|
||||
else if (c == (char) 8482)
|
||||
dst.append("&trade;");
|
||||
else if (c == (char) 956)
|
||||
dst.append("&mu;");
|
||||
else if (c == (char) 174)
|
||||
dst.append("&reg;");
|
||||
else
|
||||
dst.append(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean isTwoCharUnicodeCodePoint(char c1, char c2) {
|
||||
return false;
|
||||
}
|
||||
|
||||
private void writeComment(String indent, XhtmlNode node, boolean noPrettyOverride) throws IOException {
|
||||
dst.append(indent + "<!-- " + node.getContent().trim() + " -->" + (pretty && !noPrettyOverride ? "\r\n" : ""));
|
||||
}
|
||||
|
|
|
@ -135,7 +135,7 @@ public class TurtleTests {
|
|||
|
||||
@Test
|
||||
public void test_labeled_blank_node_with_PN_CHARS_BASE_character_boundaries() throws Exception {
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), false);
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), true);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -478,7 +478,7 @@ public class TurtleTests {
|
|||
|
||||
@Test
|
||||
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
|
||||
}
|
||||
|
||||
// don't need to support property names with ':'
|
||||
|
@ -517,7 +517,7 @@ public class TurtleTests {
|
|||
|
||||
@Test
|
||||
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
|
||||
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
@ -217,6 +217,8 @@ public class XhtmlNodeTest {
|
|||
public void testEntityNumberGreaterThanFFFF_Hex() throws IOException {
|
||||
XhtmlNode x = new XhtmlParser().parse("<div>&#x1F637;</div>", "div");
|
||||
Assertions.assertEquals("\uD83D\uDE37", x.getFirstElement().getChildNodes().get(0).getContent());
|
||||
String html = new XhtmlComposer(false).compose(x);
|
||||
Assertions.assertEquals("<div>&#x1F637;</div>", html);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue