Extended Unicode support

This commit is contained in:
Grahame Grieve 2024-01-10 08:00:32 +11:00
parent b1868e165a
commit 7c6d5fa688
10 changed files with 30 additions and 24 deletions

View File

@ -391,7 +391,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -440,7 +440,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1767,7 +1767,7 @@ public class FHIRPathEngine {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -933,7 +933,7 @@ public class Turtle {
}
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc);
b.append(Character.toString(uc));
i = i + l;
break;
default:

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -499,8 +499,8 @@ public class FHIRLexer {
break;
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
int uc = Integer.parseInt(s.substring(i, i+4), 32);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1767,8 +1767,8 @@ public class FHIRPathEngine {
break;
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc);
int uc = Integer.parseInt(s.substring(i, i + 4), 32);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 4;
break;
default:
@ -499,8 +499,8 @@ public class FHIRLexer {
break;
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
int uc = Integer.parseInt(s.substring(i, i+4), 32);
b.append(Character.toString(uc));
i = i + 4;
break;
default:

View File

@ -1779,7 +1779,7 @@ public class FHIRPathEngine {
case 'u':
i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc);
b.append(Character.toString(uc));
i = i + 3;
break;
default:

View File

@ -179,12 +179,12 @@ public class TextFile {
//while (sr.ready()) { Commented out by Claude Nanjo (1/14/2014) - sr.ready() always returns false - please remove if change does not impact other areas of codebase
int i = -1;
while((i = sr.read()) > -1) {
char c = (char) i;
b.append(c);
String s = Character.toString(i);
b.append(s);
}
sr.close();
return b.toString().replace("\uFEFF", "");
return b.toString().replace("\uFEFF", "");
}
public static byte[] streamToBytes(InputStream input) throws IOException {

View File

@ -49,7 +49,7 @@ import org.hl7.fhir.utilities.Utilities;
public class Turtle {
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE";
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\\x{00A0}-\\x{10FFFF}";
public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+";
public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
@ -974,15 +974,21 @@ public class Turtle {
case 'u':
i++;
int l = 4;
int uc = Integer.parseInt(s.substring(i, i+l), 16);
String ss = s.substring(i, i+l);
int uc = Integer.parseInt(ss, 16);
if (uc < (isUri ? 33 : 32)) {
l = 8;
uc = Integer.parseInt(s.substring(i, i+8), 16);
ss = s.substring(i, i+l);
uc = Integer.parseInt(ss, 16);
}
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc);
i = i + l;
try {
b.append(Character.toString(uc));
} catch (Exception e) {
throw new FHIRFormatError("Illegal Unicode Sequence: "+ss);
}
i = i + l - 1; // -1 cause we're about to i++
break;
default:
throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));

View File

@ -135,7 +135,7 @@ public class TurtleTests {
@Test
public void test_labeled_blank_node_with_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
@Test
@ -478,7 +478,7 @@ public class TurtleTests {
@Test
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
// don't need to support property names with ':'
@ -517,7 +517,7 @@ public class TurtleTests {
@Test
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false);
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
}
@Test