Extended Unicode support

This commit is contained in:
Grahame Grieve 2024-01-10 08:00:32 +11:00
parent b1868e165a
commit 7c6d5fa688
10 changed files with 30 additions and 24 deletions

View File

@ -391,7 +391,7 @@ public class FHIRLexer {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16); int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:
@ -440,7 +440,7 @@ public class FHIRLexer {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16); int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:

View File

@ -1767,7 +1767,7 @@ public class FHIRPathEngine {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16); int uc = Integer.parseInt(s.substring(i, i + 4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 3; i = i + 3;
break; break;
default: default:

View File

@ -933,7 +933,7 @@ public class Turtle {
} }
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character"); throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc); b.append(Character.toString(uc));
i = i + l; i = i + l;
break; break;
default: default:

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16); int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:
@ -499,8 +499,8 @@ public class FHIRLexer {
break; break;
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16); int uc = Integer.parseInt(s.substring(i, i+4), 32);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:

View File

@ -1767,8 +1767,8 @@ public class FHIRPathEngine {
break; break;
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i + 4), 16); int uc = Integer.parseInt(s.substring(i, i + 4), 32);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 3; i = i + 3;
break; break;
default: default:

View File

@ -451,7 +451,7 @@ public class FHIRLexer {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16); int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:
@ -499,8 +499,8 @@ public class FHIRLexer {
break; break;
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16); int uc = Integer.parseInt(s.substring(i, i+4), 32);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 4; i = i + 4;
break; break;
default: default:

View File

@ -1779,7 +1779,7 @@ public class FHIRPathEngine {
case 'u': case 'u':
i++; i++;
int uc = Integer.parseInt(s.substring(i, i+4), 16); int uc = Integer.parseInt(s.substring(i, i+4), 16);
b.append((char) uc); b.append(Character.toString(uc));
i = i + 3; i = i + 3;
break; break;
default: default:

View File

@ -179,8 +179,8 @@ public class TextFile {
//while (sr.ready()) { Commented out by Claude Nanjo (1/14/2014) - sr.ready() always returns false - please remove if change does not impact other areas of codebase //while (sr.ready()) { Commented out by Claude Nanjo (1/14/2014) - sr.ready() always returns false - please remove if change does not impact other areas of codebase
int i = -1; int i = -1;
while((i = sr.read()) > -1) { while((i = sr.read()) > -1) {
char c = (char) i; String s = Character.toString(i);
b.append(c); b.append(s);
} }
sr.close(); sr.close();

View File

@ -49,7 +49,7 @@ import org.hl7.fhir.utilities.Utilities;
public class Turtle { public class Turtle {
public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\u00A0-\uFFFE"; public static final String GOOD_IRI_CHAR = "a-zA-Z0-9\\x{00A0}-\\x{10FFFF}";
public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+"; public static final String IRI_URL = "(([a-z])+:)*((%[0-9a-fA-F]{2})|[&'\\(\\)*+,;:@_~?!$\\/\\-\\#.\\="+GOOD_IRI_CHAR+"])+";
public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?"; public static final String LANG_REGEX = "[a-z]{2}(\\-[a-zA-Z]{2})?";
@ -974,15 +974,21 @@ public class Turtle {
case 'u': case 'u':
i++; i++;
int l = 4; int l = 4;
int uc = Integer.parseInt(s.substring(i, i+l), 16); String ss = s.substring(i, i+l);
int uc = Integer.parseInt(ss, 16);
if (uc < (isUri ? 33 : 32)) { if (uc < (isUri ? 33 : 32)) {
l = 8; l = 8;
uc = Integer.parseInt(s.substring(i, i+8), 16); ss = s.substring(i, i+l);
uc = Integer.parseInt(ss, 16);
} }
if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E))) if (uc < (isUri ? 33 : 32) || (isUri && (uc == 0x3C || uc == 0x3E)))
throw new FHIRFormatError("Illegal unicode character"); throw new FHIRFormatError("Illegal unicode character");
b.append((char) uc); try {
i = i + l; b.append(Character.toString(uc));
} catch (Exception e) {
throw new FHIRFormatError("Illegal Unicode Sequence: "+ss);
}
i = i + l - 1; // -1 cause we're about to i++
break; break;
default: default:
throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i)); throw new FHIRFormatError("Unknown character escape \\"+s.charAt(i));

View File

@ -135,7 +135,7 @@ public class TurtleTests {
@Test @Test
public void test_labeled_blank_node_with_PN_CHARS_BASE_character_boundaries() throws Exception { public void test_labeled_blank_node_with_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), false); doTest(BaseTestingUtilities.loadTestResource("turtle", "labeled_blank_node_with_PN_CHARS_BASE_character_boundaries.ttl"), true);
} }
@Test @Test
@ -478,7 +478,7 @@ public class TurtleTests {
@Test @Test
public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries() throws Exception { public void test_localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false); doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
} }
// don't need to support property names with ':' // don't need to support property names with ':'
@ -517,7 +517,7 @@ public class TurtleTests {
@Test @Test
public void test_localName_with_nfc_PN_CHARS_BASE_character_boundaries() throws Exception { public void test_localName_with_nfc_PN_CHARS_BASE_character_boundaries() throws Exception {
doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), false); doTest(BaseTestingUtilities.loadTestResource("turtle", "localName_with_nfc_PN_CHARS_BASE_character_boundaries.ttl"), true);
} }
@Test @Test