This commit is contained in:
dotasek.dev 2024-03-20 11:37:54 -04:00
parent d331c3a3e3
commit 77203326db
3 changed files with 245 additions and 205 deletions

View File

@ -1,56 +1,12 @@
package org.hl7.fhir.utilities.i18n.subtag; package org.hl7.fhir.utilities.i18n.subtag;
import java.io.IOException; import java.io.IOException;
import java.net.URL; import java.util.HashMap;
import java.util.*; import java.util.Map;
import java.util.Set;
public class LanguageSubtagRegistry { public class LanguageSubtagRegistry {
public static class Record {
final Map<String, String> fields = new HashMap<>();
final Map<String, List<String>> multiFields = new HashMap<>();
public void addField(String field, String value) {
if (isMultiField(field)) {
List<String> list = multiFields.get(field);
if (list == null) {
list = new ArrayList<>();
}
list.add(value);
multiFields.put(field, list);
} else {
fields.put(field, value);
}
}
}
//Fields
//[Type, Added, Description, Scope, Deprecated, Preferred-Value, Suppress-Script, Comments, Macrolanguage, Subtag, Prefix, Tag]
public static final String TYPE = "Type";
public static final String ADDED = "Added";
public static final String DESCRIPTION = "Description";
public static final String SCOPE = "Scope";
public static final String DEPRECATED = "Deprecated";
public static final String PREFERRED_VALUE = "Preferred-Value";
public static final String SUPPRESS_SCRIPT = "Suppress-Script";
public static final String COMMENTS = "Comments";
public static final String MACROLANGUAGE = "Macrolanguage";
public static final String SUBTAG = "Subtag";
public static final String PREFIX = "Prefix";
//public static final String TAG = "Tag";
//Types
// [grandfathered, variant, language, region, script, redundant, extlang]
public static final String VARIANT = "variant";
public static final String LANGUAGE = "language";
public static final String REGION = "region";
public static final String SCRIPT = "script";
public static final String EXTLANG = "extlang";
public static final String REDUNDANT = "redundant";
public static final String GRANDFATHERED = "grandfathered";
private final Map<String, LanguageSubtag> languages = new HashMap<>(); private final Map<String, LanguageSubtag> languages = new HashMap<>();
@ -58,6 +14,10 @@ public class LanguageSubtagRegistry {
return languages.keySet(); return languages.keySet();
} }
/**
 * Stores {@code language} in this registry's language map under {@code key}.
 *
 * @param key the key to store the subtag under
 * @param language the language subtag to store
 * @return the result of the underlying {@code Map.put}: the subtag previously stored under
 *         {@code key}, or {@code null} if there was none
 */
protected LanguageSubtag addLanguage(String key, LanguageSubtag language) {
return languages.put(key, language);
}
public boolean containsLanguage(String key) { public boolean containsLanguage(String key) {
return languages.containsKey(key); return languages.containsKey(key);
} }
@ -67,6 +27,10 @@ public class LanguageSubtagRegistry {
private final Map<String, ExtLangSubtag> extLangs = new HashMap<>(); private final Map<String, ExtLangSubtag> extLangs = new HashMap<>();
/**
 * Stores {@code extLang} in this registry's extended-language map under {@code key}.
 *
 * @param key the key to store the subtag under
 * @param extLang the extended language subtag to store
 * @return the result of the underlying {@code Map.put}: the subtag previously stored under
 *         {@code key}, or {@code null} if there was none
 */
protected ExtLangSubtag addExtLang(String key, ExtLangSubtag extLang) {
return extLangs.put(key, extLang);
}
public Set<String> getExtLangKeys() { public Set<String> getExtLangKeys() {
return extLangs.keySet(); return extLangs.keySet();
} }
@ -80,10 +44,13 @@ public class LanguageSubtagRegistry {
private final Map<String, ScriptSubtag> scripts = new HashMap<>(); private final Map<String, ScriptSubtag> scripts = new HashMap<>();
/**
 * Stores {@code script} in this registry's script map under {@code key}.
 *
 * @param key the key to store the subtag under
 * @param script the script subtag to store
 * @return the result of the underlying {@code Map.put}: the subtag previously stored under
 *         {@code key}, or {@code null} if there was none
 */
protected ScriptSubtag addScript(String key, ScriptSubtag script) {
return scripts.put(key, script);
}
public Set<String> getScriptKeys() { public Set<String> getScriptKeys() {
return scripts.keySet(); return scripts.keySet();
} }
public boolean containsScript(String key) { public boolean containsScript(String key) {
return scripts.containsKey(key); return scripts.containsKey(key);
} }
@ -93,6 +60,10 @@ public class LanguageSubtagRegistry {
private final Map<String, RegionSubtag> regions = new HashMap<>(); private final Map<String, RegionSubtag> regions = new HashMap<>();
/**
 * Stores {@code region} in this registry's region map under {@code key}.
 *
 * @param key the key to store the subtag under
 * @param region the region subtag to store
 * @return the result of the underlying {@code Map.put}: the subtag previously stored under
 *         {@code key}, or {@code null} if there was none
 */
protected RegionSubtag addRegion(String key, RegionSubtag region) {
return regions.put(key, region);
}
public Set<String> getRegionKeys() { public Set<String> getRegionKeys() {
return regions.keySet(); return regions.keySet();
} }
@ -106,6 +77,10 @@ public class LanguageSubtagRegistry {
private final Map<String, VariantSubtag> variants = new HashMap<>(); private final Map<String, VariantSubtag> variants = new HashMap<>();
/**
 * Stores {@code variant} in this registry's variant map under {@code key}.
 *
 * @param key the key to store the subtag under
 * @param variant the variant subtag to store
 * @return the result of the underlying {@code Map.put}: the subtag previously stored under
 *         {@code key}, or {@code null} if there was none
 */
protected VariantSubtag addVariant(String key, VariantSubtag variant) {
return variants.put(key, variant);
}
public Set<String> getVariantKeys() { public Set<String> getVariantKeys() {
return variants.keySet(); return variants.keySet();
} }
@ -116,161 +91,5 @@ public class LanguageSubtagRegistry {
public VariantSubtag getVariant(String key) { public VariantSubtag getVariant(String key) {
return variants.get(key); return variants.get(key);
} }
public LanguageSubtagRegistry() throws IOException {
this("lang.dat.txt");
}
public static boolean isMultiField(String field){
return DESCRIPTION.equals(field)
|| COMMENTS.equals(field)
|| PREFIX.equals(field);
}
protected LanguageSubtagRegistry(String resourceName) throws IOException {
ClassLoader classLoader = getClass().getClassLoader();
URL resourceUrl = classLoader.getResource(resourceName);
assert resourceUrl != null;
Scanner scanner = new Scanner(resourceUrl.openStream());
Record record = null;
String currentField = null;
String currentValue = null;
while (scanner.hasNext()) {
String line = scanner.nextLine();
if (line.equals("%%")) {
if (record != null) {
record.addField(currentField, currentValue);
}
record = processRecord(record);
currentField = null;
currentValue = null;
} else {
if (line.startsWith(" ")) {
assert currentValue != null;
currentValue = currentValue + " " + line.trim();
} else {
if (currentField != null && currentValue != null) {
record.addField(currentField, currentValue);
}
String[] split = line.split(":\\s");
if (split.length == 2) {
currentField = split[0];
currentValue = split[1];
}
}
}
}
if (record != null) {
record.addField(currentField, currentValue);
}
processRecord(record);
}
protected Record processRecord(Record record) {
if (record == null) {
return new Record();
}
String typeValue = record.fields.get(TYPE);
assert record.fields.containsKey(ADDED);
final Subtag subtag;
switch (typeValue) {
case LANGUAGE: subtag = processLanguageRecord(record); break;
case EXTLANG: subtag = processExtLangRecord(record); break;
case SCRIPT: subtag = processScriptRecord(record); break;
case REGION: subtag = processRegionRecord(record); break;
case VARIANT: subtag = processVariantRecord(record); break;
default: subtag = null;
}
assert subtag != null || typeValue.equals(GRANDFATHERED) || typeValue.equals(REDUNDANT);
if (subtag != null) {
addSubtag(subtag);
}
return new Record();
}
protected void addSubtag(Subtag subtag) {
assert subtag.getSubtag() != null;
if (subtag instanceof LanguageSubtag)
languages.put(subtag.getSubtag(), (LanguageSubtag) subtag);
else if (subtag instanceof ExtLangSubtag)
extLangs.put(subtag.getSubtag(), (ExtLangSubtag) subtag);
else if (subtag instanceof ScriptSubtag)
scripts.put(subtag.getSubtag(), (ScriptSubtag) subtag);
else if (subtag instanceof RegionSubtag)
regions.put(subtag.getSubtag(), (RegionSubtag) subtag);
else if (subtag instanceof VariantSubtag)
variants.put(subtag.getSubtag(), (VariantSubtag) subtag);
}
protected Subtag processVariantRecord(Record record) {
assert record.fields.containsKey(SUBTAG);
VariantSubtag variant = new VariantSubtag(record.fields.get(SUBTAG));
addCommonFieldsToSubtag(variant, record);
variant.setPreferredValue(record.fields.get(PREFERRED_VALUE));
if (record.multiFields.containsKey(PREFIX))
for (String prefix : record.multiFields.get(PREFIX)) {
variant.addPrefix(prefix);
}
return variant;
}
protected Subtag processScriptRecord(Record record) {
assert record.fields.containsKey(SUBTAG);
ScriptSubtag script = new ScriptSubtag(record.fields.get(SUBTAG));
addCommonFieldsToSubtag(script, record);
return script;
}
protected Subtag processRegionRecord(Record record) {
assert record.fields.containsKey(SUBTAG);
RegionSubtag region = new RegionSubtag(record.fields.get(SUBTAG));
addCommonFieldsToSubtag(region, record);
region.setPreferredValue(record.fields.get(PREFERRED_VALUE));
return region;
}
protected Subtag processExtLangRecord(Record record) {
assert record.fields.containsKey(SUBTAG);
ExtLangSubtag extLang = new ExtLangSubtag(record.fields.get(SUBTAG));
addCommonFieldsToSubtag(extLang, record);
extLang.setPreferredValue(record.fields.get(PREFERRED_VALUE));
extLang.setMacrolanguage(record.fields.get(MACROLANGUAGE));
if (record.multiFields.containsKey(PREFIX)) {
assert record.multiFields.get(PREFIX).size() == 1;
extLang.setPrefix(record.multiFields.get(PREFIX).get(0));
}
return extLang;
}
protected Subtag processLanguageRecord(Record record) {
assert record.fields.containsKey(SUBTAG);
LanguageSubtag language = new LanguageSubtag(record.fields.get(SUBTAG));
addCommonFieldsToSubtag(language, record);
language.setScope(record.fields.get(SCOPE));
language.setPreferredValue(record.fields.get(PREFERRED_VALUE));
language.setSuppressScript(record.fields.get(SUPPRESS_SCRIPT));
language.setMacrolanguage(record.fields.get(MACROLANGUAGE));
return language;
}
private void addCommonFieldsToSubtag(Subtag subtag, Record record) {
if (record.multiFields.containsKey(DESCRIPTION))
for (String description : record.multiFields.get(DESCRIPTION)) {
subtag.addDescription(description);
}
if (record.multiFields.containsKey(COMMENTS))
for (String comment : record.multiFields.get(COMMENTS)) {
subtag.addComments(comment);
}
subtag.setAdded(record.fields.get(ADDED));
subtag.setDeprecated(record.fields.get(DEPRECATED));
}
} }

View File

@ -0,0 +1,219 @@
package org.hl7.fhir.utilities.i18n.subtag;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.*;
public class LanguageSubtagRegistryLoader {
/**
 * Tells whether a record field may occur more than once within a single record.
 *
 * @param field the field name to check; {@code null} yields {@code false}
 * @return {@code true} for {@link #DESCRIPTION}, {@link #COMMENTS} and {@link #PREFIX},
 *         {@code false} for anything else
 */
public static boolean isMultiField(String field){
  if (DESCRIPTION.equals(field)) {
    return true;
  }
  if (COMMENTS.equals(field)) {
    return true;
  }
  return PREFIX.equals(field);
}
/**
 * Field/value pairs collected for one record while the data is being read.
 * Fields for which {@link #isMultiField(String)} is true accumulate every value in
 * {@code multiFields}; all other fields keep only their latest value in {@code fields}.
 */
public static class Record {
  final Map<String, String> fields = new HashMap<>();
  final Map<String, List<String>> multiFields = new HashMap<>();

  /**
   * Records one value for the given field.
   *
   * @param field the field name
   * @param value the value to append (multi-valued field) or to set (single-valued field)
   */
  public void addField(String field, String value) {
    if (!isMultiField(field)) {
      // Single-valued: a later occurrence replaces an earlier one.
      fields.put(field, value);
      return;
    }
    List<String> values = multiFields.get(field);
    if (values == null) {
      values = new ArrayList<>();
      multiFields.put(field, values);
    }
    values.add(value);
  }
}
//Fields
//[Type, Added, Description, Scope, Deprecated, Preferred-Value, Suppress-Script, Comments, Macrolanguage, Subtag, Prefix, Tag]
// Field names as they appear before the ": " separator on a data line. They are the keys
// used to look values up in Record.fields / Record.multiFields.
public static final String TYPE = "Type";
public static final String ADDED = "Added";
public static final String DESCRIPTION = "Description";
public static final String SCOPE = "Scope";
public static final String DEPRECATED = "Deprecated";
public static final String PREFERRED_VALUE = "Preferred-Value";
public static final String SUPPRESS_SCRIPT = "Suppress-Script";
public static final String COMMENTS = "Comments";
public static final String MACROLANGUAGE = "Macrolanguage";
public static final String SUBTAG = "Subtag";
public static final String PREFIX = "Prefix";
// The "Tag" field is listed above but no constant is defined for it; this loader never reads it.
//public static final String TAG = "Tag";
//Types
// [grandfathered, variant, language, region, script, redundant, extlang]
// Values of the Type field that processRecord dispatches on. Records typed "redundant" or
// "grandfathered" produce no subtag and are not added to the registry.
public static final String VARIANT = "variant";
public static final String LANGUAGE = "language";
public static final String REGION = "region";
public static final String SCRIPT = "script";
public static final String EXTLANG = "extlang";
public static final String REDUNDANT = "redundant";
public static final String GRANDFATHERED = "grandfathered";
/** Registry that receives every subtag produced by this loader. */
private final LanguageSubtagRegistry registry;
/**
 * Creates a loader that adds the subtags it reads to the given registry.
 *
 * @param registry the registry to populate; the reference is stored as-is, without a null check
 */
public LanguageSubtagRegistryLoader(LanguageSubtagRegistry registry) {
this.registry = registry;
}
/**
 * Loads the registry from the classpath resource named {@code lang.dat.txt}.
 *
 * @throws IOException propagated from {@link #loadFromResource(String)}
 */
public void loadFromDefaultResource() throws IOException {
loadFromResource("lang.dat.txt");
}
/**
 * Loads registry data from a classpath resource into the registry given at construction.
 *
 * @param resourceName name of the resource, resolved through this class's class loader
 * @throws IOException if the resource does not exist on the classpath or cannot be opened
 */
public void loadFromResource(String resourceName) throws IOException {
  ClassLoader classLoader = getClass().getClassLoader();
  URL resourceUrl = classLoader.getResource(resourceName);
  if (resourceUrl == null) {
    // An assert is a no-op unless the JVM runs with -ea, so a missing resource used to
    // surface as a bare NullPointerException on openStream().
    throw new IOException("Resource not found on classpath: " + resourceName);
  }
  // Close the stream even when parsing fails part-way through.
  try (InputStream inputStream = resourceUrl.openStream()) {
    load(inputStream);
  }
}
/**
 * Reads registry data line by line and hands every completed record to
 * {@link #processRecord(Record)}.
 *
 * <p>Line handling:
 * <ul>
 *   <li>{@code %%} ends the current record and starts a new one;</li>
 *   <li>a line starting with a space continues the value of the preceding field;</li>
 *   <li>any other line is split at the first colon-plus-whitespace into field name and value.
 *       Lines without that separator are ignored.</li>
 * </ul>
 * Field lines that appear before the first {@code %%} belong to no record and are dropped.
 *
 * @param inputStream the data to read; it is closed when this method returns
 * @throws IOException if reading from the stream failed
 */
private void load(InputStream inputStream) throws IOException {
  // Decode explicitly as UTF-8 instead of the platform default charset.
  // Closing the scanner also closes the underlying stream.
  try (Scanner scanner = new Scanner(inputStream, "UTF-8")) {
    Record record = null;
    String currentField = null;
    String currentValue = null;
    while (scanner.hasNextLine()) {
      String line = scanner.nextLine();
      if (line.equals("%%")) {
        addPendingField(record, currentField, currentValue);
        record = processRecord(record);
        currentField = null;
        currentValue = null;
      } else if (line.startsWith(" ")) {
        // Folded continuation line; skipped when blank or when there is no value to extend.
        String continuation = line.trim();
        if (currentValue != null && !continuation.isEmpty()) {
          currentValue = currentValue.isEmpty() ? continuation : currentValue + " " + continuation;
        }
      } else {
        addPendingField(record, currentField, currentValue);
        // A limit of 2 keeps values that themselves contain ": " in one piece.
        String[] split = line.split(":\\s", 2);
        if (split.length == 2) {
          currentField = split[0];
          currentValue = split[1];
        } else {
          // Not a "Field: value" line. Forget the pair just stored so it is not added again.
          currentField = null;
          currentValue = null;
        }
      }
    }
    // Scanner swallows read errors and simply stops; surface them rather than load a
    // silently truncated registry.
    IOException readFailure = scanner.ioException();
    if (readFailure != null) {
      throw readFailure;
    }
    addPendingField(record, currentField, currentValue);
    processRecord(record);
  }
}

/** Adds the field/value pair to the record, unless there is no record or no complete pair yet. */
private static void addPendingField(Record record, String field, String value) {
  if (record != null && field != null && value != null) {
    record.addField(field, value);
  }
}
/**
 * Converts a finished record into a subtag, adds it to the registry, and returns a fresh
 * record for the caller to fill next.
 *
 * <p>Only records whose {@link #TYPE} is language, extlang, script, region or variant produce a
 * subtag. Records of any other type, and records without a type, are skipped.
 *
 * @param record the completed record, or {@code null} when no record has been started yet
 * @return a new, empty record
 */
protected Record processRecord(Record record) {
  if (record == null) {
    return new Record();
  }
  String typeValue = record.fields.get(TYPE);
  if (typeValue == null) {
    // A switch on a null String throws NullPointerException. A record without a Type
    // (for example an empty one) cannot be classified, so it is skipped like other
    // unhandled records.
    return new Record();
  }
  assert record.fields.containsKey(ADDED);
  final Subtag subtag;
  switch (typeValue) {
    case LANGUAGE: subtag = processLanguageRecord(record); break;
    case EXTLANG: subtag = processExtLangRecord(record); break;
    case SCRIPT: subtag = processScriptRecord(record); break;
    case REGION: subtag = processRegionRecord(record); break;
    case VARIANT: subtag = processVariantRecord(record); break;
    default: subtag = null;
  }
  // Grandfathered and redundant records are the only types expected to yield no subtag.
  assert subtag != null || typeValue.equals(GRANDFATHERED) || typeValue.equals(REDUNDANT);
  if (subtag != null) {
    addSubtag(subtag);
  }
  return new Record();
}
/**
 * Adds a subtag to the registry collection that matches its concrete class, keyed by
 * {@code subtag.getSubtag()}. Subtags of any other class are ignored.
 *
 * @param subtag the subtag to register
 */
protected void addSubtag(Subtag subtag) {
  assert subtag.getSubtag() != null;
  if (subtag instanceof LanguageSubtag) {
    registry.addLanguage(subtag.getSubtag(), (LanguageSubtag) subtag);
    return;
  }
  if (subtag instanceof ExtLangSubtag) {
    registry.addExtLang(subtag.getSubtag(), (ExtLangSubtag) subtag);
    return;
  }
  if (subtag instanceof ScriptSubtag) {
    registry.addScript(subtag.getSubtag(), (ScriptSubtag) subtag);
    return;
  }
  if (subtag instanceof RegionSubtag) {
    registry.addRegion(subtag.getSubtag(), (RegionSubtag) subtag);
    return;
  }
  if (subtag instanceof VariantSubtag) {
    registry.addVariant(subtag.getSubtag(), (VariantSubtag) subtag);
  }
}
/**
 * Builds a {@code VariantSubtag} from a record: the common fields, the preferred value and
 * every {@link #PREFIX} value.
 *
 * @param record the record to convert
 * @return the populated variant subtag
 */
protected Subtag processVariantRecord(Record record) {
  assert record.fields.containsKey(SUBTAG);
  final Map<String, String> single = record.fields;
  final VariantSubtag result = new VariantSubtag(single.get(SUBTAG));
  addCommonFieldsToSubtag(result, record);
  result.setPreferredValue(single.get(PREFERRED_VALUE));
  if (!record.multiFields.containsKey(PREFIX)) {
    return result;
  }
  final List<String> prefixes = record.multiFields.get(PREFIX);
  for (String each : prefixes) {
    result.addPrefix(each);
  }
  return result;
}
/**
 * Builds a {@code ScriptSubtag} from a record; only the common fields apply.
 *
 * @param record the record to convert
 * @return the populated script subtag
 */
protected Subtag processScriptRecord(Record record) {
  assert record.fields.containsKey(SUBTAG);
  final String code = record.fields.get(SUBTAG);
  final ScriptSubtag result = new ScriptSubtag(code);
  addCommonFieldsToSubtag(result, record);
  return result;
}
/**
 * Builds a {@code RegionSubtag} from a record: the common fields plus the preferred value.
 *
 * @param record the record to convert
 * @return the populated region subtag
 */
protected Subtag processRegionRecord(Record record) {
  assert record.fields.containsKey(SUBTAG);
  final Map<String, String> single = record.fields;
  final RegionSubtag result = new RegionSubtag(single.get(SUBTAG));
  addCommonFieldsToSubtag(result, record);
  result.setPreferredValue(single.get(PREFERRED_VALUE));
  return result;
}
/**
 * Builds an {@code ExtLangSubtag} from a record: the common fields, preferred value,
 * macrolanguage and, when present, its prefix (exactly one is expected).
 *
 * @param record the record to convert
 * @return the populated extended language subtag
 */
protected Subtag processExtLangRecord(Record record) {
  assert record.fields.containsKey(SUBTAG);
  final Map<String, String> single = record.fields;
  final ExtLangSubtag result = new ExtLangSubtag(single.get(SUBTAG));
  addCommonFieldsToSubtag(result, record);
  result.setPreferredValue(single.get(PREFERRED_VALUE));
  result.setMacrolanguage(single.get(MACROLANGUAGE));
  if (!record.multiFields.containsKey(PREFIX)) {
    return result;
  }
  final List<String> prefixes = record.multiFields.get(PREFIX);
  assert prefixes.size() == 1;
  result.setPrefix(prefixes.get(0));
  return result;
}
/**
 * Builds a {@code LanguageSubtag} from a record: the common fields plus scope, preferred
 * value, suppress-script and macrolanguage.
 *
 * @param record the record to convert
 * @return the populated language subtag
 */
protected Subtag processLanguageRecord(Record record) {
  assert record.fields.containsKey(SUBTAG);
  final Map<String, String> single = record.fields;
  final LanguageSubtag result = new LanguageSubtag(single.get(SUBTAG));
  addCommonFieldsToSubtag(result, record);
  result.setScope(single.get(SCOPE));
  result.setPreferredValue(single.get(PREFERRED_VALUE));
  result.setSuppressScript(single.get(SUPPRESS_SCRIPT));
  result.setMacrolanguage(single.get(MACROLANGUAGE));
  return result;
}
/**
 * Copies the fields shared by every subtag type from the record onto the subtag: all
 * descriptions, all comments, the added value and the deprecated value.
 *
 * @param subtag the subtag to populate
 * @param record the record supplying the values
 */
private void addCommonFieldsToSubtag(Subtag subtag, Record record) {
  final Map<String, List<String>> multi = record.multiFields;
  if (multi.containsKey(DESCRIPTION)) {
    final List<String> descriptions = multi.get(DESCRIPTION);
    for (String text : descriptions) {
      subtag.addDescription(text);
    }
  }
  if (multi.containsKey(COMMENTS)) {
    final List<String> comments = multi.get(COMMENTS);
    for (String text : comments) {
      subtag.addComments(text);
    }
  }
  final Map<String, String> single = record.fields;
  subtag.setAdded(single.get(ADDED));
  subtag.setDeprecated(single.get(DEPRECATED));
}
}

View File

@ -19,6 +19,8 @@ public class LanguageSubtagRegistryTest {
*/ */
LanguageSubtagRegistry registry = new LanguageSubtagRegistry(); LanguageSubtagRegistry registry = new LanguageSubtagRegistry();
LanguageSubtagRegistryLoader loader = new LanguageSubtagRegistryLoader(registry);
loader.loadFromDefaultResource();
/* Test entries of every subtag type (language, script, variant, extLang, region) /* Test entries of every subtag type (language, script, variant, extLang, region)
These should cover both simple, and more complex entries with a larger number These should cover both simple, and more complex entries with a larger number