Basic lang file loading test and initial implementation

This commit is contained in:
dotasek.dev 2024-03-19 17:50:20 -04:00
parent 426a77cb0b
commit 38e9aea8ac
9 changed files with 49420 additions and 0 deletions

View File

@ -0,0 +1,24 @@
package org.hl7.fhir.utilities.i18n.subtag;
import lombok.Getter;
import lombok.Setter;
/**
 * Subtag built from a registry record of type {@code extlang}.
 *
 * <p>Besides the data common to every {@link Subtag}, it carries the record's
 * {@code Preferred-Value}, {@code Macrolanguage} and {@code Prefix} fields. Each of them is
 * {@code null} until a setter is called.
 */
@Getter
@Setter
public class ExtLangSubtag extends Subtag {

  /** Value of the {@code Preferred-Value} field. */
  private String preferredValue;

  /** Value of the {@code Macrolanguage} field. */
  private String macrolanguage;

  /** Value of the {@code Prefix} field. */
  private String prefix;

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected ExtLangSubtag(String subtag) {
    super(subtag);
  }
}

View File

@ -0,0 +1,29 @@
package org.hl7.fhir.utilities.i18n.subtag;
import lombok.Getter;
import lombok.Setter;
/**
 * Subtag built from a registry record of type {@code language}.
 *
 * <p>Besides the data common to every {@link Subtag}, it carries the record's {@code Scope},
 * {@code Preferred-Value}, {@code Suppress-Script} and {@code Macrolanguage} fields. Each of
 * them is {@code null} until a setter is called.
 */
@Getter
@Setter
public class LanguageSubtag extends Subtag {

  /** Value of the {@code Scope} field. */
  private String scope;

  /** Value of the {@code Preferred-Value} field. */
  private String preferredValue;

  /** Value of the {@code Suppress-Script} field. */
  private String suppressScript;

  /** Value of the {@code Macrolanguage} field. */
  private String macrolanguage;

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected LanguageSubtag(String subtag) {
    super(subtag);
  }
}

View File

@ -0,0 +1,276 @@
package org.hl7.fhir.utilities.i18n.subtag;
import java.io.IOException;
import java.net.URL;
import java.util.*;
/**
 * Loads a language subtag registry file from the classpath and indexes its records by subtag.
 *
 * <p>The file is read as a sequence of records separated by lines that contain only
 * {@code %%}. Each record is a list of {@code Field: value} lines; a line that starts with a
 * space continues the value of the preceding field. Anything before the first {@code %%} line
 * is treated as a header and ignored.
 *
 * <p>Records whose {@code Type} is {@code language}, {@code extlang}, {@code script},
 * {@code region} or {@code variant} are converted into the matching {@link Subtag} subclass and
 * stored. Records of any other type (for example {@code grandfathered} or {@code redundant})
 * are parsed but not stored.
 */
public class LanguageSubtagRegistry {

  /**
   * Raw field content of a single registry record, before it is turned into a {@link Subtag}.
   *
   * <p>Fields that may occur several times in one record (see {@link #isMultiField(String)})
   * are collected in {@code multiFields}; all other fields are kept in {@code fields}, where a
   * later value replaces an earlier one.
   */
  public static class Record {
    final Map<String, String> fields = new HashMap<>();
    final Map<String, List<String>> multiFields = new HashMap<>();

    /**
     * Stores one field of the record.
     *
     * @param field the field name, e.g. {@code "Description"}
     * @param value the complete (already unfolded) field value
     */
    public void addField(String field, String value) {
      if (isMultiField(field)) {
        multiFields.computeIfAbsent(field, key -> new ArrayList<>()).add(value);
      } else {
        fields.put(field, value);
      }
    }
  }

  // Field names.
  // The registry uses: Type, Added, Description, Scope, Deprecated, Preferred-Value,
  // Suppress-Script, Comments, Macrolanguage, Subtag, Prefix, Tag.
  // "Tag" is listed for completeness only; this class never reads it.
  public static final String TYPE = "Type";
  public static final String ADDED = "Added";
  public static final String DESCRIPTION = "Description";
  public static final String SCOPE = "Scope";
  public static final String DEPRECATED = "Deprecated";
  public static final String PREFERRED_VALUE = "Preferred-Value";
  public static final String SUPPRESS_SCRIPT = "Suppress-Script";
  public static final String COMMENTS = "Comments";
  public static final String MACROLANGUAGE = "Macrolanguage";
  public static final String SUBTAG = "Subtag";
  public static final String PREFIX = "Prefix";

  // Values of the Type field:
  // grandfathered, variant, language, region, script, redundant, extlang.
  public static final String VARIANT = "variant";
  public static final String LANGUAGE = "language";
  public static final String REGION = "region";
  public static final String SCRIPT = "script";
  public static final String EXTLANG = "extlang";
  public static final String REDUNDANT = "redundant";
  public static final String GRANDFATHERED = "grandfathered";

  /** Line that separates two records in the registry file. */
  private static final String RECORD_SEPARATOR = "%%";

  private final Map<String, LanguageSubtag> languages = new HashMap<>();
  private final Map<String, ExtLangSubtag> extLangs = new HashMap<>();
  private final Map<String, ScriptSubtag> scripts = new HashMap<>();
  private final Map<String, RegionSubtag> regions = new HashMap<>();
  private final Map<String, VariantSubtag> variants = new HashMap<>();

  /**
   * Loads the registry from the classpath resource {@code lang.dat.txt}.
   *
   * @throws IOException if the resource is missing or cannot be read
   */
  public LanguageSubtagRegistry() throws IOException {
    this("lang.dat.txt");
  }

  /**
   * Loads the registry from the given classpath resource.
   *
   * @param resourceName name of the resource, resolved through this class's class loader
   * @throws IOException if the resource is missing or cannot be read
   */
  protected LanguageSubtagRegistry(String resourceName) throws IOException {
    ClassLoader classLoader = getClass().getClassLoader();
    URL resourceUrl = classLoader.getResource(resourceName);
    if (resourceUrl == null) {
      // An assert would be skipped in production and surface as an unexplained NPE.
      throw new IOException("Language subtag registry resource not found: " + resourceName);
    }

    // Explicit charset: the result must not depend on the platform default encoding.
    // Closing the scanner also closes the underlying stream.
    try (Scanner scanner = new Scanner(resourceUrl.openStream(), "UTF-8")) {
      Record record = null; // stays null until the first separator line has been seen
      String currentField = null;
      String currentValue = null;

      while (scanner.hasNextLine()) {
        String line = scanner.nextLine();
        if (RECORD_SEPARATOR.equals(line)) {
          flushField(record, currentField, currentValue);
          record = processRecord(record);
          currentField = null;
          currentValue = null;
        } else if (line.startsWith(" ")) {
          // Folded line: belongs to the value that is currently being collected.
          if (currentValue != null) {
            currentValue = currentValue + " " + line.trim();
          }
        } else {
          flushField(record, currentField, currentValue);
          // Limit 2: only the first separator splits name from value, so values that
          // themselves contain ": " are kept intact.
          String[] split = line.split(":\\s", 2);
          if (split.length == 2) {
            currentField = split[0];
            currentValue = split[1];
          } else {
            // Not a field line; forget the previous field so it is not stored twice.
            currentField = null;
            currentValue = null;
          }
        }
      }

      // Scanner swallows read errors; surface them instead of returning a truncated registry.
      IOException readFailure = scanner.ioException();
      if (readFailure != null) {
        throw readFailure;
      }

      flushField(record, currentField, currentValue);
      processRecord(record);
    }
  }

  /** Adds the pending field to the record, if there is both a record and a complete field. */
  private static void flushField(Record record, String field, String value) {
    if (record != null && field != null && value != null) {
      record.addField(field, value);
    }
  }

  /**
   * Tells whether a field may occur more than once within a record.
   *
   * @param field the field name
   * @return {@code true} for {@code Description}, {@code Comments} and {@code Prefix}
   */
  public static boolean isMultiField(String field) {
    return DESCRIPTION.equals(field)
      || COMMENTS.equals(field)
      || PREFIX.equals(field);
  }

  /**
   * @return an unmodifiable view of the subtags of all loaded language records
   */
  public Set<String> getLanguageKeys() {
    return Collections.unmodifiableSet(languages.keySet());
  }

  public boolean containsLanguage(String key) {
    return languages.containsKey(key);
  }

  /**
   * @return the language entry for {@code key}, or {@code null} if there is none
   */
  public LanguageSubtag getLanguage(String key) {
    return languages.get(key);
  }

  /**
   * @return an unmodifiable view of the subtags of all loaded extlang records
   */
  public Set<String> getExtLangKeys() {
    return Collections.unmodifiableSet(extLangs.keySet());
  }

  public boolean containsExtLang(String key) {
    return extLangs.containsKey(key);
  }

  /**
   * @return the extlang entry for {@code key}, or {@code null} if there is none
   */
  public ExtLangSubtag getExtLang(String key) {
    return extLangs.get(key);
  }

  /**
   * @return an unmodifiable view of the subtags of all loaded script records
   */
  public Set<String> getScriptKeys() {
    return Collections.unmodifiableSet(scripts.keySet());
  }

  public boolean containsScript(String key) {
    return scripts.containsKey(key);
  }

  /**
   * @return the script entry for {@code key}, or {@code null} if there is none
   */
  public ScriptSubtag getScript(String key) {
    return scripts.get(key);
  }

  /**
   * @return an unmodifiable view of the subtags of all loaded region records
   */
  public Set<String> getRegionKeys() {
    return Collections.unmodifiableSet(regions.keySet());
  }

  public boolean containsRegion(String key) {
    return regions.containsKey(key);
  }

  /**
   * @return the region entry for {@code key}, or {@code null} if there is none
   */
  public RegionSubtag getRegion(String key) {
    return regions.get(key);
  }

  /**
   * @return an unmodifiable view of the subtags of all loaded variant records
   */
  public Set<String> getVariantKeys() {
    return Collections.unmodifiableSet(variants.keySet());
  }

  public boolean containsVariant(String key) {
    return variants.containsKey(key);
  }

  /**
   * @return the variant entry for {@code key}, or {@code null} if there is none
   */
  public VariantSubtag getVariant(String key) {
    return variants.get(key);
  }

  /**
   * Converts a finished record into a subtag, stores it, and hands back a fresh record for the
   * parser to fill next.
   *
   * @param record the record to process; {@code null} means no record has been started yet
   * @return a new, empty record
   */
  protected Record processRecord(Record record) {
    if (record == null) {
      return new Record();
    }

    String typeValue = record.fields.get(TYPE);
    if (typeValue == null) {
      // Empty or untyped record (e.g. a trailing separator): nothing to classify.
      return new Record();
    }
    assert record.fields.containsKey(ADDED);

    final Subtag subtag;
    switch (typeValue) {
      case LANGUAGE:
        subtag = processLanguageRecord(record);
        break;
      case EXTLANG:
        subtag = processExtLangRecord(record);
        break;
      case SCRIPT:
        subtag = processScriptRecord(record);
        break;
      case REGION:
        subtag = processRegionRecord(record);
        break;
      case VARIANT:
        subtag = processVariantRecord(record);
        break;
      default:
        subtag = null;
    }

    assert subtag != null || typeValue.equals(GRANDFATHERED) || typeValue.equals(REDUNDANT);
    if (subtag != null) {
      addSubtag(subtag);
    }
    return new Record();
  }

  /**
   * Stores the subtag in the map that matches its concrete class, keyed by its subtag code.
   * An entry with the same code replaces the previous one.
   */
  protected void addSubtag(Subtag subtag) {
    assert subtag.getSubtag() != null;
    if (subtag instanceof LanguageSubtag) {
      languages.put(subtag.getSubtag(), (LanguageSubtag) subtag);
    } else if (subtag instanceof ExtLangSubtag) {
      extLangs.put(subtag.getSubtag(), (ExtLangSubtag) subtag);
    } else if (subtag instanceof ScriptSubtag) {
      scripts.put(subtag.getSubtag(), (ScriptSubtag) subtag);
    } else if (subtag instanceof RegionSubtag) {
      regions.put(subtag.getSubtag(), (RegionSubtag) subtag);
    } else if (subtag instanceof VariantSubtag) {
      variants.put(subtag.getSubtag(), (VariantSubtag) subtag);
    }
  }

  /** Builds a {@link VariantSubtag} with its preferred value and all of its prefixes. */
  protected Subtag processVariantRecord(Record record) {
    assert record.fields.containsKey(SUBTAG);
    VariantSubtag variant = new VariantSubtag(record.fields.get(SUBTAG));
    addCommonFieldsToSubtag(variant, record);
    variant.setPreferredValue(record.fields.get(PREFERRED_VALUE));
    if (record.multiFields.containsKey(PREFIX)) {
      for (String prefix : record.multiFields.get(PREFIX)) {
        variant.addPrefix(prefix);
      }
    }
    return variant;
  }

  /** Builds a {@link ScriptSubtag}; script records only carry the common fields. */
  protected Subtag processScriptRecord(Record record) {
    assert record.fields.containsKey(SUBTAG);
    ScriptSubtag script = new ScriptSubtag(record.fields.get(SUBTAG));
    addCommonFieldsToSubtag(script, record);
    return script;
  }

  /** Builds a {@link RegionSubtag} with its preferred value. */
  protected Subtag processRegionRecord(Record record) {
    assert record.fields.containsKey(SUBTAG);
    RegionSubtag region = new RegionSubtag(record.fields.get(SUBTAG));
    addCommonFieldsToSubtag(region, record);
    region.setPreferredValue(record.fields.get(PREFERRED_VALUE));
    return region;
  }

  /**
   * Builds an {@link ExtLangSubtag} with its preferred value, macrolanguage and prefix.
   * Only the first {@code Prefix} value of the record is used.
   */
  protected Subtag processExtLangRecord(Record record) {
    assert record.fields.containsKey(SUBTAG);
    ExtLangSubtag extLang = new ExtLangSubtag(record.fields.get(SUBTAG));
    addCommonFieldsToSubtag(extLang, record);
    extLang.setPreferredValue(record.fields.get(PREFERRED_VALUE));
    extLang.setMacrolanguage(record.fields.get(MACROLANGUAGE));
    if (record.multiFields.containsKey(PREFIX)) {
      assert record.multiFields.get(PREFIX).size() == 1;
      extLang.setPrefix(record.multiFields.get(PREFIX).get(0));
    }
    return extLang;
  }

  /**
   * Builds a {@link LanguageSubtag} with its scope, preferred value, suppress-script and
   * macrolanguage.
   */
  protected Subtag processLanguageRecord(Record record) {
    assert record.fields.containsKey(SUBTAG);
    LanguageSubtag language = new LanguageSubtag(record.fields.get(SUBTAG));
    addCommonFieldsToSubtag(language, record);
    language.setScope(record.fields.get(SCOPE));
    language.setPreferredValue(record.fields.get(PREFERRED_VALUE));
    language.setSuppressScript(record.fields.get(SUPPRESS_SCRIPT));
    language.setMacrolanguage(record.fields.get(MACROLANGUAGE));
    return language;
  }

  /** Copies descriptions, comments, added and deprecated from the record onto the subtag. */
  private void addCommonFieldsToSubtag(Subtag subtag, Record record) {
    if (record.multiFields.containsKey(DESCRIPTION)) {
      for (String description : record.multiFields.get(DESCRIPTION)) {
        subtag.addDescription(description);
      }
    }
    if (record.multiFields.containsKey(COMMENTS)) {
      for (String comment : record.multiFields.get(COMMENTS)) {
        subtag.addComments(comment);
      }
    }
    subtag.setAdded(record.fields.get(ADDED));
    subtag.setDeprecated(record.fields.get(DEPRECATED));
  }
}

View File

@ -0,0 +1,16 @@
package org.hl7.fhir.utilities.i18n.subtag;
import lombok.Getter;
import lombok.Setter;
/**
 * Subtag built from a registry record of type {@code region}.
 *
 * <p>Besides the data common to every {@link Subtag}, it carries the record's
 * {@code Preferred-Value} field, which is {@code null} until the setter is called.
 */
@Getter
@Setter
public class RegionSubtag extends Subtag {

  /** Value of the {@code Preferred-Value} field. */
  private String preferredValue;

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected RegionSubtag(String subtag) {
    super(subtag);
  }
}

View File

@ -0,0 +1,10 @@
package org.hl7.fhir.utilities.i18n.subtag;
/**
 * Subtag built from a registry record of type {@code script}.
 *
 * <p>It adds no fields of its own; everything it holds is inherited from {@link Subtag}.
 */
public class ScriptSubtag extends Subtag {

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected ScriptSubtag(final String subtag) {
    super(subtag);
  }
}

View File

@ -0,0 +1,45 @@
package org.hl7.fhir.utilities.i18n.subtag;
import lombok.Getter;
import lombok.Setter;
import java.util.ArrayList;
import java.util.List;
/**
 * Base class for every kind of registry entry.
 *
 * <p>Holds the subtag code itself, the {@code Added} and {@code Deprecated} values, and the
 * lists of descriptions and comments. The lists can only be extended through the protected
 * {@code add...} methods; the public getters hand out unmodifiable copies.
 */
public abstract class Subtag {

  /** The subtag code; fixed at construction time. */
  @Getter
  private final String subtag;

  @Getter @Setter
  private String added;

  @Getter @Setter
  private String deprecated;

  private List<String> descriptions = new ArrayList<>();

  private List<String> comments = new ArrayList<>();

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected Subtag(String subtag) {
    this.subtag = subtag;
  }

  /**
   * Appends one description.
   *
   * @return the result of {@link List#add(Object)} on the backing list
   */
  protected boolean addDescription(String description) {
    return this.descriptions.add(description);
  }

  /**
   * Appends one comment.
   *
   * @return the result of {@link List#add(Object)} on the backing list
   */
  protected boolean addComments(String comment) {
    return this.comments.add(comment);
  }

  /**
   * @return an unmodifiable copy of the descriptions added so far
   */
  public List<String> getDescriptions() {
    final List<String> snapshot = List.copyOf(this.descriptions);
    return snapshot;
  }

  /**
   * @return an unmodifiable copy of the comments added so far
   */
  public List<String> getComments() {
    final List<String> snapshot = List.copyOf(this.comments);
    return snapshot;
  }
}

View File

@ -0,0 +1,30 @@
package org.hl7.fhir.utilities.i18n.subtag;
import lombok.Getter;
import lombok.Setter;
import java.util.ArrayList;
import java.util.List;
/**
 * Subtag built from a registry record of type {@code variant}.
 *
 * <p>Besides the data common to every {@link Subtag}, it carries the record's
 * {@code Preferred-Value} field and all of its {@code Prefix} values.
 */
public class VariantSubtag extends Subtag {

  /** Value of the {@code Preferred-Value} field; {@code null} until the setter is called. */
  @Getter @Setter
  private String preferredValue;

  /** Prefix values in the order they were added; never reassigned. */
  private final List<String> prefixes = new ArrayList<>();

  /**
   * @param subtag the subtag code identifying this entry
   */
  protected VariantSubtag(String subtag) {
    super(subtag);
  }

  /**
   * Appends one prefix.
   *
   * @param prefix the prefix value to add
   * @return the result of {@link List#add(Object)} on the backing list
   */
  protected boolean addPrefix(String prefix) {
    return prefixes.add(prefix);
  }

  /**
   * @return an unmodifiable copy of the prefixes added so far
   */
  public List<String> getPrefixes() {
    return List.copyOf(prefixes);
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,67 @@
package org.hl7.fhir.utilities.i18n.subtag;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Smoke test for {@link LanguageSubtagRegistry}: loads the default registry resource and checks
 * a few hand-picked records against the values quoted from the data file.
 */
public class LanguageSubtagRegistryTest {

  @Test
  public void initializationTest() throws IOException {
    /*
      Entry counts observed when this test was written:
      languages.size(): 8259
      extLangs.size(): 253
      scripts.size(): 222
      regions.size(): 305
      variants.size(): 113
    */
    LanguageSubtagRegistry registry = new LanguageSubtagRegistry();

    // Spot-check a few entries.

    /*
      Type: language
      Subtag: ppa
      Description: Pao
      Added: 2009-07-29
      Deprecated: 2016-05-30
      Preferred-Value: bfy
    */
    LanguageSubtag ppa = registry.getLanguage("ppa");
    // Fail with a clear assertion rather than an NPE if the entry is missing.
    assertNotNull(ppa);
    assertEquals("Pao", ppa.getDescriptions().get(0));
    assertEquals("2009-07-29", ppa.getAdded());
    assertEquals("2016-05-30", ppa.getDeprecated());
    assertEquals("bfy", ppa.getPreferredValue());
    assertNull(ppa.getScope());
    assertNull(ppa.getSuppressScript());
    assertNull(ppa.getMacrolanguage());

    /*
      Type: language
      Subtag: ia
      Description: Interlingua (International Auxiliary Language
        Association)
      Added: 2005-10-16
    */
    // The description is folded over two lines in the file and must come back as one value.
    LanguageSubtag ia = registry.getLanguage("ia");
    assertNotNull(ia);
    assertEquals("Interlingua (International Auxiliary Language Association)", ia.getDescriptions().get(0));

    /*
      Type: script
      Subtag: Cpmn
      Description: Cypro-Minoan
      Added: 2017-08-13
    */
    ScriptSubtag cpmn = registry.getScript("Cpmn");
    assertNotNull(cpmn);
    assertEquals("Cypro-Minoan", cpmn.getDescriptions().get(0));
    assertEquals("2017-08-13", cpmn.getAdded());
    // getComments() returns a list copy, never null; a record without comments yields an empty list.
    assertTrue(cpmn.getComments().isEmpty());
    assertNull(cpmn.getDeprecated());
  }
}