commit
e054bff5a6
|
@ -644,6 +644,10 @@ public class Utilities {
|
|||
return s.toString();
|
||||
}
|
||||
|
||||
public static String padRight(int num, char c, int len) {
|
||||
return padRight(Integer.toString(num), c, len);
|
||||
}
|
||||
|
||||
|
||||
public static String padLeft(String src, char c, int len) {
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
|
|
@ -139,6 +139,7 @@ public class ValidatorCli {
|
|||
new SpreadsheetTask(),
|
||||
new TestsTask(),
|
||||
new TxTestsTask(),
|
||||
new AiTestsTask(),
|
||||
new TransformTask(),
|
||||
new VersionTask(),
|
||||
new CodeGenTask(),
|
||||
|
|
|
@ -3,8 +3,19 @@ package org.hl7.fhir.validation.ai;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.hl7.fhir.utilities.json.model.JsonObject;
|
||||
|
||||
public abstract class AIAPI {
|
||||
|
||||
|
||||
protected JsonObject config;
|
||||
|
||||
/**
 * Stores the service-specific configuration object in the protected {@code config}
 * field so that subclasses can read their settings from it.
 *
 * @param config the JSON configuration for this AI service
 */
public AIAPI(JsonObject config) {
  // the implicit superclass constructor call is sufficient here
  this.config = config;
}
|
||||
|
||||
|
||||
public abstract List<CodeAndTextValidationResult> validateCodings(List<CodeAndTextValidationRequest> requests) throws IOException;
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,241 @@
|
|||
package org.hl7.fhir.validation.ai;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.hl7.fhir.utilities.Utilities;
|
||||
import org.hl7.fhir.utilities.http.ManagedWebAccess;
|
||||
import org.hl7.fhir.utilities.json.model.JsonObject;
|
||||
import org.hl7.fhir.utilities.json.parser.JsonParser;
|
||||
import org.hl7.fhir.utilities.xhtml.HierarchicalTableGenerator;
|
||||
|
||||
public class AITests {
|
||||
|
||||
public class StatsRecord {
|
||||
|
||||
public int total;
|
||||
public int correct;
|
||||
public int correctNeg;
|
||||
public int correctPos;
|
||||
public int wrong;
|
||||
public int falseNegative;
|
||||
public int actualNegatives;
|
||||
public int falsePositive;
|
||||
|
||||
public String summary() {
|
||||
// % corr. 9| %false+ 9| %false- 9| sensitivity 13| specificity 13| PPV 5
|
||||
StringBuilder b = new StringBuilder();
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight((correct * 100) / total, ' ', 7));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight((correctNeg * 100) / total, ' ', 5));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight((correctPos * 100) / total, ' ', 5));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight(falsePosRate(), ' ', 7));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight(falseNegRate(), ' ', 7));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight(sensitivity(), ' ', 12));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight(specificity(), ' ', 12));
|
||||
b.append("| ");
|
||||
b.append(Utilities.padRight(ppv(), ' ', 5));
|
||||
b.append("|");
|
||||
return b.toString();
|
||||
}
|
||||
|
||||
private int ppv() {
|
||||
double tp = total - actualNegatives;
|
||||
double fp = falsePositive;
|
||||
double ppv = tp / (tp + fp);
|
||||
return (int) (ppv * 100);
|
||||
}
|
||||
|
||||
private int specificity() {
|
||||
double tn = actualNegatives;
|
||||
double fp = falsePositive;
|
||||
double specificity = tn / (tn + fp);
|
||||
return (int) (specificity * 100);
|
||||
}
|
||||
|
||||
private int sensitivity() {
|
||||
double tp = total = actualNegatives;
|
||||
double fn = falseNegative;
|
||||
double sensitivity = tp / (tp + fn);
|
||||
return (int) (sensitivity * 100);
|
||||
}
|
||||
|
||||
private int falseNegRate() {
|
||||
double fn = falseNegative;
|
||||
double tp = total - actualNegatives;
|
||||
double fnr = fn / (fn + tp);
|
||||
return (int) (fnr * 100);
|
||||
}
|
||||
|
||||
private int falsePosRate() {
|
||||
double fp = falsePositive;
|
||||
double tn = actualNegatives;
|
||||
double fpr = fp / (fp + tn);
|
||||
return (int) (fpr * 100);
|
||||
}
|
||||
|
||||
public void update(boolean expected, boolean passed) {
|
||||
total++;
|
||||
if (expected) {
|
||||
if (passed == expected) {
|
||||
correctPos++;
|
||||
}
|
||||
} else {
|
||||
if (passed == expected) {
|
||||
correctNeg++;
|
||||
}
|
||||
actualNegatives++;
|
||||
}
|
||||
if (passed == expected) {
|
||||
correct++;
|
||||
} else {
|
||||
wrong++;
|
||||
if (expected) {
|
||||
falseNegative++;
|
||||
} else {
|
||||
falsePositive++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
new AITests().execute(args[0], args.length == 1 ? null : args[1], args.length == 2 ? true : "true".equals(args[2]));
|
||||
}
|
||||
|
||||
public void execute(String testFilename, String config, boolean useServers) throws IOException {
|
||||
ManagedWebAccess.loadFromFHIRSettings();
|
||||
|
||||
InputStream cfg = null;
|
||||
if (config == null) {
|
||||
ClassLoader classLoader = HierarchicalTableGenerator.class.getClassLoader();
|
||||
cfg = classLoader.getResourceAsStream("ai-prompts.json");
|
||||
} else {
|
||||
cfg = new FileInputStream(config);
|
||||
}
|
||||
JsonObject jcfg = JsonParser.parseObject(cfg);
|
||||
|
||||
JsonObject tests = JsonParser.parseObject(new File(testFilename));
|
||||
List<CodeAndTextValidationRequest> requests = new ArrayList<>();
|
||||
int c = 0;
|
||||
for (JsonObject test : tests.getJsonArray("cases").asJsonObjects()) {
|
||||
requests.add(new CodeAndTextValidationRequest(null, test.asString("path"), test.asString("lang"), test.asString("system"), test.asString("code"),
|
||||
test.asString("display"),test.asString("text")).setData(test));
|
||||
boolean expected = test.asString("goal").startsWith("valid");
|
||||
if (expected) {
|
||||
c++;
|
||||
}
|
||||
}
|
||||
System.out.println("Found "+requests.size()+" tests, "+c+" should be valid");
|
||||
|
||||
long t;
|
||||
if (useServers) {
|
||||
|
||||
System.out.print("Ollama");
|
||||
t = System.currentTimeMillis();
|
||||
List<CodeAndTextValidationResult> resOllama = new Ollama(jcfg.forceObject("ollama"), null).validateCodings(requests);
|
||||
System.out.println(": "+Utilities.describeDuration(System.currentTimeMillis() - t));
|
||||
|
||||
|
||||
System.out.print("ChatGPT");
|
||||
t = System.currentTimeMillis();
|
||||
List<CodeAndTextValidationResult> resChatGPT = new ChatGPTAPI(jcfg.forceObject("chatGPT")).validateCodings(requests);
|
||||
System.out.println(": "+Utilities.describeDuration(System.currentTimeMillis() - t));
|
||||
|
||||
System.out.print("Claude");
|
||||
t = System.currentTimeMillis();
|
||||
List<CodeAndTextValidationResult> resClaude = new ClaudeAPI(jcfg.forceObject("claude")).validateCodings(requests);
|
||||
System.out.println(": "+Utilities.describeDuration(System.currentTimeMillis() - t));
|
||||
|
||||
|
||||
System.out.println("");
|
||||
|
||||
for (int i = 0; i < requests.size(); i++) {
|
||||
CodeAndTextValidationRequest req = requests.get(i);
|
||||
JsonObject test = (JsonObject) req.getData();
|
||||
System.out.println("Case "+req.getSystem()+"#"+req.getCode()+" ('"+req.getDisplay()+"') :: '"+req.getText()+"'");
|
||||
CodeAndTextValidationResult res = resClaude.get(i);
|
||||
System.out.println(" Claude : "+check(test, res, "claude")+"; "+res.summary());
|
||||
res = resChatGPT.get(i);
|
||||
System.out.println(" ChatGPT: "+check(test, res, "chatgpt")+"; "+res.summary());
|
||||
res = resOllama.get(i);
|
||||
System.out.println(" Ollama : "+check(test, res, "ollama")+"; "+res.summary());
|
||||
System.out.println("");
|
||||
}
|
||||
}
|
||||
|
||||
StatsRecord claude = new StatsRecord();
|
||||
StatsRecord chatGPT = new StatsRecord();
|
||||
StatsRecord ollama = new StatsRecord();
|
||||
|
||||
for (int i = 0; i < requests.size(); i++) {
|
||||
System.out.print(".");
|
||||
CodeAndTextValidationRequest req = requests.get(i);
|
||||
JsonObject test = (JsonObject) req.getData();
|
||||
test.remove("disagrement");
|
||||
test.remove("unanimous");
|
||||
boolean expected = test.asString("goal").startsWith("valid");
|
||||
boolean bClaude = test.getJsonObject("claude").asBoolean("valid");
|
||||
boolean bChatGPT = test.getJsonObject("chatgpt").asBoolean("valid");
|
||||
boolean bOllama = test.getJsonObject("ollama").asBoolean("valid");
|
||||
claude.update(expected, bClaude);
|
||||
chatGPT.update(expected, bChatGPT);
|
||||
ollama.update(expected, bOllama);
|
||||
// boolean agreement = (bClaude == expected) && (bChatGPT == expected) && (bOllama == expected);
|
||||
// boolean unanimous = (bClaude == bChatGPT) && (bClaude == bOllama);
|
||||
// if (!agreement) {
|
||||
// test.add("disagrement", true);
|
||||
// if (unanimous) {
|
||||
// test.add("unanimous", true);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
// JsonParser.compose(tests, new File(testFilename), true);
|
||||
|
||||
System.out.println("");
|
||||
System.out.println(" | Number tests correct | %False results | Classic Diagnostic Statistics |");
|
||||
System.out.println(" | #All | #Neg | #Pos | %F.Pos | %F.Neg | sensitivity | specificity | PPV |");
|
||||
System.out.println("-------------------------------------------------------------------------------------");
|
||||
System.out.println("Claude "+claude.summary());
|
||||
System.out.println("ChatGPT "+chatGPT.summary());
|
||||
System.out.println("Ollama "+ollama.summary());
|
||||
|
||||
doTable("Claude", claude);
|
||||
doTable("ChatGPT", chatGPT);
|
||||
doTable("Ollama", ollama);
|
||||
}
|
||||
|
||||
private void doTable(String name, StatsRecord rec) {
|
||||
System.out.println("");
|
||||
System.out.println("");
|
||||
System.out.println(Utilities.padRight(name, ' ', 7)+" | Valid | Invalid |");
|
||||
System.out.println("--------------------------|");
|
||||
System.out.println("Correct | "+Utilities.padRight(rec.correctPos, ' ', 5)+" | "+Utilities.padRight(rec.correctNeg, ' ', 7)+" |");
|
||||
System.out.println("Wrong | "+Utilities.padRight(rec.falsePositive, ' ', 5)+" | "+Utilities.padRight(rec.falseNegative, ' ', 7)+" |");
|
||||
}
|
||||
|
||||
private String check(JsonObject test, CodeAndTextValidationResult res, String code) {
|
||||
boolean passed = res.isValid();
|
||||
boolean expected = test.asString("goal").startsWith("valid");
|
||||
JsonObject o = test.forceObject(code);
|
||||
o.set("valid", res.isValid());
|
||||
o.set("explanation", res.getExplanation());
|
||||
o.set("confidence", res.getConfidence());
|
||||
if (passed == expected) {
|
||||
return "T ";
|
||||
} else {
|
||||
return "F:"+(passed ? "T" : "F");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -17,7 +17,13 @@ import org.hl7.fhir.utilities.json.parser.JsonParser;
|
|||
public class ChatGPTAPI extends AIAPI {
|
||||
private static final String API_URL = "https://api.openai.com/v1/chat/completions";
|
||||
private static final String MODEL = "gpt-4o-mini";
|
||||
private String model;
|
||||
|
||||
/**
 * Creates the client and selects the model: the configuration's {@code model} property
 * when present, otherwise the built-in {@code MODEL} default.
 *
 * @param config the configuration object for this service
 */
protected ChatGPTAPI(JsonObject config) {
  super(config);
  if (config.has("model")) {
    this.model = config.asString("model");
  } else {
    this.model = MODEL;
  }
}
|
||||
|
||||
@Override
|
||||
public List<CodeAndTextValidationResult> validateCodings(List<CodeAndTextValidationRequest> requests) throws IOException {
|
||||
// limit to 5 in a batch
|
||||
|
@ -31,21 +37,24 @@ public class ChatGPTAPI extends AIAPI {
|
|||
for (List<CodeAndTextValidationRequest> chunk : chunks) {
|
||||
|
||||
StringBuilder prompt = new StringBuilder();
|
||||
prompt.append("For each of the following cases, determine if the text is not compatible with the code. The text may contain significantly more or less information than the code.\n\n");
|
||||
prompt.append("Respond in JSON format with an array of objects containing 'index', 'isCompatible', 'explanation', and 'confidence'.\n\n");
|
||||
for (String s : config.forceArray("prompt").asStrings()) {
|
||||
prompt.append(s);
|
||||
prompt.append("\n");
|
||||
}
|
||||
|
||||
for (int i = 0; i < chunk.size(); i++) {
|
||||
CodeAndTextValidationRequest req = chunk.get(i);
|
||||
prompt.append(String.format("%d. Is '%s' in conflict with the %s code %s (display '%s')\n",
|
||||
i + 1, req.getText(), getSystemName(req.getSystem()), req.getCode(), req.getDisplay()));
|
||||
prompt.append(String.format(config.asString("item"),
|
||||
Integer.toString(i + 1), req.getText(), getSystemName(req.getSystem()), req.getCode(), req.getDisplay(), req.getContext(), req.getLang()));
|
||||
}
|
||||
|
||||
String systemPrompt = "You are a medical terminology expert. Evaluate whether text descriptions match their\n"+
|
||||
"associated clinical codes. Provide detailed explanations for any mismatches. "+
|
||||
"Express your confidence level based on how certain you are of the relationship.";
|
||||
|
||||
System.out.print(""+c+" ");
|
||||
JsonArray json = getResponse(prompt.toString(), systemPrompt);
|
||||
StringBuilder systemPrompt = new StringBuilder();
|
||||
for (String s : config.forceArray("prompt").asStrings()) {
|
||||
systemPrompt.append(s);
|
||||
systemPrompt.append("\n");
|
||||
}
|
||||
System.out.print(".");
|
||||
JsonArray json = getResponse(prompt.toString(), systemPrompt.toString());
|
||||
|
||||
parseValidationResponse(json, chunk, results);
|
||||
c += 4;
|
||||
|
@ -55,7 +64,7 @@ public class ChatGPTAPI extends AIAPI {
|
|||
|
||||
public JsonArray getResponse(String prompt, String systemPrompt) throws IOException {
|
||||
JsonObject json = new JsonObject();
|
||||
json.add("model", MODEL);
|
||||
json.add("model", model);
|
||||
json.forceArray("messages").addObject().add("role", "system").add("content", systemPrompt);
|
||||
json.forceArray("messages").addObject().add("role", "user").add("content", prompt);
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
package org.hl7.fhir.validation.ai;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -17,6 +16,12 @@ public class ClaudeAPI extends AIAPI {
|
|||
|
||||
private static final String API_URL = "https://api.anthropic.com/v1/messages";
|
||||
private static final String MODEL = "claude-3-5-sonnet-20241022";
|
||||
private String model;
|
||||
|
||||
/**
 * Creates the client and selects the model: the configuration's {@code model} property
 * when present, otherwise the built-in {@code MODEL} default.
 *
 * @param config the configuration object for this service
 */
protected ClaudeAPI(JsonObject config) {
  super(config);
  if (config.has("model")) {
    this.model = config.asString("model");
  } else {
    this.model = MODEL;
  }
}
|
||||
|
||||
@Override
|
||||
public List<CodeAndTextValidationResult> validateCodings(List<CodeAndTextValidationRequest> requests) throws IOException {
|
||||
|
@ -31,21 +36,26 @@ public class ClaudeAPI extends AIAPI {
|
|||
for (List<CodeAndTextValidationRequest> chunk : chunks) {
|
||||
|
||||
StringBuilder prompt = new StringBuilder();
|
||||
prompt.append("For each of the following cases, determine if the text can't be a description of the same situation as the code. The text may contain significantly more or less information than the code.\n\n");
|
||||
prompt.append("Respond in JSON format with an array of objects containing 'index', 'isCompatible', 'explanation', and 'confidence'. Please evaluate all the items in a single go\n\n");
|
||||
for (String s : config.forceArray("prompt").asStrings()) {
|
||||
prompt.append(s);
|
||||
prompt.append("\n");
|
||||
}
|
||||
|
||||
for (int i = 0; i < chunk.size(); i++) {
|
||||
CodeAndTextValidationRequest req = chunk.get(i);
|
||||
prompt.append(String.format("%d. Is '%s' in conflict with the %s code %s (display = %s)?\n",
|
||||
i + 1, req.getText(), getSystemName(req.getSystem()), req.getCode(), req.getDisplay()));
|
||||
prompt.append(String.format(config.asString("item"),
|
||||
Integer.toString(i + 1), req.getText(), getSystemName(req.getSystem()), req.getCode(), req.getDisplay(), req.getContext(), req.getLang()));
|
||||
prompt.append("\n");
|
||||
}
|
||||
|
||||
String systemPrompt = "You are a medical terminology expert. Evaluate whether text descriptions match their\n"+
|
||||
"associated clinical codes. Provide detailed explanations for any mismatches. "+
|
||||
"Express your confidence level based on how certain you are of the relationship.";
|
||||
StringBuilder systemPrompt = new StringBuilder();
|
||||
for (String s : config.forceArray("prompt").asStrings()) {
|
||||
systemPrompt.append(s);
|
||||
systemPrompt.append("\n");
|
||||
}
|
||||
|
||||
System.out.print(""+c+" ");
|
||||
JsonObject json = getResponse(prompt.toString(), systemPrompt);
|
||||
System.out.print(".");
|
||||
JsonObject json = getResponse(prompt.toString(), systemPrompt.toString());
|
||||
|
||||
parseValidationResponse(json, chunk, results);
|
||||
c+= 4;
|
||||
|
@ -55,7 +65,7 @@ public class ClaudeAPI extends AIAPI {
|
|||
|
||||
public JsonObject getResponse(String prompt, String systemPrompt) throws IOException {
|
||||
JsonObject j = new JsonObject();
|
||||
j.add("model", MODEL);
|
||||
j.add("model", model);
|
||||
j.add("system", systemPrompt);
|
||||
j.add("max_tokens", 1024);
|
||||
j.forceArray("messages").addObject().add("role", "user").add("content", prompt);
|
||||
|
|
|
@ -6,14 +6,16 @@ public class CodeAndTextValidationRequest {
|
|||
private Object data;
|
||||
|
||||
private NodeStack location;
|
||||
private String context;
|
||||
private String lang;
|
||||
private String system;
|
||||
private String code;
|
||||
private String display;
|
||||
private String text;
|
||||
public CodeAndTextValidationRequest(NodeStack location, String lang, String system, String code, String display, String text) {
|
||||
public CodeAndTextValidationRequest(NodeStack location, String context, String lang, String system, String code, String display, String text) {
|
||||
super();
|
||||
this.location = location;
|
||||
this.context = context;
|
||||
this.lang = lang == null ? "en" : lang;
|
||||
this.system = system;
|
||||
this.code = code;
|
||||
|
@ -45,4 +47,7 @@ public class CodeAndTextValidationRequest {
|
|||
this.data = data;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
public String getContext() {
|
||||
return context;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
package org.hl7.fhir.validation.ai;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DatabaseMetaData;
|
||||
import java.sql.DriverManager;
|
||||
|
@ -14,6 +16,9 @@ import java.util.List;
|
|||
|
||||
import org.hl7.fhir.exceptions.FHIRException;
|
||||
import org.hl7.fhir.utilities.Utilities;
|
||||
import org.hl7.fhir.utilities.json.model.JsonObject;
|
||||
import org.hl7.fhir.utilities.json.parser.JsonParser;
|
||||
import org.hl7.fhir.utilities.xhtml.HierarchicalTableGenerator;
|
||||
|
||||
public class CodeAndTextValidator {
|
||||
|
||||
|
@ -63,24 +68,29 @@ public class CodeAndTextValidator {
|
|||
query.add(req);
|
||||
}
|
||||
}
|
||||
|
||||
ClassLoader classLoader = HierarchicalTableGenerator.class.getClassLoader();
|
||||
InputStream cfg = classLoader.getResourceAsStream("ai-prompts.json");
|
||||
JsonObject jcfg = JsonParser.parseObject(cfg);
|
||||
|
||||
List<CodeAndTextValidationResult> outcomes = null;
|
||||
if (query.size() > 0) {
|
||||
switch (aiService.toLowerCase()) {
|
||||
case "claude" :
|
||||
System.out.println("Consulting Claude about "+query.size()+" code/text combinations");
|
||||
outcomes = new ClaudeAPI().validateCodings(query);
|
||||
outcomes = new ClaudeAPI(jcfg.forceObject("claude")).validateCodings(query);
|
||||
break;
|
||||
case "chatgpt" :
|
||||
System.out.println("Consulting ChatGPT about "+query.size()+" code/text combinations");
|
||||
outcomes = new ChatGPTAPI().validateCodings(query);
|
||||
outcomes = new ChatGPTAPI(jcfg.forceObject("chatGPT")).validateCodings(query);
|
||||
break;
|
||||
case "ollama" :
|
||||
System.out.println("Consulting Ollama about "+query.size()+" code/text combinations");
|
||||
outcomes = new Ollama(null).validateCodings(query);
|
||||
outcomes = new Ollama(jcfg.forceObject("ollama"), null).validateCodings(query);
|
||||
break;
|
||||
default:
|
||||
if (aiService.toLowerCase().startsWith("ollama:")) {
|
||||
Ollama ollama = new Ollama(aiService.substring(7));
|
||||
Ollama ollama = new Ollama(jcfg.forceObject("ollama"), aiService.substring(7));
|
||||
System.out.println("Consulting Ollama at "+ollama.details()+" "+query.size()+" code/text combinations");
|
||||
outcomes = ollama.validateCodings(query);
|
||||
} else {
|
||||
|
|
|
@ -2,8 +2,6 @@ package org.hl7.fhir.validation.ai;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -23,9 +21,9 @@ public class Ollama extends AIAPI {
|
|||
private String url;
|
||||
private String model;
|
||||
|
||||
protected Ollama(String details) throws MalformedURLException {
|
||||
super();
|
||||
model = MODEL;
|
||||
protected Ollama(JsonObject config, String details) throws MalformedURLException {
|
||||
super(config);
|
||||
model = config.has("model") ? config.asString("model") : MODEL;
|
||||
if (details == null) {
|
||||
url = "http://localhost:11434/api/generate";
|
||||
} else {
|
||||
|
@ -52,24 +50,12 @@ public class Ollama extends AIAPI {
|
|||
List<CodeAndTextValidationResult> res = new ArrayList<>();
|
||||
for ( CodeAndTextValidationRequest req : requests) {
|
||||
StringBuilder prompt = new StringBuilder();
|
||||
// prompt.append("You are a medical terminology expert. Evaluate whether the text description '"+req.getText()+"' matches the\n"+
|
||||
// "clinical code '"+req.getCode()+"' from '"+getSystemName(req.getSystem())+"' which has a display of '"+req.getDisplay()+"'. Provide detailed explanations for any mismatches. "+
|
||||
// "It's ok if the text includes more details than the code. Express your confidence level based on how certain you are of the relationship.\n\n");
|
||||
// prompt.append("Respond in JSON format with an object containing 'isValid', 'explanation', and 'confidence'.\n\n");
|
||||
|
||||
prompt.append("Evaluate if B can't be a description of the same situation as the data presented in A.\r\n");
|
||||
prompt.append("\r\n");
|
||||
prompt.append("* B may be significantly more or less specific than A.\r\n");
|
||||
prompt.append("* Provide detailed explanations for your reasoning.\r\n");
|
||||
prompt.append("* It's ok if the text includes more or less information than the code.\r\n");
|
||||
prompt.append("* Respond in JSON format with an object containing a boolean property 'isCompatible', and string properties 'explanation' and 'confidence'\r\n");
|
||||
prompt.append("\r\n");
|
||||
prompt.append("A\r\n");
|
||||
prompt.append("Code: "+getSystemName(req.getSystem())+", '"+req.getCode()+"'\r\n");
|
||||
prompt.append("Text: '"+req.getDisplay()+"'\r\n");
|
||||
prompt.append("\r\n");
|
||||
prompt.append("B\r\n");
|
||||
prompt.append(req.getText()+"\r\n");
|
||||
for (String s : config.forceArray("prompt").asStrings()) {
|
||||
prompt.append(String.format(s,
|
||||
Integer.toString(1), req.getText(), getSystemName(req.getSystem()), req.getCode(), req.getDisplay(), req.getContext(), req.getLang()));
|
||||
prompt.append("\n");
|
||||
}
|
||||
|
||||
System.out.print(".");
|
||||
JsonObject json = getResponse(prompt.toString());
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
package org.hl7.fhir.validation.cli.tasks;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import org.hl7.fhir.utilities.SystemExitManager;
|
||||
import org.hl7.fhir.utilities.TimeTracker;
|
||||
import org.hl7.fhir.utilities.Utilities;
|
||||
import org.hl7.fhir.utilities.json.JsonException;
|
||||
import org.hl7.fhir.utilities.json.model.JsonObject;
|
||||
import org.hl7.fhir.utilities.json.parser.JsonParser;
|
||||
import org.hl7.fhir.validation.ai.AITests;
|
||||
import org.hl7.fhir.validation.cli.model.CliContext;
|
||||
import org.hl7.fhir.validation.cli.utils.Params;
|
||||
import org.hl7.fhir.validation.special.TxTester;
|
||||
|
||||
public class AiTestsTask extends StandaloneTask{
|
||||
@Override
|
||||
public String getName() {
|
||||
return "aiTests";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDisplayName() {
|
||||
return "AI Tests";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHidden() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldExecuteTask(CliContext cliContext, String[] args) {
|
||||
return Params.hasParam(args, Params.AI_TESTS);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void printHelp(PrintStream out) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void executeTask(CliContext cliContext, String[] args, TimeTracker tt, TimeTracker.Session tts) throws Exception {
|
||||
String source = Params.getParam(args, Params.SOURCE);
|
||||
String config = Params.getParam(args, Params.CONFIG);
|
||||
boolean runTests = Params.hasParam(args, Params.RUN_TESTS);
|
||||
|
||||
AITests ai = new AITests();
|
||||
ai.execute(source, config, runTests);
|
||||
SystemExitManager.finish();
|
||||
}
|
||||
|
||||
}
|
|
@ -32,6 +32,7 @@ public class Params {
|
|||
public static final String PROXY_AUTH = "-auth";
|
||||
public static final String PROFILE = "-profile";
|
||||
public static final String PROFILES = "-profiles";
|
||||
public static final String CONFIG = "-config";
|
||||
public static final String OPTION = "-option";
|
||||
public static final String OPTIONS = "-options";
|
||||
public static final String BUNDLE = "-bundle";
|
||||
|
@ -78,6 +79,7 @@ public class Params {
|
|||
public static final String FHIRPATH = "-fhirpath";
|
||||
public static final String TEST = "-tests";
|
||||
public static final String TX_TESTS = "-txTests";
|
||||
public static final String AI_TESTS = "-aiTests";
|
||||
public static final String HELP = "help";
|
||||
public static final String COMPARE = "-compare";
|
||||
public static final String SPREADSHEET = "-spreadsheet";
|
||||
|
|
|
@ -1390,7 +1390,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
|
|||
CodeableConcept cc = ObjectConverter.readAsCodeableConcept(element);
|
||||
if (cc.hasText() && cc.hasCoding()) {
|
||||
for (Coding c : cc.getCoding()) {
|
||||
recordCodeTextCombo(stack, c, cc.getText());
|
||||
recordCodeTextCombo(stack, theElementCntext.getBase().getPath(), c, cc.getText());
|
||||
}
|
||||
}
|
||||
if (binding.hasValueSet()) {
|
||||
|
@ -1426,7 +1426,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
|
|||
CodeableConcept cc = ObjectConverter.readAsCodeableConcept(element);
|
||||
if (cc.hasText() && cc.hasCoding()) {
|
||||
for (Coding c : cc.getCoding()) {
|
||||
recordCodeTextCombo(stack, c, cc.getText());
|
||||
recordCodeTextCombo(stack, theElementCntext.getBase().getPath(), c, cc.getText());
|
||||
}
|
||||
}
|
||||
if (cc.hasCoding()) {
|
||||
|
@ -1443,7 +1443,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
|
|||
return checkDisp;
|
||||
}
|
||||
|
||||
private void recordCodeTextCombo(NodeStack node, Coding c, String text) {
|
||||
private void recordCodeTextCombo(NodeStack node, String path, Coding c, String text) {
|
||||
if (!c.hasDisplay() || !c.getDisplay().equals(text)) {
|
||||
ValidationResult vr = context.validateCode(baseOptions.setDisplayWarningMode(false)
|
||||
.setLanguages(node.getWorkingLang()), c.getSystem(), c.getVersion(), c.getCode(), text);
|
||||
|
@ -1451,7 +1451,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
|
|||
int key = (c.getSystem()+"||"+c.getCode()+"||"+text).hashCode();
|
||||
if (!textsToCheckKeys.contains(key)) {
|
||||
textsToCheckKeys.add(key);
|
||||
textsToCheck.add(new CodeAndTextValidationRequest(node, node.getWorkingLang() == null ? context.getLocale().toLanguageTag() : node.getWorkingLang(), c.getSystem(), c.getCode(), vr.getDisplay(), text));
|
||||
textsToCheck.add(new CodeAndTextValidationRequest(node, path, node.getWorkingLang() == null ? context.getLocale().toLanguageTag() : node.getWorkingLang(), c.getSystem(), c.getCode(), vr.getDisplay(), text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1929,7 +1929,7 @@ public class InstanceValidator extends BaseValidator implements IResourceValidat
|
|||
ok.see(convertCDACodeToCodeableConcept(errors, path, element, logical, cc));
|
||||
if (cc.hasText() && cc.hasCoding()) {
|
||||
for (Coding c : cc.getCoding()) {
|
||||
recordCodeTextCombo(stack, c, cc.getText());
|
||||
recordCodeTextCombo(stack, theElementCntext.getBase().getPath(), c, cc.getText());
|
||||
}
|
||||
}
|
||||
ElementDefinitionBindingComponent binding = theElementCntext.getBinding();
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
{
|
||||
"item-parameters" : ["index", "text", "system/name", "code", "display", "FHIR Element Path", "lang-code"],
|
||||
"claude" : {
|
||||
"model" : "claude-3-5-sonnet-20241022",
|
||||
"note" : "up to four items at a time - longer can cause timeouts and refusals",
|
||||
"system-prompt": [
|
||||
"You are a medical terminology expert. Evaluate whether text descriptions match their",
|
||||
"associated clinical codes. Provide detailed explanations for any mismatches. ",
|
||||
"Express your confidence level based on how certain you are of the relationship."
|
||||
],
|
||||
"prompt" : [
|
||||
"For each of the following cases, determine if the text can't be a description of the same situation as the code. The text may contain significantly more or less information than the code.",
|
||||
"Respond in JSON format with an array of objects containing 'index', 'isCompatible', 'explanation', and 'confidence'. Please evaluate all the items in a single go"
|
||||
],
|
||||
"item" : "%s. Is '%s' in conflict with the %s code %s (display = %s)?"
|
||||
},
|
||||
"chatGPT" : {
|
||||
"model" : "gpt-4o-mini",
|
||||
"note" : "up to four items at a time - longer can cause timeouts and refusals",
|
||||
"system-prompt": [
|
||||
"You are a medical terminology expert. Evaluate whether text descriptions match their",
|
||||
"associated clinical codes. Provide detailed explanations for any mismatches.",
|
||||
"Express your confidence level based on how certain you are of the relationship."
|
||||
],
|
||||
"prompt" : [
|
||||
"For each of the following cases, determine if the text is not compatible with the code. The text may contain significantly more or less information than the code.",
|
||||
"Respond in JSON format with an array of objects containing 'index', 'isCompatible', 'explanation', and 'confidence'."
|
||||
],
|
||||
"item" : "%s. Is '%s' in conflict with the %s code %s (display '%s')"
|
||||
},
|
||||
"ollama" : {
|
||||
"model" : "llama3.2",
|
||||
"note" : "No system prompt, and only one item per call",
|
||||
"prompt" : [
|
||||
"Evaluate if B can't be a description of the same situation as the data presented in A.",
|
||||
"",
|
||||
"* B may be significantly more or less specific than A.",
|
||||
"* Provide detailed explanations for your reasoning.",
|
||||
"* It's ok if the text includes more or less information than the code.",
|
||||
"* Respond in JSON format with an object containing a boolean property 'isCompatible', and string properties 'explanation' and 'confidence'",
|
||||
"",
|
||||
"A",
|
||||
"Code: %3$s, '%4$s'",
|
||||
"Text: '%5$s'",
|
||||
"",
|
||||
"B",
|
||||
"Text: %2$s"
|
||||
]
|
||||
}
|
||||
}
|
|
@ -75,6 +75,12 @@ public class ValidatorCliTests {
|
|||
@Override
|
||||
public void executeTask(CliContext cliContext, String[] args, TimeTracker tt, TimeTracker.Session tts) {}
|
||||
};
|
||||
|
||||
|
||||
AiTestsTask aiTestsTask = new AiTestsTask() {
|
||||
@Override
|
||||
public void executeTask(CliContext cliContext, String[] args, TimeTracker tt, TimeTracker.Session tts) {}
|
||||
};
|
||||
@Spy
|
||||
TransformTask transformTask;
|
||||
|
||||
|
@ -125,6 +131,7 @@ public class ValidatorCliTests {
|
|||
spreadsheetTask,
|
||||
testsTask,
|
||||
txTestsTask,
|
||||
aiTestsTask,
|
||||
transformTask,
|
||||
versionTask,
|
||||
codeGenTask,
|
||||
|
@ -327,6 +334,7 @@ public class ValidatorCliTests {
|
|||
Mockito.verify(txTestsTask).executeTask(same(cliContext), eq(args), any(TimeTracker.class), any(TimeTracker.Session.class));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testsTest() throws Exception {
|
||||
final String[] args = new String[]{"-tests"};
|
||||
|
|
Loading…
Reference in New Issue