Work on fulltext suggestions

This commit is contained in:
jamesagnew 2015-11-03 21:34:35 -05:00
parent 3fc7a16735
commit 1fb9f02d14
7 changed files with 146 additions and 53 deletions

View File

@ -106,6 +106,7 @@ import ca.uhn.fhir.model.dstu.resource.BaseResource;
import ca.uhn.fhir.model.dstu2.composite.MetaDt;
import ca.uhn.fhir.model.primitive.IdDt;
import ca.uhn.fhir.model.primitive.InstantDt;
import ca.uhn.fhir.model.primitive.StringDt;
import ca.uhn.fhir.model.valueset.BundleEntryTransactionMethodEnum;
import ca.uhn.fhir.parser.DataFormatException;
import ca.uhn.fhir.parser.IParser;
@ -174,7 +175,7 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
// @PersistenceContext(name = "FHIR_UT", type = PersistenceContextType.TRANSACTION, unitName = "FHIR_UT")
@PersistenceContext(type = PersistenceContextType.TRANSACTION)
private EntityManager myEntityManager;
protected EntityManager myEntityManager;
@Autowired
private PlatformTransactionManager myPlatformTransactionManager;
@ -1546,19 +1547,19 @@ public abstract class BaseHapiFhirDao<T extends IBaseResource> implements IDao {
}
/**
 * Builds the plain-text content string for a resource from its populated primitive elements.
 * <p>
 * Every populated {@code IPrimitiveType} child of {@code theResource} is fetched through the
 * context's terser, and non-blank string values are appended to a builder.
 * </p>
 * <p>
 * NOTE(review): this span reads like a rendered diff in which pre-change and post-change lines are
 * interleaved. The {@code b} builder (values separated by single spaces) and the {@code retVal}
 * builder ({@code StringDt} values only, one per line) both appear, as do two {@code return}
 * statements. Confirm which lines are live before changing any logic here.
 * </p>
 *
 * @param theResource the resource whose populated primitive children are scanned
 * @return the accumulated text
 */
private String parseContentTextIntoWords(IResource theResource) {
StringBuilder b = new StringBuilder();
StringBuilder retVal = new StringBuilder();
@SuppressWarnings("rawtypes")
List<IPrimitiveType> childElements = getContext().newTerser().getAllPopulatedChildElementsOfType(theResource, IPrimitiveType.class);
for (@SuppressWarnings("rawtypes") IPrimitiveType nextType : childElements) {
// Only StringDt values are considered; other primitive types are ignored by this check
if (nextType instanceof StringDt) {
String nextValue = nextType.getValueAsString();
if (isNotBlank(nextValue)) {
// Insert a single separating space unless the buffer is empty or already ends with one
if (b.length() > 0 && b.charAt(b.length() - 1) != ' ') {
b.append(' ');
}
b.append(nextValue);
// Line breaks inside a value are flattened to spaces so that each value occupies exactly one line
retVal.append(nextValue.replace("\n", " ").replace("\r", " "));
retVal.append("\n");
}
}
return b.toString();
}
return retVal.toString();
}
public BaseHasResource readEntity(IIdType theValueId) {

View File

@ -26,8 +26,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.Tuple;
import javax.persistence.TypedQuery;
@ -60,9 +58,6 @@ public abstract class BaseHapiFhirSystemDao<T> extends BaseHapiFhirDao<IBaseReso
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(BaseHapiFhirSystemDao.class);
@PersistenceContext()
protected EntityManager myEntityManager;
@Autowired
private PlatformTransactionManager myTxManager;
@ -222,10 +217,6 @@ public abstract class BaseHapiFhirSystemDao<T> extends BaseHapiFhirDao<IBaseReso
}
}
/**
 * Assigns the entity manager held in {@code myEntityManager}.
 *
 * @param theEntityManager the entity manager to store
 */
public void setEntityManager(EntityManager theEntityManager) {
    this.myEntityManager = theEntityManager;
}
/**
 * Assigns the transaction manager held in {@code myTxManager}.
 *
 * @param theTxManager the platform transaction manager to store
 */
public void setTxManager(PlatformTransactionManager theTxManager) {
    this.myTxManager = theTxManager;
}

View File

@ -35,16 +35,13 @@ import javax.persistence.PersistenceContextType;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenGroup;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.jpa.FullTextQuery;
@ -56,7 +53,6 @@ import org.springframework.transaction.annotation.Transactional;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import ca.uhn.fhir.jpa.dao.FhirSearchDao.MySuggestionFormatter;
import ca.uhn.fhir.jpa.entity.ResourceTable;
import ca.uhn.fhir.model.api.IQueryParameterType;
import ca.uhn.fhir.model.dstu.resource.BaseResource;
@ -207,16 +203,29 @@ public class FhirSearchDao extends BaseHapiFhirDao<IBaseResource> implements ISe
String nextValue = (String) nextAsArray[0];
try {
MySuggestionFormatter formatter = new MySuggestionFormatter(suggestions);
MySuggestionFormatter formatter = new MySuggestionFormatter(theText, suggestions);
Scorer scorer = new QueryScorer(textQuery);
Highlighter highlighter = new Highlighter(formatter, scorer);
Analyzer analyzer = em.getSearchFactory().getAnalyzer(ResourceTable.class);
highlighter.getBestFragment(analyzer.tokenStream("myContentText", nextValue), nextValue);
highlighter.getBestFragment(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue);
highlighter.getBestFragment(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue);
highlighter.getBestFragment(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue);
formatter.setAnalyzer("myContentTextPhonetic");
highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
formatter.setAnalyzer("myContentTextNGram");
highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
formatter.setFindPhrasesWith();
formatter.setAnalyzer("myContentTextEdgeNGram");
highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
// formatter.setAnalyzer("myContentText");
// highlighter.getBestFragments(analyzer.tokenStream("myContentText", nextValue), nextValue, 10);
// formatter.setAnalyzer("myContentTextNGram");
// highlighter.getBestFragments(analyzer.tokenStream("myContentTextNGram", nextValue), nextValue, 10);
// formatter.setAnalyzer("myContentTextEdgeNGram");
// highlighter.getBestFragments(analyzer.tokenStream("myContentTextEdgeNGram", nextValue), nextValue, 10);
// formatter.setAnalyzer("myContentTextPhonetic");
// highlighter.getBestFragments(analyzer.tokenStream("myContentTextPhonetic", nextValue), nextValue, 10);
} catch (Exception e) {
throw new InternalErrorException(e);
}
@ -227,7 +236,11 @@ public class FhirSearchDao extends BaseHapiFhirDao<IBaseResource> implements ISe
Set<String> terms = Sets.newHashSet();
for (Iterator<Suggestion> iter = suggestions.iterator(); iter.hasNext(); ) {
if (!terms.add(iter.next().getTerm())) {
String nextTerm = iter.next().getTerm().toLowerCase();
// if (nextTerm.contains("\n")) {
// iter.remove();
// } else
if (!terms.add(nextTerm)) {
iter.remove();
}
}
@ -269,16 +282,52 @@ public class FhirSearchDao extends BaseHapiFhirDao<IBaseResource> implements ISe
public class MySuggestionFormatter implements Formatter {
private List<Suggestion> mySuggestions;
private String myAnalyzer;
private ArrayList<String> myPartialMatchPhrases;
private ArrayList<Float> myPartialMatchScores;
private String myOriginalSearch;
public MySuggestionFormatter(List<Suggestion> theSuggestions) {
public MySuggestionFormatter(String theOriginalSearch, List<Suggestion> theSuggestions) {
myOriginalSearch = theOriginalSearch;
mySuggestions = theSuggestions;
}
/**
 * Enables partial-phrase matching by snapshotting the suggestions collected so far.
 * <p>
 * Each existing suggestion contributes its term (prefixed with a single space) and its score.
 * The original search text is appended last with a score of {@code 1.0}. The two lists are
 * kept index-aligned: entry {@code i} of the scores list belongs to entry {@code i} of the
 * phrases list.
 * </p>
 */
public void setFindPhrasesWith() {
    myPartialMatchPhrases = new ArrayList<String>();
    myPartialMatchScores = new ArrayList<Float>();

    Iterator<Suggestion> existing = mySuggestions.iterator();
    while (existing.hasNext()) {
        Suggestion suggestion = existing.next();
        // Leading space means the term only matches when preceded by a space in the candidate text
        myPartialMatchPhrases.add(" " + suggestion.myTerm);
        myPartialMatchScores.add(suggestion.myScore);
    }

    // The raw search text itself is always a candidate phrase
    myPartialMatchPhrases.add(myOriginalSearch);
    myPartialMatchScores.add(1.0f);
}
/**
 * Stores the label of the analyzer/field currently being highlighted.
 *
 * @param theString the label to remember in {@code myAnalyzer}
 */
public void setAnalyzer(String theString) {
    this.myAnalyzer = theString;
}
/**
 * Highlighter callback that collects suggestions instead of producing highlighted markup.
 * <p>
 * A token group with a positive total score is recorded as a suggestion; its score is raised by
 * {@code 1.0} when the text equals the original search, ignoring case. A group with no positive
 * score is only considered when partial-match phrases have been set and the text is shorter than
 * 100 characters: for each stored phrase contained in the text, a suggestion is added with that
 * phrase's score minus {@code 0.5}.
 * </p>
 *
 * @param theOriginalText the text of the token group being formatted
 * @param theTokenGroup the token group, queried here only for its total score
 * @return always {@code null}
 */
@Override
public String highlightTerm(String theOriginalText, TokenGroup theTokenGroup) {
ourLog.info("{} Found {} with score {}", new Object[] {myAnalyzer, theOriginalText, theTokenGroup.getTotalScore()});
if (theTokenGroup.getTotalScore() > 0) {
// NOTE(review): as written, this add and the boosted add a few lines below both execute, so a
// scoring term is recorded twice. This looks like the pre-change line of a diff - confirm.
mySuggestions.add(new Suggestion(theOriginalText, theTokenGroup.getTotalScore()));
float score = theTokenGroup.getTotalScore();
// An exact (case-insensitive) match of the search text gets a fixed +1.0 boost
if (theOriginalText.equalsIgnoreCase(myOriginalSearch)) {
score = score + 1.0f;
}
mySuggestions.add(new Suggestion(theOriginalText, score));
} else if (myPartialMatchPhrases != null) {
// Texts of 100 characters or more are never offered as phrase suggestions
if (theOriginalText.length() < 100) {
for (int i = 0; i < myPartialMatchPhrases.size(); i++) {
// contains() is case-sensitive; scores are looked up by the same index as the phrase
if (theOriginalText.contains(myPartialMatchPhrases.get(i))) {
mySuggestions.add(new Suggestion(theOriginalText, myPartialMatchScores.get(i) - 0.5f));
}
}
}
}
return null;
}

View File

@ -41,14 +41,14 @@ import javax.persistence.Transient;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;
import org.apache.lucene.analysis.ngram.NGramFilterFactory;
import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory;
import org.apache.lucene.analysis.pattern.PatternTokenizerFactory;
import org.apache.lucene.analysis.phonetic.PhoneticFilterFactory;
import org.apache.lucene.analysis.shingle.ShingleFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.apache.lucene.analysis.standard.StandardFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
@ -81,19 +81,22 @@ import ca.uhn.fhir.rest.server.exceptions.UnprocessableEntityException;
})
@AnalyzerDefs({
@AnalyzerDef(name = "autocompleteEdgeAnalyzer",
tokenizer = @TokenizerDef(factory = KeywordTokenizerFactory.class),
filters = {
@TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
@Parameter(name = "pattern",value = "([^a-zA-Z0-9\\.])"),
@Parameter(name = "replacement", value = " "),
@Parameter(name = "replace", value = "all")
tokenizer = @TokenizerDef(factory = PatternTokenizerFactory.class, params= {
@Parameter(name="pattern", value="(.*)"),
@Parameter(name="group", value="1")
}),
filters = {
// @TokenFilterDef(factory = PatternReplaceFilterFactory.class, params = {
// @Parameter(name = "pattern",value = "([^a-zA-Z0-9\\.])"),
// @Parameter(name = "replacement", value = " "),
// @Parameter(name = "replace", value = "all")
// }),
@TokenFilterDef(factory = LowerCaseFilterFactory.class),
@TokenFilterDef(factory = StopFilterFactory.class),
@TokenFilterDef(factory = EdgeNGramFilterFactory.class, params = {
@Parameter(name = "minGramSize", value = "3"),
@Parameter(name = "maxGramSize", value = "50")
})
}),
}),
@AnalyzerDef(name = "autocompletePhoneticAnalyzer",
tokenizer = @TokenizerDef(factory=StandardTokenizerFactory.class),

View File

@ -52,6 +52,7 @@ import ca.uhn.fhir.model.dstu2.resource.DiagnosticReport;
import ca.uhn.fhir.model.dstu2.resource.Encounter;
import ca.uhn.fhir.model.dstu2.resource.Immunization;
import ca.uhn.fhir.model.dstu2.resource.Location;
import ca.uhn.fhir.model.dstu2.resource.Media;
import ca.uhn.fhir.model.dstu2.resource.Medication;
import ca.uhn.fhir.model.dstu2.resource.MedicationOrder;
import ca.uhn.fhir.model.dstu2.resource.Observation;
@ -126,6 +127,9 @@ public abstract class BaseJpaDstu2Test extends BaseJpaTest {
@Qualifier("myPatientDaoDstu2")
protected IFhirResourceDaoPatient<Patient> myPatientDao;
@Autowired
@Qualifier("myMediaDaoDstu2")
protected IFhirResourceDao<Media> myMediaDao;
@Autowired
@Qualifier("myPractitionerDaoDstu2")
protected IFhirResourceDao<Practitioner> myPractitionerDao;
@Autowired

View File

@ -16,8 +16,10 @@ import org.junit.Test;
import ca.uhn.fhir.jpa.dao.FhirSearchDao.Suggestion;
import ca.uhn.fhir.model.dstu2.resource.Device;
import ca.uhn.fhir.model.dstu2.resource.Media;
import ca.uhn.fhir.model.dstu2.resource.Observation;
import ca.uhn.fhir.model.dstu2.resource.Patient;
import ca.uhn.fhir.model.primitive.Base64BinaryDt;
import ca.uhn.fhir.model.primitive.StringDt;
import ca.uhn.fhir.rest.param.StringAndListParam;
import ca.uhn.fhir.rest.param.StringOrListParam;
@ -28,6 +30,46 @@ public class FhirResourceDaoDstu2SearchFtTest extends BaseJpaDstu2Test {
private static final org.slf4j.Logger ourLog = org.slf4j.LoggerFactory.getLogger(FhirResourceDaoDstu2SearchFtTest.class);
/**
 * Creates a Patient and a linked Media resource carrying text fields, a content type and
 * binary data, then checks the keyword suggestions returned for several search fragments.
 * Searching for the content type value ("LCws") is expected to return no suggestions.
 */
@Test
public void testSuggestIgnoresBase64Content() {
    Patient subject = new Patient();
    subject.addName().addFamily("testSuggest");
    IIdType ptId = myPatientDao.create(subject).getId().toUnqualifiedVersionless();

    Media media = new Media();
    media.getSubject().setReference(ptId);
    media.getSubtype().setText("Systolic Blood Pressure");
    media.getContent().setContentType("LCws");
    media.getContent().setData(new Base64BinaryDt(new byte[] {44, 44, 44, 44, 44, 44, 44, 44}));
    media.getContent().setTitle("bbbb syst");
    myMediaDao.create(media);

    ourLog.info(myFhirCtx.newJsonParser().encodeResourceToString(media));

    // Every lookup below runs against the same patient-everything scope
    String everythingScope = "Patient/" + ptId.getIdPart() + "/$everything";
    List<Suggestion> found;

    // Prefix of a word in the subtype text
    found = mySearchDao.suggestKeywords(everythingScope, "_content", "press");
    ourLog.info("Found: " + found);
    assertEquals(2, found.size());
    assertEquals("Pressure", found.get(0).getTerm());
    assertEquals("Systolic Blood Pressure", found.get(1).getTerm());

    // Misspelled variant of the same word
    found = mySearchDao.suggestKeywords(everythingScope, "_content", "prezure");
    ourLog.info("Found: " + found);
    assertEquals(2, found.size());
    assertEquals("Pressure", found.get(0).getTerm());
    assertEquals("Systolic Blood Pressure", found.get(1).getTerm());

    // Fragment that appears both as a whole word in the title and as a prefix in the subtype
    found = mySearchDao.suggestKeywords(everythingScope, "_content", "syst");
    ourLog.info("Found: " + found);
    assertEquals(4, found.size());
    assertEquals("syst", found.get(0).getTerm());
    assertEquals("bbbb syst", found.get(1).getTerm());
    assertEquals("Systolic", found.get(2).getTerm());
    assertEquals("Systolic Blood Pressure", found.get(3).getTerm());

    // The content type value must not yield any suggestion
    found = mySearchDao.suggestKeywords(everythingScope, "_content", "LCws");
    ourLog.info("Found: " + found);
    assertEquals(0, found.size());
}
@Test
public void testSuggest() {
Patient patient = new Patient();
@ -47,6 +89,7 @@ public class FhirResourceDaoDstu2SearchFtTest extends BaseJpaDstu2Test {
obs = new Observation();
obs.getSubject().setReference(ptId);
obs.getCode().setText("ZXC HELLO");
obs.addComponent().getCode().setText("HHHHHHHHHH");
myObservationDao.create(obs);
/*
@ -79,8 +122,9 @@ public class FhirResourceDaoDstu2SearchFtTest extends BaseJpaDstu2Test {
output = mySearchDao.suggestKeywords("Patient/" + ptId.getIdPart() + "/$everything", "_content", "HELO");
ourLog.info("Found: " + output);
assertEquals(1, output.size());
assertEquals(2, output.size());
assertEquals("HELLO", output.get(0).getTerm());
assertEquals("ZXC HELLO", output.get(1).getTerm());
output = mySearchDao.suggestKeywords("Patient/" + ptId.getIdPart() + "/$everything", "_content", "Z");
ourLog.info("Found: " + output);
@ -88,8 +132,9 @@ public class FhirResourceDaoDstu2SearchFtTest extends BaseJpaDstu2Test {
output = mySearchDao.suggestKeywords("Patient/" + ptId.getIdPart() + "/$everything", "_content", "ZX");
ourLog.info("Found: " + output);
assertEquals(1, output.size());
assertEquals(2, output.size());
assertEquals("ZXC", output.get(0).getTerm());
assertEquals("ZXC HELLO", output.get(1).getTerm());
}

20
pom.xml
View File

@ -425,47 +425,47 @@
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-http</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlets</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-servlet</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-webapp</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.websocket</groupId>
<artifactId>websocket-api</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.websocket</groupId>
<artifactId>websocket-client</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.eclipse.jetty.websocket</groupId>
<artifactId>websocket-server</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</dependency>
<dependency>
<groupId>org.fusesource.jansi</groupId>
@ -742,7 +742,7 @@
<plugin>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-maven-plugin</artifactId>
<version>9.2.6.v20141205</version>
<version>9.2.13.v20150730</version>
</plugin>
<plugin>
<groupId>org.eluder.coveralls</groupId>