Change token and reference search params to 'keyword' field type (#3310)

This commit is contained in:
Jaison B 2022-01-18 15:57:51 -07:00 committed by GitHub
parent 3062430bc5
commit cc2ca9e491
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 21 additions and 16 deletions

View File

@ -23,6 +23,7 @@ package ca.uhn.fhir.jpa.model.search;
import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaElement;
import org.hibernate.search.engine.backend.document.model.dsl.IndexSchemaObjectField;
import org.hibernate.search.engine.backend.types.Aggregable;
import org.hibernate.search.engine.backend.types.ObjectStructure;
import org.hibernate.search.engine.backend.types.Projectable;
import org.hibernate.search.engine.backend.types.dsl.IndexFieldTypeFactory;
@ -40,11 +41,10 @@ import static ca.uhn.fhir.jpa.model.search.HibernateSearchIndexWriter.IDX_STRING
/**
* Allows hibernate search to index
*
* <p>
* CodeableConcept.text
* Coding.display
* Identifier.type.text
*
*/
public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBridge<ExtendedLuceneIndexData> {
@ -69,9 +69,8 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
//create them adhoc. https://docs.jboss.org/hibernate/search/6.0/reference/en-US/html_single/#mapper-orm-bridge-index-field-dsl-dynamic
//I _think_ im doing the right thing here by indicating that everything matching this template uses this analyzer.
IndexFieldTypeFactory indexFieldTypeFactory = thePropertyBindingContext.typeFactory();
StringIndexFieldTypeOptionsStep<?> standardAnalyzer =
indexFieldTypeFactory.asString()
// TODO mb Once Ken finishes extracting a common base, we can share these constants with HapiElasticsearchAnalysisConfigurer and HapiLuceneAnalysisConfigurer
StringIndexFieldTypeOptionsStep<?> standardAnalyzer = indexFieldTypeFactory.asString()
.analyzer("standardAnalyzer")
.projectable(Projectable.NO);
@ -80,11 +79,14 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
.analyzer("exactAnalyzer") // default max-length is 256. Is that enough for code system uris?
.projectable(Projectable.NO);
StringIndexFieldTypeOptionsStep<?> normStringAnalyzer =
indexFieldTypeFactory.asString()
StringIndexFieldTypeOptionsStep<?> normStringAnalyzer = indexFieldTypeFactory.asString()
.analyzer("normStringAnalyzer")
.projectable(Projectable.NO);
// TODO JB: may have to add normalizer to support case insensitive searches depending on token flags
StringIndexFieldTypeOptionsStep<?> keywordFieldType = indexFieldTypeFactory.asString()
.projectable(Projectable.NO)
.aggregable(Aggregable.YES);
// the old style for _text and _contains
@ -119,11 +121,14 @@ public class SearchParamTextPropertyBinder implements PropertyBinder, PropertyBr
// But the standard tokenizers aren't that flexible. As second best, it would be nice to use elastic multi-fields
// to apply three different tokenizers to a single value.
// Instead, just be simple and expand into three full fields for now
spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob("*.token");
spfield.fieldTemplate("token-code", exactAnalyzer).matchingPathGlob("*.token.code").multiValued();
spfield.fieldTemplate("token-code-system", exactAnalyzer).matchingPathGlob("*.token.code-system").multiValued();
spfield.fieldTemplate("token-system", exactAnalyzer).matchingPathGlob("*.token.system").multiValued();
spfield.fieldTemplate("reference-value", exactAnalyzer).matchingPathGlob("*.reference.value").multiValued();
String tokenPathGlob = "*.token";
spfield.objectFieldTemplate("tokenIndex", ObjectStructure.FLATTENED).matchingPathGlob(tokenPathGlob);
spfield.fieldTemplate("token-code", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code").multiValued();
spfield.fieldTemplate("token-code-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".code-system").multiValued();
spfield.fieldTemplate("token-system", keywordFieldType).matchingPathGlob(tokenPathGlob + ".system").multiValued();
// reference
spfield.fieldTemplate("reference-value", keywordFieldType).matchingPathGlob("*.reference.value").multiValued();
// last, since the globs are matched in declaration order, and * matches even nested nodes.
spfield.objectFieldTemplate("spObject", ObjectStructure.FLATTENED).matchingPathGlob("*");