NIFI-12629 - adding metadata filtering to QueryPinecone (#8264)

* NIFI-12629 - adding metadata filtering to QueryPinecone
This commit is contained in:
Pierre Villard 2024-01-19 22:03:38 +04:00 committed by GitHub
parent 6649115a34
commit f402970132
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 11 additions and 2 deletions

View File

@@ -18,6 +18,7 @@ from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency
import QueryUtils import QueryUtils
import pinecone import pinecone
import json
from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service
@@ -109,7 +110,14 @@ class QueryPinecone(FlowFileTransform):
) )
NAMESPACE = PropertyDescriptor( NAMESPACE = PropertyDescriptor(
name="Namespace", name="Namespace",
description="The name of the Pinecone Namespace to put the documents to.", description="The name of the Pinecone Namespace to query into.",
required=False,
validators=[StandardValidators.NON_EMPTY_VALIDATOR],
expression_language_scope=ExpressionLanguageScope.FLOWFILE_ATTRIBUTES
)
FILTER = PropertyDescriptor(
name="Metadata Filter",
description="Optional metadata filter to apply with the query. For example: { \"author\": {\"$eq\": \"john.doe\"} }",
required=False, required=False,
validators=[StandardValidators.NON_EMPTY_VALIDATOR], validators=[StandardValidators.NON_EMPTY_VALIDATOR],
expression_language_scope=ExpressionLanguageScope.FLOWFILE_ATTRIBUTES expression_language_scope=ExpressionLanguageScope.FLOWFILE_ATTRIBUTES
@@ -164,8 +172,9 @@ class QueryPinecone(FlowFileTransform):
index = pinecone.Index(index_name) index = pinecone.Index(index_name)
text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue() text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
filter = context.getProperty(self.FILTER).evaluateAttributeExpressions(flowfile).getValue()
vectorstore = Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace) vectorstore = Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace)
results = vectorstore.similarity_search_with_score(query, num_results) results = vectorstore.similarity_search_with_score(query, num_results, filter=None if filter is None else json.loads(filter))
documents = [] documents = []
for result in results: for result in results: