diff --git a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py index 5b8e3738fe..a372a20690 100644 --- a/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py +++ b/nifi-python-extensions/nifi-openai-module/src/main/python/PromptChatGPT.py @@ -33,7 +33,7 @@ class PromptChatGPT(FlowFileTransform): version = '2.0.0-SNAPSHOT' description = "Submits a prompt to ChatGPT, writing the results either to a FlowFile attribute or to the contents of the FlowFile" tags = ["text", "chatgpt", "gpt", "machine learning", "ML", "artificial intelligence", "ai", "document", "langchain"] - dependencies = ['langchain==0.0.331', 'openai==0.28.1', 'jsonpath-ng'] + dependencies = ['langchain==0.1.2', 'openai==1.9.0', 'jsonpath-ng'] MODEL = PropertyDescriptor( diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py index 9b0218c9c0..1b9ed90ba6 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/EmbeddingUtils.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency, ExpressionLanguageScope +from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py index 67abfa6b09..495f41fea3 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/PutPinecone.py @@ -13,13 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -from langchain.vectorstores import Pinecone +import langchain.vectorstores from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency -import pinecone +from pinecone import Pinecone import json from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service -from nifiapi.documentation import use_case, multi_processor_use_case, ProcessorConfiguration +from nifiapi.documentation import use_case @use_case(description="Create vectors/embeddings that represent text content and send the vectors to Pinecone", notes="This use case assumes that the data has already been formatted in JSONL format with the text to store in Pinecone provided in the 'text' field.", @@ -149,6 +149,7 @@ class PutPinecone(FlowFileTransform): DOC_ID_FIELD_NAME] embeddings = None + pc = None def __init__(self, **kwargs): pass @@ -157,15 +158,12 @@ class PutPinecone(FlowFileTransform): return self.properties def onScheduled(self, context): - api_key = context.getProperty(self.PINECONE_API_KEY).getValue() - pinecone_env = context.getProperty(self.PINECONE_ENV).getValue() - # initialize pinecone - pinecone.init( - api_key=api_key, - environment=pinecone_env, + self.pc = Pinecone( + api_key=context.getProperty(self.PINECONE_API_KEY).getValue(), + environment=context.getProperty(self.PINECONE_ENV).getValue() ) - + # initialize embedding service self.embeddings = create_embedding_service(context) def transform(self, context, flowfile): @@ -174,7 +172,7 @@ class PutPinecone(FlowFileTransform): namespace = context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue() id_field_name = context.getProperty(self.DOC_ID_FIELD_NAME).evaluateAttributeExpressions(flowfile).getValue() - index = pinecone.Index(index_name) + index = self.pc.Index(index_name) # Read the FlowFile content as "json-lines". json_lines = flowfile.getContentsAsBytes().decode() @@ -210,6 +208,6 @@ class PutPinecone(FlowFileTransform): i += 1 text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue() - vectorstore = Pinecone(index, self.embeddings.embed_query, text_key) + vectorstore = langchain.vectorstores.Pinecone(index, self.embeddings.embed_query, text_key) vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids, namespace=namespace) return FlowFileTransformResult(relationship="success") diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py index ee0b62ebe4..947b281058 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryChroma.py @@ -16,7 +16,7 @@ import json from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult -from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency +from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope import ChromaUtils import EmbeddingUtils import QueryUtils diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py index b5ed592378..0202388196 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryPinecone.py @@ -13,11 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from langchain.vectorstores import Pinecone +import langchain.vectorstores from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency import QueryUtils -import pinecone +from pinecone import Pinecone import json from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service @@ -143,6 +143,7 @@ class QueryPinecone(FlowFileTransform): embeddings = None query_utils = None + pc = None def __init__(self, **kwargs): pass @@ -151,18 +152,15 @@ class QueryPinecone(FlowFileTransform): return self.properties def onScheduled(self, context): - api_key = context.getProperty(self.PINECONE_API_KEY).getValue() - pinecone_env = context.getProperty(self.PINECONE_ENV).getValue() - # initialize pinecone - pinecone.init( - api_key=api_key, - environment=pinecone_env, + self.pc = Pinecone( + api_key=context.getProperty(self.PINECONE_API_KEY).getValue(), + environment=context.getProperty(self.PINECONE_ENV).getValue() ) - self.embeddings = create_embedding_service(context) + # initialize embedding service + self.embeddings = create_embedding_service(context) self.query_utils = QueryUtils.QueryUtils(context) - def transform(self, context, flowfile): # First, check if our index already exists. If it doesn't, we create it index_name = context.getProperty(self.INDEX_NAME).evaluateAttributeExpressions(flowfile).getValue() @@ -170,11 +168,11 @@ class QueryPinecone(FlowFileTransform): namespace = context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue() num_results = context.getProperty(self.NUMBER_OF_RESULTS).evaluateAttributeExpressions(flowfile).asInteger() - index = pinecone.Index(index_name) + index = self.pc.Index(index_name) text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue() filter = context.getProperty(self.FILTER).evaluateAttributeExpressions(flowfile).getValue() - vectorstore = Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace) + vectorstore = langchain.vectorstores.Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace) results = vectorstore.similarity_search_with_score(query, num_results, filter=None if filter is None else json.loads(filter)) documents = [] diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py index f27a7f8a8d..0ca33fdd94 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/QueryUtils.py @@ -14,7 +14,7 @@ # limitations under the License. from typing import Tuple -from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency +from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency import json ROW_ORIENTED = "Row-Oriented" diff --git a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt index 4e0669a38e..f3fea58948 100644 --- a/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt +++ b/nifi-python-extensions/nifi-text-embeddings-module/src/main/python/vectorstores/requirements.txt @@ -14,16 +14,16 @@ # limitations under the License. # Shared requirements -openai==0.28.1 +openai==1.9.0 # Chroma requirements -chromadb==0.4.14 +chromadb==0.4.22 onnxruntime tokenizers tqdm requests # Pinecone requirements -pinecone-client +pinecone-client==3.0.1 tiktoken -langchain==0.0.331 +langchain==0.1.2