mirror of https://github.com/apache/nifi.git
NIFI-12636 - Upgrade dependencies for Pinecone, ChromaDB and OpenAI processors
Signed-off-by: Joe Gresock <jgresock@gmail.com>
This closes #8293.
This commit is contained in:
parent
85dc637a96
commit
3afeac6341
|
@ -33,7 +33,7 @@ class PromptChatGPT(FlowFileTransform):
|
|||
version = '2.0.0-SNAPSHOT'
|
||||
description = "Submits a prompt to ChatGPT, writing the results either to a FlowFile attribute or to the contents of the FlowFile"
|
||||
tags = ["text", "chatgpt", "gpt", "machine learning", "ML", "artificial intelligence", "ai", "document", "langchain"]
|
||||
dependencies = ['langchain==0.0.331', 'openai==0.28.1', 'jsonpath-ng']
|
||||
dependencies = ['langchain==0.1.2', 'openai==1.9.0', 'jsonpath-ng']
|
||||
|
||||
|
||||
MODEL = PropertyDescriptor(
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency, ExpressionLanguageScope
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency
|
||||
from langchain.embeddings.openai import OpenAIEmbeddings
|
||||
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
|
||||
|
||||
|
|
|
@ -13,13 +13,13 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from langchain.vectorstores import Pinecone
|
||||
import langchain.vectorstores
|
||||
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency
|
||||
import pinecone
|
||||
from pinecone import Pinecone
|
||||
import json
|
||||
from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service
|
||||
from nifiapi.documentation import use_case, multi_processor_use_case, ProcessorConfiguration
|
||||
from nifiapi.documentation import use_case
|
||||
|
||||
@use_case(description="Create vectors/embeddings that represent text content and send the vectors to Pinecone",
|
||||
notes="This use case assumes that the data has already been formatted in JSONL format with the text to store in Pinecone provided in the 'text' field.",
|
||||
|
@ -149,6 +149,7 @@ class PutPinecone(FlowFileTransform):
|
|||
DOC_ID_FIELD_NAME]
|
||||
|
||||
embeddings = None
|
||||
pc = None
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
pass
|
||||
|
@ -157,15 +158,12 @@ class PutPinecone(FlowFileTransform):
|
|||
return self.properties
|
||||
|
||||
def onScheduled(self, context):
|
||||
api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
|
||||
pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
|
||||
|
||||
# initialize pinecone
|
||||
pinecone.init(
|
||||
api_key=api_key,
|
||||
environment=pinecone_env,
|
||||
self.pc = Pinecone(
|
||||
api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
|
||||
environment=context.getProperty(self.PINECONE_ENV).getValue()
|
||||
)
|
||||
|
||||
# initialize embedding service
|
||||
self.embeddings = create_embedding_service(context)
|
||||
|
||||
def transform(self, context, flowfile):
|
||||
|
@ -174,7 +172,7 @@ class PutPinecone(FlowFileTransform):
|
|||
namespace = context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
|
||||
id_field_name = context.getProperty(self.DOC_ID_FIELD_NAME).evaluateAttributeExpressions(flowfile).getValue()
|
||||
|
||||
index = pinecone.Index(index_name)
|
||||
index = self.pc.Index(index_name)
|
||||
|
||||
# Read the FlowFile content as "json-lines".
|
||||
json_lines = flowfile.getContentsAsBytes().decode()
|
||||
|
@ -210,6 +208,6 @@ class PutPinecone(FlowFileTransform):
|
|||
i += 1
|
||||
|
||||
text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
|
||||
vectorstore = Pinecone(index, self.embeddings.embed_query, text_key)
|
||||
vectorstore = langchain.vectorstores.Pinecone(index, self.embeddings.embed_query, text_key)
|
||||
vectorstore.add_texts(texts=texts, metadatas=metadatas, ids=ids, namespace=namespace)
|
||||
return FlowFileTransformResult(relationship="success")
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
import json
|
||||
|
||||
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope
|
||||
import ChromaUtils
|
||||
import EmbeddingUtils
|
||||
import QueryUtils
|
||||
|
|
|
@ -13,11 +13,11 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from langchain.vectorstores import Pinecone
|
||||
import langchain.vectorstores
|
||||
from nifiapi.flowfiletransform import FlowFileTransform, FlowFileTransformResult
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency
|
||||
import QueryUtils
|
||||
import pinecone
|
||||
from pinecone import Pinecone
|
||||
import json
|
||||
from EmbeddingUtils import OPENAI, HUGGING_FACE, EMBEDDING_MODEL, create_embedding_service
|
||||
|
||||
|
@ -143,6 +143,7 @@ class QueryPinecone(FlowFileTransform):
|
|||
|
||||
embeddings = None
|
||||
query_utils = None
|
||||
pc = None
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
pass
|
||||
|
@ -151,18 +152,15 @@ class QueryPinecone(FlowFileTransform):
|
|||
return self.properties
|
||||
|
||||
def onScheduled(self, context):
|
||||
api_key = context.getProperty(self.PINECONE_API_KEY).getValue()
|
||||
pinecone_env = context.getProperty(self.PINECONE_ENV).getValue()
|
||||
|
||||
# initialize pinecone
|
||||
pinecone.init(
|
||||
api_key=api_key,
|
||||
environment=pinecone_env,
|
||||
self.pc = Pinecone(
|
||||
api_key=context.getProperty(self.PINECONE_API_KEY).getValue(),
|
||||
environment=context.getProperty(self.PINECONE_ENV).getValue()
|
||||
)
|
||||
# initialize embedding service
|
||||
self.embeddings = create_embedding_service(context)
|
||||
self.query_utils = QueryUtils.QueryUtils(context)
|
||||
|
||||
|
||||
def transform(self, context, flowfile):
|
||||
# First, check if our index already exists. If it doesn't, we create it
|
||||
index_name = context.getProperty(self.INDEX_NAME).evaluateAttributeExpressions(flowfile).getValue()
|
||||
|
@ -170,11 +168,11 @@ class QueryPinecone(FlowFileTransform):
|
|||
namespace = context.getProperty(self.NAMESPACE).evaluateAttributeExpressions(flowfile).getValue()
|
||||
num_results = context.getProperty(self.NUMBER_OF_RESULTS).evaluateAttributeExpressions(flowfile).asInteger()
|
||||
|
||||
index = pinecone.Index(index_name)
|
||||
index = self.pc.Index(index_name)
|
||||
|
||||
text_key = context.getProperty(self.TEXT_KEY).evaluateAttributeExpressions().getValue()
|
||||
filter = context.getProperty(self.FILTER).evaluateAttributeExpressions(flowfile).getValue()
|
||||
vectorstore = Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace)
|
||||
vectorstore = langchain.vectorstores.Pinecone(index, self.embeddings.embed_query, text_key, namespace=namespace)
|
||||
results = vectorstore.similarity_search_with_score(query, num_results, filter=None if filter is None else json.loads(filter))
|
||||
|
||||
documents = []
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
# limitations under the License.
|
||||
from typing import Tuple
|
||||
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, ExpressionLanguageScope, PropertyDependency
|
||||
from nifiapi.properties import PropertyDescriptor, StandardValidators, PropertyDependency
|
||||
import json
|
||||
|
||||
ROW_ORIENTED = "Row-Oriented"
|
||||
|
|
|
@ -14,16 +14,16 @@
|
|||
# limitations under the License.
|
||||
|
||||
# Shared requirements
|
||||
openai==0.28.1
|
||||
openai==1.9.0
|
||||
|
||||
# Chroma requirements
|
||||
chromadb==0.4.14
|
||||
chromadb==0.4.22
|
||||
onnxruntime
|
||||
tokenizers
|
||||
tqdm
|
||||
requests
|
||||
|
||||
# Pinecone requirements
|
||||
pinecone-client
|
||||
pinecone-client==3.0.1
|
||||
tiktoken
|
||||
langchain==0.0.331
|
||||
langchain==0.1.2
|
||||
|
|
Loading…
Reference in New Issue