Fixes for embeddings and truncate (#67)
This commit is contained in:
parent
9ae8f86850
commit
739b314312
|
@ -14,6 +14,9 @@ module DiscourseAi
|
|||
tokenize(text).size
|
||||
end
|
||||
# Shortens +text+ so that it encodes to at most +max_length+ tokens.
#
# text       - String to shorten.
# max_length - Integer maximum number of token ids to keep.
#
# Returns +text+ itself when it already fits, otherwise the string decoded
# from the first +max_length+ token ids.
def self.truncate(text, max_length)
  # Fast track the common case where the text is already short enough.
  # The limit is expressed in tokens, not characters, and one multibyte
  # character may encode to several tokens, so the byte count is compared
  # instead of the character count (both are identical for ASCII text).
  # NOTE(review): assumes a token never covers less than one byte of input
  # and that the tokenizer adds no extra ids - confirm for this tokenizer.
  return text if text.bytesize < max_length

  ids = tokenizer.encode(text).ids

  # The real token count fits: hand back the input untouched rather than
  # pushing it through an encode/decode round trip.
  return text if ids.size <= max_length

  tokenizer.decode(ids.take(max_length))
end
|
||||
end
|
||||
|
@ -42,6 +45,9 @@ module DiscourseAi
|
|||
end
|
||||
|
||||
# Shortens +text+ so that it tokenizes to at most +max_length+ tokens.
#
# text       - String to shorten.
# max_length - Integer maximum number of tokens to keep.
#
# Returns +text+ itself when it already fits, otherwise the string decoded
# from the first +max_length+ tokens.
def self.truncate(text, max_length)
  # Fast track the common case where the text is already short enough.
  # The limit is expressed in tokens, not characters, and one multibyte
  # character may encode to several tokens, so the byte count is compared
  # instead of the character count (both are identical for ASCII text).
  # NOTE(review): assumes a token never covers less than one byte of input
  # - confirm for this tokenizer.
  return text if text.bytesize < max_length

  tokens = tokenize(text)

  # The real token count fits: hand back the input untouched rather than
  # pushing it through a tokenize/decode round trip.
  return text if tokens.size <= max_length

  tokenizer.decode(tokens.take(max_length))
end
|
||||
end
|
||||
|
|
|
@ -17,7 +17,7 @@ task "ai:embeddings:create_table" => [:environment] do
|
|||
end
|
||||
|
||||
desc "Backfill embeddings for all topics"
|
||||
task "ai:embeddings:backfill", [:start_topic] => [:environment] do
|
||||
task "ai:embeddings:backfill", [:start_topic] => [:environment] do |_, args|
|
||||
public_categories = Category.where(read_restricted: false).pluck(:id)
|
||||
topic_embeddings = DiscourseAi::Embeddings::Topic.new
|
||||
Topic
|
||||
|
|
Loading…
Reference in New Issue