Fixes for embeddings and truncate (#67)
This commit is contained in:
parent
9ae8f86850
commit
739b314312
|
@ -14,6 +14,9 @@ module DiscourseAi
|
||||||
tokenize(text).size
|
tokenize(text).size
|
||||||
end
|
end
|
||||||
def self.truncate(text, max_length)
|
def self.truncate(text, max_length)
|
||||||
|
# Fast track the common case where the text is already short enough.
|
||||||
|
return text if text.size < max_length
|
||||||
|
|
||||||
tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
|
tokenizer.decode(tokenizer.encode(text).ids.take(max_length))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -42,6 +45,9 @@ module DiscourseAi
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.truncate(text, max_length)
|
def self.truncate(text, max_length)
|
||||||
|
# Fast track the common case where the text is already short enough.
|
||||||
|
return text if text.size < max_length
|
||||||
|
|
||||||
tokenizer.decode(tokenize(text).take(max_length))
|
tokenizer.decode(tokenize(text).take(max_length))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -17,7 +17,7 @@ task "ai:embeddings:create_table" => [:environment] do
|
||||||
end
|
end
|
||||||
|
|
||||||
desc "Backfill embeddings for all topics"
|
desc "Backfill embeddings for all topics"
|
||||||
task "ai:embeddings:backfill", [:start_topic] => [:environment] do
|
task "ai:embeddings:backfill", [:start_topic] => [:environment] do |_, args|
|
||||||
public_categories = Category.where(read_restricted: false).pluck(:id)
|
public_categories = Category.where(read_restricted: false).pluck(:id)
|
||||||
topic_embeddings = DiscourseAi::Embeddings::Topic.new
|
topic_embeddings = DiscourseAi::Embeddings::Topic.new
|
||||||
Topic
|
Topic
|
||||||
|
|
Loading…
Reference in New Issue