# frozen_string_literal: true
RSpec.describe DiscourseAi::Embeddings::Strategies::Truncation do
  subject(:truncation) { described_class.new }

  describe "#prepare_target_text" do
    context "when using vector def from OpenAI" do
      before { SiteSetting.max_post_length = 100_000 }

      fab!(:topic)
      # Three long posts are fabricated into the topic so its combined raw text
      # far exceeds the embedding definition's max_sequence_length.
      fab!(:post) do
        Fabricate(:post, topic: topic, raw: "Baby, bird, bird, bird\nBird is the word\n" * 500)
      end
      fab!(:post) do
        Fabricate(
          :post,
          topic: topic,
          raw: "Don't you know about the bird?\nEverybody knows that the bird is a word\n" * 400,
        )
      end
      fab!(:post) { Fabricate(:post, topic: topic, raw: "Surfin' bird\n" * 800) }
      fab!(:open_ai_embedding_def)

      it "truncates a topic" do
        prepared_text = truncation.prepare_target_text(topic, open_ai_embedding_def)

        expect(open_ai_embedding_def.tokenizer.size(prepared_text)).to be <=
          open_ai_embedding_def.max_sequence_length
      end
    end
  end
end