DEV: Add tests to allmpnet tokenizer (#107)

* DEV: Add tests to allmpnet tokenizer

* lint
This commit is contained in:
Rafael dos Santos Silva 2023-07-14 11:37:21 -03:00 committed by GitHub
parent 473732c18a
commit b82074850e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 17 additions and 0 deletions

View File

@ -83,3 +83,20 @@ describe DiscourseAi::Tokenizer::OpenAiTokenizer do
end
end
end
# Specs for the all-mpnet-base-v2 tokenizer wrapper: token counting and
# token-limited truncation, both invoked directly on the class.
describe DiscourseAi::Tokenizer::AllMpnetBaseV2Tokenizer do
  describe "#size" do
    context "returns a token count" do
      it "for a sentence with punctuation and capitalization and numbers" do
        # The expected count is pinned to the tokenizer's current vocabulary;
        # presumably it includes any special tokens the model adds — TODO confirm.
        token_count = described_class.size("Hello, World! 123")

        expect(token_count).to eq(7)
      end
    end
  end

  describe "#truncate" do
    it "truncates a sentence" do
      long_sentence = "foo bar baz qux quux corge grault garply waldo fred plugh xyzzy thud"

      # A limit of 3 tokens leaves only the first two words in the output.
      shortened = described_class.truncate(long_sentence, 3)

      expect(shortened).to eq("foo bar")
    end
  end
end