FIX: display search correctly, fix crash when stripping XML (#668)

- Display the filtered search correctly, so it is not confusing
- When stripping XML, a chunk consisting of only `<` would cause a crash
- Improve the SQL Helper so it is better aware of Data Explorer
This commit is contained in:
Sam 2024-06-14 15:28:40 +10:00 committed by GitHub
parent f642a27f11
commit 460f5c4553
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 65 additions and 20 deletions

View File

@ -58,7 +58,18 @@ module DiscourseAi
- When generating SQL always use ```sql Markdown code blocks. - When generating SQL always use ```sql Markdown code blocks.
- When generating SQL NEVER end SQL samples with a semicolon (;). - When generating SQL NEVER end SQL samples with a semicolon (;).
Eg: - You also understand the special formatting rules for Data Explorer in Discourse.
- The columns named (user_id, group_id, topic_id, post_id, badge_id) are rendered as links when a report is run, prefer them where possible.
- You can define custom params to create flexible queries, example:
-- [params]
-- int :num = 1
-- text :name
SELECT :num, :name
- You support the types (integer, text, boolean, date)
- When generating SQL use markdown formatting for code blocks, example:
```sql ```sql
select 1 from table select 1 from table

View File

@ -4,6 +4,8 @@ module DiscourseAi
module AiBot module AiBot
module Tools module Tools
class Search < Tool class Search < Tool
attr_reader :last_query
MIN_SEMANTIC_RESULTS = 5 MIN_SEMANTIC_RESULTS = 5
class << self class << self
@ -95,40 +97,38 @@ module DiscourseAi
parameters.slice(:category, :user, :order, :max_posts, :tags, :before, :after, :status) parameters.slice(:category, :user, :order, :max_posts, :tags, :before, :after, :status)
end end
def search_query
parameters[:search_query]
end
def invoke def invoke
search_string = search_terms = []
search_args.reduce(+parameters[:search_query].to_s) do |memo, (key, value)|
return memo if value.blank?
memo << " " << "#{key}:#{value}"
end
@last_query = search_string search_terms << options[:base_query] if options[:base_query].present?
search_terms << search_query.strip if search_query.present?
search_args.each { |key, value| search_terms << "#{key}:#{value}" if value.present? }
yield(I18n.t("discourse_ai.ai_bot.searching", query: search_string))
if options[:base_query].present?
search_string = "#{search_string} #{options[:base_query]}"
end
safe_search_string = search_string.to_s
guardian = nil guardian = nil
if options[:search_private] && context[:user] if options[:search_private] && context[:user]
guardian = Guardian.new(context[:user]) guardian = Guardian.new(context[:user])
else else
guardian = Guardian.new guardian = Guardian.new
safe_search_string += " status:public" search_terms << "status:public"
end end
results = search_string = search_terms.join(" ").to_s
::Search.execute(safe_search_string, search_type: :full_page, guardian: guardian) @last_query = search_string
yield(I18n.t("discourse_ai.ai_bot.searching", query: search_string))
results = ::Search.execute(search_string, search_type: :full_page, guardian: guardian)
max_results = calculate_max_results(llm) max_results = calculate_max_results(llm)
results_limit = parameters[:limit] || max_results results_limit = parameters[:limit] || max_results
results_limit = max_results if parameters[:limit].to_i > max_results results_limit = max_results if parameters[:limit].to_i > max_results
should_try_semantic_search = should_try_semantic_search =
SiteSetting.ai_embeddings_semantic_search_enabled && parameters[:search_query].present? SiteSetting.ai_embeddings_semantic_search_enabled && search_query.present?
max_semantic_results = max_results / 4 max_semantic_results = max_results / 4
results_limit = results_limit - max_semantic_results if should_try_semantic_search results_limit = results_limit - max_semantic_results if should_try_semantic_search

View File

@ -17,6 +17,7 @@ module DiscourseAi
end end
end end
@parsed.concat(parse_tags(text)) @parsed.concat(parse_tags(text))
@parsed, result = process_parsed(@parsed) @parsed, result = process_parsed(@parsed)
result result
end end
@ -69,10 +70,15 @@ module DiscourseAi
while true while true
before, after = text.split("<", 2) before, after = text.split("<", 2)
parsed << { type: :text, content: before } parsed << { type: :text, content: before } if before && !before.empty?
break if after.nil? break if after.nil?
if before.empty? && after.empty?
parsed << { type: :maybe_tag, content: "<" }
break
end
tag, after = after.split(">", 2) tag, after = after.split(">", 2)
is_end_tag = tag[0] == "/" is_end_tag = tag[0] == "/"

View File

@ -29,6 +29,32 @@ describe DiscourseAi::Completions::PromptMessagesBuilder do
expect(result).to eq("\nhello\n") expect(result).to eq("\nhello\n")
end end
it "does not crash when we send a <" do
result = +""
result << (tag_stripper << "based:\n")
result << (tag_stripper << "<").to_s
result << (tag_stripper << " href")
result << (tag_stripper << ">")
result << (tag_stripper << "test ")
expect(result).to eq("based:\n< href>test ")
end
it "strips thinking correctly in a stream" do
result = +""
result << (tag_stripper << "hello")
result << (tag_stripper << "<").to_s
result << (tag_stripper << "thinking").to_s
result << (tag_stripper << ">").to_s
result << (tag_stripper << "test").to_s
result << (tag_stripper << "<").to_s
result << (tag_stripper << "/").to_s
result << (tag_stripper << "thinking").to_s
result << (tag_stripper << "> world")
expect(result).to eq("hello world")
end
it "works when nesting unrelated tags it strips correctly" do it "works when nesting unrelated tags it strips correctly" do
text = <<~TEXT text = <<~TEXT
<thinking> <thinking>

View File

@ -73,6 +73,8 @@ RSpec.describe DiscourseAi::AiBot::Tools::Search do
results = search.invoke(&progress_blk) results = search.invoke(&progress_blk)
expect(results[:rows].length).to eq(1) expect(results[:rows].length).to eq(1)
expect(search.last_query).to eq("#funny order:latest")
GroupUser.create!(group: group, user: user) GroupUser.create!(group: group, user: user)
results = search.invoke(&progress_blk) results = search.invoke(&progress_blk)