discourse/lib/search.rb

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1255 lines
36 KiB
Ruby
Raw Normal View History

# frozen_string_literal: true
class Search
DIACRITICS ||= /([\u0300-\u036f]|[\u1AB0-\u1AFF]|[\u1DC0-\u1DFF]|[\u20D0-\u20FF])/
HIGHLIGHT_CSS_CLASS = 'search-highlight'
2013-02-05 14:16:51 -05:00
cattr_accessor :preloaded_topic_custom_fields
self.preloaded_topic_custom_fields = Set.new
2013-02-05 14:16:51 -05:00
def self.per_facet
5
end
def self.strip_diacritics(str)
s = str.unicode_normalize(:nfkd)
s.gsub!(DIACRITICS, "")
s.strip!
s
end
def self.per_filter
50
end
2013-02-05 14:16:51 -05:00
def self.facets
%w(topic category user private_messages tags all_topics)
2013-02-05 14:16:51 -05:00
end
def self.ts_config(locale = SiteSetting.default_locale)
# if adding a text search configuration, you should check PG beforehand:
# SELECT cfgname FROM pg_ts_config;
# As an aside, dictionaries can be listed by `\dFd`, the
# physical locations are in /usr/share/postgresql/<version>/tsearch_data.
# But it may not appear there based on pg extension configuration.
# base docker config
#
case locale.split("_")[0].to_sym
when :da then 'danish'
when :nl then 'dutch'
when :en then 'english'
when :fi then 'finnish'
when :fr then 'french'
when :de then 'german'
when :hu then 'hungarian'
when :it then 'italian'
when :nb then 'norwegian'
when :pt then 'portuguese'
when :ro then 'romanian'
when :ru then 'russian'
when :es then 'spanish'
when :sv then 'swedish'
when :tr then 'turkish'
else 'simple' # use the 'simple' stemmer for other languages
end
end
def self.prepare_data(search_data, purpose = :query)
purpose ||= :query
data = search_data.dup
data.force_encoding("UTF-8")
if purpose != :topic
# TODO cppjieba_rb is designed for chinese, we need something else for Japanese
# Korean appears to be safe cause words are already space seperated
# For Japanese we should investigate using kakasi
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
require 'cppjieba_rb' unless defined? CppjiebaRb
mode = (purpose == :query ? :query : :mix)
data = CppjiebaRb.segment(search_data, mode: mode)
# TODO: we still want to tokenize here but the current stopword list is too wide
# in cppjieba leading to words such as volume to be skipped. PG already has an English
# stopword list so use that vs relying on cppjieba
if ts_config != 'english'
data = CppjiebaRb.filter_stop_word(data)
else
data = data.filter { |s| s.present? }
end
data = data.join(' ')
else
data.squish!
end
if SiteSetting.search_ignore_accents
data = strip_diacritics(data)
end
end
data.gsub!(EmailCook.url_regexp) do |url|
uri = URI.parse(url)
uri.query = nil
uri.to_s
rescue URI::Error
# Don't fail even if URL turns out to be invalid
end
data
end
def self.word_to_date(str)
if str =~ /^[0-9]{1,3}$/
return Time.zone.now.beginning_of_day.days_ago(str.to_i)
end
if str =~ /^([12][0-9]{3})(-([0-1]?[0-9]))?(-([0-3]?[0-9]))?$/
year = $1.to_i
month = $2 ? $3.to_i : 1
day = $4 ? $5.to_i : 1
return if day == 0 || month == 0 || day > 31 || month > 12
2018-05-06 23:38:04 -04:00
return begin
Time.zone.parse("#{year}-#{month}-#{day}")
rescue ArgumentError
end
end
if str.downcase == "yesterday"
return Time.zone.now.beginning_of_day.yesterday
end
titlecase = str.downcase.titlecase
if Date::DAYNAMES.include?(titlecase)
return Time.zone.now.beginning_of_week(str.downcase.to_sym)
end
if idx = (Date::MONTHNAMES.find_index(titlecase) ||
Date::ABBR_MONTHNAMES.find_index(titlecase))
delta = Time.zone.now.month - idx
delta += 12 if delta < 0
Time.zone.now.beginning_of_month.months_ago(delta)
end
end
def self.min_post_id_no_cache
return 0 unless SiteSetting.search_prefer_recent_posts?
2016-08-12 00:36:38 -04:00
offset, has_more = Post.unscoped
.order('id desc')
.offset(SiteSetting.search_recent_posts_size - 1)
.limit(2)
.pluck(:id)
has_more ? offset : 0
end
def self.min_post_id(opts = nil)
return 0 unless SiteSetting.search_prefer_recent_posts?
# It can be quite slow to count all the posts so let's cache it
Discourse.cache.fetch("search-min-post-id:#{SiteSetting.search_recent_posts_size}", expires_in: 1.week) do
min_post_id_no_cache
end
end
attr_accessor :term
attr_reader :clean_term
def initialize(term, opts = nil)
@opts = opts || {}
@guardian = @opts[:guardian] || Guardian.new
@search_context = @opts[:search_context]
@blurb_length = @opts[:blurb_length]
@valid = true
@page = @opts[:page]
term = term.to_s.dup
# Removes any zero-width characters from search terms
term.gsub!(/[\u200B-\u200D\uFEFF]/, '')
# Replace curly quotes to regular quotes
term.gsub!(/[\u201c\u201d]/, '"')
@clean_term = term
@in_title = false
term = process_advanced_search!(term)
if term.present?
@term = Search.prepare_data(term, Topic === @search_context ? :topic : nil)
@original_term = PG::Connection.escape_string(@term)
end
if @search_pms || @opts[:type_filter] == 'private_messages'
@opts[:type_filter] = "private_messages"
@search_context ||= @guardian.user
unless @search_context.present? && @guardian.can_see_private_messages?(@search_context.id)
raise Discourse::InvalidAccess.new
end
end
if @search_all_topics && @guardian.user
@opts[:type_filter] = "all_topics"
end
@results = GroupedSearchResults.new(
type_filter: @opts[:type_filter],
term: clean_term,
blurb_term: term,
search_context: @search_context,
blurb_length: @blurb_length
)
end
def limit
if @opts[:type_filter].present?
Search.per_filter + 1
else
Search.per_facet + 1
end
end
def offset
if @page && @opts[:type_filter].present?
(@page - 1) * Search.per_filter
else
0
end
end
def valid?
@valid
end
def self.execute(term, opts = nil)
self.new(term, opts).execute
2013-05-13 17:04:41 -04:00
end
# Query a term
def execute(readonly_mode: Discourse.readonly_mode?)
if SiteSetting.log_search_queries? && @opts[:search_type].present? && !readonly_mode
status, search_log_id = SearchLog.log(
2017-07-13 13:34:31 -04:00
term: @term,
search_type: @opts[:search_type],
ip_address: @opts[:ip_address],
user_id: @opts[:user_id]
)
@results.search_log_id = search_log_id unless status == :error
2017-07-13 13:34:31 -04:00
end
unless @filters.present? || @opts[:search_for_id]
min_length = @opts[:min_search_term_length] || SiteSetting.min_search_term_length
terms = (@term || '').split(/\s(?=(?:[^"]|"[^"]*")*$)/).reject { |t| t.length < min_length }
if terms.blank?
@term = ''
@valid = false
return
end
end
2013-05-13 17:04:41 -04:00
# If the term is a number or url to a topic, just include that topic
if @opts[:search_for_id] && ['topic', 'private_messages', 'all_topics'].include?(@results.type_filter)
if @term =~ /^\d+$/
single_topic(@term.to_i)
else
begin
if route = Discourse.route_for(@term)
if route[:controller] == "topics" && route[:action] == "show"
topic_id = (route[:id] || route[:topic_id]).to_i
single_topic(topic_id) if topic_id > 0
end
end
rescue ActionController::RoutingError
end
2013-05-13 17:04:41 -04:00
end
end
find_grouped_results if @results.posts.blank?
if preloaded_topic_custom_fields.present? && @results.posts.present?
topics = @results.posts.map(&:topic)
Topic.preload_custom_fields(topics, preloaded_topic_custom_fields)
end
@results
2013-05-13 17:04:41 -04:00
end
2013-02-05 14:16:51 -05:00
def self.advanced_order(trigger, &block)
(@advanced_orders ||= {})[trigger] = block
end
def self.advanced_orders
@advanced_orders
end
def self.advanced_filter(trigger, &block)
(@advanced_filters ||= {})[trigger] = block
end
def self.advanced_filters
@advanced_filters
end
def self.custom_topic_eager_load(tables = nil, &block)
(@custom_topic_eager_loads ||= []) << (tables || block)
end
def self.custom_topic_eager_loads
Array.wrap(@custom_topic_eager_loads)
end
advanced_filter(/^in:personal-direct$/) do |posts|
if @guardian.user
posts
.joins("LEFT JOIN topic_allowed_groups tg ON posts.topic_id = tg.topic_id")
.where(<<~SQL, user_id: @guardian.user.id)
tg.id IS NULL
AND posts.topic_id IN (
SELECT tau.topic_id
FROM topic_allowed_users tau
JOIN topic_allowed_users tau2
ON tau2.topic_id = tau.topic_id
AND tau2.id != tau.id
WHERE tau.user_id = :user_id
GROUP BY tau.topic_id
HAVING COUNT(*) = 1
)
SQL
end
end
advanced_filter(/^in:tagged$/) do |posts|
posts
.where('EXISTS (SELECT 1 FROM topic_tags WHERE topic_tags.topic_id = posts.topic_id)')
end
advanced_filter(/^in:untagged$/) do |posts|
posts
.joins("LEFT JOIN topic_tags ON
topic_tags.topic_id = posts.topic_id")
.where("topic_tags.id IS NULL")
end
advanced_filter(/^status:open$/) do |posts|
posts.where('NOT topics.closed AND NOT topics.archived')
end
advanced_filter(/^status:closed$/) do |posts|
posts.where('topics.closed')
end
advanced_filter(/^status:public$/) do |posts|
category_ids = Category
.where(read_restricted: false)
.pluck(:id)
posts.where("topics.category_id in (?)", category_ids)
end
advanced_filter(/^status:archived$/) do |posts|
posts.where('topics.archived')
end
advanced_filter(/^status:noreplies$/) do |posts|
posts.where("topics.posts_count = 1")
end
advanced_filter(/^status:single_user$/) do |posts|
posts.where("topics.participant_count = 1")
end
advanced_filter(/^posts_count:(\d+)$/) do |posts, match|
posts.where("topics.posts_count = ?", match.to_i)
end
advanced_filter(/^min_post_count:(\d+)$/) do |posts, match|
posts.where("topics.posts_count >= ?", match.to_i)
end
advanced_filter(/^in:first|^f$/) do |posts|
posts.where("posts.post_number = 1")
end
advanced_filter(/^in:pinned$/) do |posts|
posts.where("topics.pinned_at IS NOT NULL")
end
advanced_filter(/^in:wiki$/) do |posts, match|
2016-05-13 04:26:53 -04:00
posts.where(wiki: true)
end
advanced_filter(/^badge:(.*)$/) do |posts, match|
badge_id = Badge.where('name ilike ? OR id = ?', match, match.to_i).pluck_first(:id)
if badge_id
posts.where('posts.user_id IN (SELECT ub.user_id FROM user_badges ub WHERE ub.badge_id = ?)', badge_id)
else
posts.where("1 = 0")
end
end
def post_action_type_filter(posts, post_action_type)
posts.where("posts.id IN (
SELECT pa.post_id FROM post_actions pa
WHERE pa.user_id = #{@guardian.user.id} AND
pa.post_action_type_id = #{post_action_type} AND
deleted_at IS NULL
)")
end
advanced_filter(/^in:(likes)$/) do |posts, match|
if @guardian.user
post_action_type_filter(posts, PostActionType.types[:like])
end
end
advanced_filter(/^in:(bookmarks)$/) do |posts, match|
if @guardian.user
posts.where("posts.id IN (SELECT post_id FROM bookmarks WHERE bookmarks.user_id = #{@guardian.user.id})")
end
end
advanced_filter(/^in:posted$/) do |posts|
posts.where("posts.user_id = #{@guardian.user.id}") if @guardian.user
end
advanced_filter(/^in:created$/) do |posts|
posts.where(user_id: @guardian.user.id, post_number: 1) if @guardian.user
end
advanced_filter(/^created:@(.*)$/) do |posts, match|
user_id = User.where(username: match.downcase).pluck_first(:id)
posts.where(user_id: user_id, post_number: 1)
end
advanced_filter(/^in:(watching|tracking)$/) do |posts, match|
if @guardian.user
level = TopicUser.notification_levels[match.to_sym]
posts.where("posts.topic_id IN (
SELECT tu.topic_id FROM topic_users tu
WHERE tu.user_id = :user_id AND
tu.notification_level >= :level
)", user_id: @guardian.user.id, level: level)
end
end
advanced_filter(/^in:seen$/) do |posts|
if @guardian.user
posts
.joins("INNER JOIN post_timings ON
post_timings.topic_id = posts.topic_id
AND post_timings.post_number = posts.post_number
AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
")
end
end
advanced_filter(/^in:unseen$/) do |posts|
if @guardian.user
posts
.joins("LEFT JOIN post_timings ON
post_timings.topic_id = posts.topic_id
AND post_timings.post_number = posts.post_number
AND post_timings.user_id = #{ActiveRecord::Base.connection.quote(@guardian.user.id)}
")
.where("post_timings.user_id IS NULL")
end
end
advanced_filter(/^with:images$/) do |posts|
FEATURE: Include optimized thumbnails for topics (#9215) This introduces new APIs for obtaining optimized thumbnails for topics. There are a few building blocks required for this: - Introduces new `image_upload_id` columns on the `posts` and `topics` table. This replaces the old `image_url` column, which means that thumbnails are now restricted to uploads. Hotlinked thumbnails are no longer possible. In normal use (with pull_hotlinked_images enabled), this has no noticeable impact - A migration attempts to match existing urls to upload records. If a match cannot be found then the posts will be queued for rebake - Optimized thumbnails are generated during post_process_cooked. If thumbnails are missing when serializing a topic list, then a sidekiq job is queued - Topic lists and topics now include a `thumbnails` key, which includes all the available images: ``` "thumbnails": [ { "max_width": null, "max_height": null, "url": "//example.com/original-image.png", "width": 1380, "height": 1840 }, { "max_width": 1024, "max_height": 1024, "url": "//example.com/optimized-image.png", "width": 768, "height": 1024 } ] ``` - Themes can request additional thumbnail sizes by using a modifier in their `about.json` file: ``` "modifiers": { "topic_thumbnail_sizes": [ [200, 200], [800, 800] ], ... ``` Remember that these are generated asynchronously, so your theme should include logic to fallback to other available thumbnails if your requested size has not yet been generated - Two new raw plugin outlets are introduced, to improve the customisability of the topic list. `topic-list-before-columns` and `topic-list-before-link`
2020-05-05 04:07:50 -04:00
posts.where("posts.image_upload_id IS NOT NULL")
end
advanced_filter(/^category:(.+)$/) do |posts, match|
exact = false
if match[0] == "="
exact = true
match = match[1..-1]
end
category_ids = Category.where('slug ilike ? OR name ilike ? OR id = ?',
match, match, match.to_i).pluck(:id)
if category_ids.present?
unless exact
category_ids +=
Category.where('parent_category_id = ?', category_ids.first).pluck(:id)
end
@category_filter_matched ||= true
posts.where("topics.category_id IN (?)", category_ids)
else
posts.where("1 = 0")
end
end
advanced_filter(/^\#([\p{L}\p{M}0-9\-:=]+)$/) do |posts, match|
exact = true
2019-03-13 03:23:01 -04:00
category_slug, subcategory_slug = match.to_s.split(":")
next unless category_slug
2019-03-13 03:23:01 -04:00
if subcategory_slug
category_id, _ = DB.query_single(<<~SQL, category_slug.downcase, subcategory_slug.downcase)
SELECT sub.id
FROM categories sub
JOIN categories c ON sub.parent_category_id = c.id
WHERE LOWER(c.slug) = ? AND LOWER(sub.slug) = ?
ORDER BY c.id
LIMIT 1
SQL
else
# main category
2019-03-13 03:23:01 -04:00
if category_slug[0] == "="
category_slug = category_slug[1..-1]
else
exact = false
end
2019-03-13 03:23:01 -04:00
category_id = Category.where("lower(slug) = ?", category_slug.downcase)
.order('case when parent_category_id is null then 0 else 1 end')
.pluck(:id)
.first
end
if category_id
category_ids = [category_id]
unless exact
category_ids +=
Category.where('parent_category_id = ?', category_id).pluck(:id)
end
@category_filter_matched ||= true
posts.where("topics.category_id IN (?)", category_ids)
else
# try a possible tag match
tag_id = Tag.where_name(category_slug).pluck_first(:id)
if (tag_id)
posts.where(<<~SQL, tag_id)
topics.id IN (
SELECT DISTINCT(tt.topic_id)
FROM topic_tags tt
WHERE tt.tag_id = ?
)
SQL
else
if tag_group_id = TagGroup.find_id_by_slug(category_slug)
posts.where(<<~SQL, tag_group_id)
topics.id IN (
SELECT DISTINCT(tt.topic_id)
FROM topic_tags tt
WHERE tt.tag_id in (
SELECT tag_id
FROM tag_group_memberships
WHERE tag_group_id = ?
)
)
SQL
# a bit yucky but we got to add the term back in
elsif match.to_s.length >= SiteSetting.min_search_term_length
posts.where <<~SQL
posts.id IN (
SELECT post_id FROM post_search_data pd1
WHERE pd1.search_data @@ #{Search.ts_query(term: "##{match}")})
SQL
end
end
end
end
advanced_filter(/^group:(.+)$/) do |posts, match|
group_id = Group.where('name ilike ? OR (id = ? AND id > 0)', match, match.to_i).pluck_first(:id)
if group_id
posts.where("posts.user_id IN (select gu.user_id from group_users gu where gu.group_id = ?)", group_id)
else
posts.where("1 = 0")
end
end
advanced_filter(/^user:(.+)$/) do |posts, match|
user_id = User.where(staged: false).where('username_lower = ? OR id = ?', match.downcase, match.to_i).pluck_first(:id)
if user_id
posts.where("posts.user_id = #{user_id}")
else
posts.where("1 = 0")
end
end
advanced_filter(/^\@([a-zA-Z0-9_\-.]+)$/) do |posts, match|
user_id = User.where(staged: false).where(username_lower: match.downcase).pluck_first(:id)
2016-05-12 04:43:44 -04:00
if user_id
posts.where("posts.user_id = #{user_id}")
else
posts.where("1 = 0")
end
end
advanced_filter(/^before:(.*)$/) do |posts, match|
if date = Search.word_to_date(match)
posts.where("posts.created_at < ?", date)
else
posts
end
end
advanced_filter(/^after:(.*)$/) do |posts, match|
if date = Search.word_to_date(match)
posts.where("posts.created_at > ?", date)
else
posts
end
end
advanced_filter(/^tags?:([\p{L}\p{M}0-9,\-_+]+)$/) do |posts, match|
search_tags(posts, match, positive: true)
end
advanced_filter(/^\-tags?:([\p{L}\p{M}0-9,\-_+]+)$/) do |posts, match|
search_tags(posts, match, positive: false)
end
advanced_filter(/^filetypes?:([a-zA-Z0-9,\-_]+)$/) do |posts, match|
file_extensions = match.split(",").map(&:downcase)
posts.where("posts.id IN (
2018-08-03 19:56:26 -04:00
SELECT post_id
FROM topic_links
WHERE extension IN (:file_extensions)
UNION
2018-08-03 19:56:26 -04:00
SELECT post_uploads.post_id
FROM uploads
JOIN post_uploads ON post_uploads.upload_id = uploads.id
WHERE lower(uploads.extension) IN (:file_extensions)
)", file_extensions: file_extensions)
end
private
2013-02-05 14:16:51 -05:00
def search_tags(posts, match, positive:)
return if match.nil?
match.downcase!
modifier = positive ? "" : "NOT"
if match.include?('+')
tags = match.split('+')
posts.where("topics.id #{modifier} IN (
SELECT tt.topic_id
FROM topic_tags tt, tags
WHERE tt.tag_id = tags.id
GROUP BY tt.topic_id
HAVING to_tsvector(#{default_ts_config}, array_to_string(array_agg(lower(tags.name)), ' ')) @@ to_tsquery(#{default_ts_config}, ?)
)", tags.join('&'))
else
tags = match.split(",")
posts.where("topics.id #{modifier} IN (
SELECT DISTINCT(tt.topic_id)
FROM topic_tags tt, tags
WHERE tt.tag_id = tags.id AND lower(tags.name) IN (?)
)", tags)
end
end
def process_advanced_search!(term)
term.to_s.scan(/(([^" \t\n\x0B\f\r]+)?(("[^"]+")?))/).to_a.map do |(word, _)|
next if word.blank?
found = false
Search.advanced_filters.each do |matcher, block|
cleaned = word.gsub(/["']/, "")
if cleaned =~ matcher
(@filters ||= []) << [block, $1]
found = true
end
2018-06-07 01:28:18 -04:00
end
if word == 'l'
@order = :latest
nil
elsif word =~ /order:\w+/
@order = word.gsub('order:', '').to_sym
nil
elsif word == 'in:title' || word == 't'
@in_title = true
nil
elsif word =~ /topic:(\d+)/
topic_id = $1.to_i
if topic_id > 1
topic = Topic.find_by(id: topic_id)
if @guardian.can_see?(topic)
@search_context = topic
end
end
2018-06-07 01:28:18 -04:00
nil
elsif word == 'in:all'
@search_all_topics = true
nil
elsif word == 'in:personal'
@search_pms = true
nil
elsif word == "in:personal-direct"
@search_pms = true
2018-06-07 01:28:18 -04:00
nil
2020-08-24 00:38:46 -04:00
elsif word =~ /^personal_messages:(.+)$/
if user = User.find_by_username($1)
@search_pms = true
@search_context = user
end
2018-06-07 01:28:18 -04:00
nil
else
found ? nil : word
2018-06-07 01:28:18 -04:00
end
end.compact.join(' ')
end
def find_grouped_results
if @results.type_filter.present?
raise Discourse::InvalidAccess.new("invalid type filter") unless Search.facets.include?(@results.type_filter)
# calling protected methods
send("#{@results.type_filter}_search")
else
unless @search_context
user_search if @term.present?
category_search if @term.present?
2017-08-25 11:52:18 -04:00
tags_search if @term.present?
groups_search if @term.present?
end
topic_search
2018-06-07 01:28:18 -04:00
end
2013-02-05 14:16:51 -05:00
@results
rescue ActiveRecord::StatementInvalid
# In the event of a PG:Error return nothing, it is likely they used a foreign language whose
# locale is not supported by postgres
end
2018-06-07 01:28:18 -04:00
# If we're searching for a single topic
def single_topic(id)
if @opts[:restrict_to_archetype].present?
archetype = @opts[:restrict_to_archetype] == Archetype.default ? Archetype.default : Archetype.private_message
post = posts_scope
.joins(:topic)
.find_by(
"topics.id = :id AND topics.archetype = :archetype AND posts.post_number = 1",
id: id,
archetype: archetype
)
else
post = posts_scope.find_by(topic_id: id, post_number: 1)
end
return nil unless @guardian.can_see?(post)
2013-05-13 17:04:41 -04:00
@results.add(post)
@results
end
def secure_category_ids
return @secure_category_ids unless @secure_category_ids.nil?
@secure_category_ids = @guardian.secure_category_ids
end
2013-02-05 14:16:51 -05:00
def category_search
2013-12-13 03:00:48 -05:00
# scope is leaking onto Category, this is not good and probably a bug in Rails
# the secure_category_ids will invoke the same method on User, it calls Category.where
# however the scope from the query below is leaking in to Category, this works around
# the issue while we figure out what is up in Rails
secure_category_ids
categories = Category.includes(:category_search_data)
.where("category_search_data.search_data @@ #{ts_query}")
2013-08-25 17:18:11 -04:00
.references(:category_search_data)
.order("topics_month DESC")
.secured(@guardian)
2017-07-31 20:28:16 -04:00
.limit(limit)
2013-05-13 17:04:41 -04:00
categories.each do |category|
@results.add(category)
end
2018-06-07 01:28:18 -04:00
end
def user_search
return if SiteSetting.hide_user_profiles_from_public && !@guardian.user
users = User.includes(:user_search_data)
.references(:user_search_data)
2015-11-18 15:06:59 -05:00
.where(active: true)
.where(staged: false)
.where("user_search_data.search_data @@ #{ts_query("simple")}")
.order("CASE WHEN username_lower = '#{@original_term.downcase}' THEN 0 ELSE 1 END")
.order("last_posted_at DESC")
2017-07-31 20:28:16 -04:00
.limit(limit)
users.each do |user|
@results.add(user)
end
2018-06-07 01:28:18 -04:00
end
def groups_search
groups = Group
.visible_groups(@guardian.user, "name ASC", include_everyone: false)
.where("name ILIKE :term OR full_name ILIKE :term", term: "%#{@term}%")
groups.each { |group| @results.add(group) }
end
2017-08-25 11:52:18 -04:00
def tags_search
return unless SiteSetting.tagging_enabled
tags = Tag.includes(:tag_search_data)
.where("tag_search_data.search_data @@ #{ts_query}")
.references(:tag_search_data)
.order("name asc")
.limit(limit)
hidden_tag_names = DiscourseTagging.hidden_tag_names(@guardian)
2017-08-25 11:52:18 -04:00
tags.each do |tag|
@results.add(tag) if !hidden_tag_names.include?(tag.name)
2017-08-25 11:52:18 -04:00
end
2018-06-07 01:28:18 -04:00
end
2017-08-25 11:52:18 -04:00
PHRASE_MATCH_REGEXP_PATTERN = '"([^"]+)"'
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
def posts_query(limit, type_filter: nil, aggregate_search: false)
posts = Post.where(post_type: Topic.visible_post_types(@guardian.user))
.joins(:post_search_data, :topic)
.joins("LEFT JOIN categories ON categories.id = topics.category_id")
is_topic_search = @search_context.present? && @search_context.is_a?(Topic)
posts = posts.where("topics.visible") unless is_topic_search
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if type_filter === "private_messages" || (is_topic_search && @search_context.private_message?)
posts = posts
.where(
"topics.archetype = ? AND post_search_data.private_message",
Archetype.private_message
)
unless @guardian.is_admin?
posts = posts.private_posts_for_user(@guardian.user)
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
elsif type_filter === "all_topics"
private_posts = posts
.where(
"topics.archetype = ? AND post_search_data.private_message",
Archetype.private_message
)
.private_posts_for_user(@guardian.user)
posts = posts
.where(
"topics.archetype <> ? AND NOT post_search_data.private_message",
Archetype.private_message
)
.or(private_posts)
else
posts = posts.where(
"topics.archetype <> ? AND NOT post_search_data.private_message",
Archetype.private_message
)
2018-06-07 01:28:18 -04:00
end
if @term.present?
if is_topic_search
term_without_quote = @term
if @term =~ /"(.+)"/
term_without_quote = $1
end
if @term =~ /'(.+)'/
term_without_quote = $1
end
posts = posts.joins('JOIN users u ON u.id = posts.user_id')
posts = posts.where("posts.raw || ' ' || u.username || ' ' || COALESCE(u.name, '') ilike ?", "%#{term_without_quote}%")
2018-06-07 01:28:18 -04:00
else
# A is for title
# B is for category
# C is for tags
# D is for cooked
weights = @in_title ? 'A' : (SiteSetting.tagging_enabled ? 'ABCD' : 'ABD')
posts = posts.where(post_number: 1) if @in_title
posts = posts.where("post_search_data.search_data @@ #{ts_query(weight_filter: weights)}")
exact_terms = @term.scan(Regexp.new(PHRASE_MATCH_REGEXP_PATTERN)).flatten
exact_terms.each do |exact|
posts = posts.where("posts.raw ilike :exact OR topics.title ilike :exact", exact: "%#{exact}%")
end
end
2018-06-07 01:28:18 -04:00
end
@filters.each do |block, match|
if block.arity == 1
posts = instance_exec(posts, &block) || posts
else
posts = instance_exec(posts, match, &block) || posts
2018-06-07 01:28:18 -04:00
end
end if @filters
# If we have a search context, prioritize those posts first
posts =
if @search_context.present?
if @search_context.is_a?(User)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if type_filter === "private_messages"
@guardian.is_admin? ? posts.private_posts_for_user(@search_context) : posts
else
posts.where("posts.user_id = #{@search_context.id}")
end
elsif @search_context.is_a?(Category)
category_ids = Category
.where(parent_category_id: @search_context.id)
.pluck(:id)
.push(@search_context.id)
posts.where("topics.category_id in (?)", category_ids)
elsif @search_context.is_a?(Topic)
posts.where("topics.id = #{@search_context.id}")
.order("posts.post_number #{@order == :latest ? "DESC" : ""}")
elsif @search_context.is_a?(Tag)
posts = posts
.joins("LEFT JOIN topic_tags ON topic_tags.topic_id = topics.id")
.joins("LEFT JOIN tags ON tags.id = topic_tags.tag_id")
posts.where("tags.id = #{@search_context.id}")
end
else
posts = categories_ignored(posts) unless @category_filter_matched
posts
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if aggregate_search
aggregate_relation = Post.unscoped
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
.select("subquery.topic_id id")
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
.group("subquery.topic_id")
posts = posts.select(posts.arel.projections)
end
if @order == :latest
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
posts = posts.reorder("posts.created_at DESC")
if aggregate_search
aggregate_relation = aggregate_relation
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
.select(
"MAX(subquery.post_number) post_number",
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
"MAX(subquery.created_at) created_at"
)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
.order("created_at DESC")
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
elsif @order == :latest_topic
posts = posts.order("topics.created_at DESC")
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if aggregate_search
posts = posts.select("topics.created_at topic_created_at")
aggregate_relation = aggregate_relation
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
.select(
"MIN(subquery.post_number) post_number",
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
"MAX(subquery.topic_created_at) topic_created_at"
)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
.order("topic_created_at DESC")
end
elsif @order == :views
posts = posts.order("topics.views DESC")
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if aggregate_search
posts = posts.select("topics.views topic_views")
aggregate_relation = aggregate_relation
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
.select(
"MIN(subquery.post_number) post_number",
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
"MAX(subquery.topic_views) topic_views"
)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
.order("topic_views DESC")
end
elsif @order == :likes
posts = posts.order("posts.like_count DESC")
else
rank = <<~SQL
TS_RANK_CD(
post_search_data.search_data,
#{@term.blank? ? '' : ts_query(weight_filter: weights)},
#{SiteSetting.search_ranking_normalization}|32
)
SQL
category_priority_weights = <<~SQL
(
CASE categories.search_priority
WHEN #{Searchable::PRIORITIES[:very_low]}
THEN #{SiteSetting.category_search_priority_very_low_weight}
WHEN #{Searchable::PRIORITIES[:low]}
THEN #{SiteSetting.category_search_priority_low_weight}
WHEN #{Searchable::PRIORITIES[:high]}
THEN #{SiteSetting.category_search_priority_high_weight}
WHEN #{Searchable::PRIORITIES[:very_high]}
THEN #{SiteSetting.category_search_priority_very_high_weight}
ELSE
CASE WHEN topics.closed
THEN 0.9
ELSE 1
END
END
)
SQL
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
data_ranking =
if @term.blank?
"(#{category_priority_weights})"
else
"(#{rank} * #{category_priority_weights})"
end
if aggregate_search
posts = posts.select("#{data_ranking} rank", "topics.bumped_at topic_bumped_at")
.order("rank DESC", "topic_bumped_at DESC")
aggregate_relation = aggregate_relation
FIX: Search by relevance may return incorrect post number. Follow up to d8c796bc4. Note that his change increases query time by around 40% in the following benchmark against `dev.discourse.org` but this is a tradeoff that has to be taken so that relevance search is accurate. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number ORDER BY rank DESC, bumped_at DESC))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 18.040 (± 0.0%) i/s - 181.000 in 10.035241s current aggregate search query with proper ranking 12.992 (± 0.0%) i/s - 130.000 in 10.007214s Comparison: current aggregate search query: 18.0 i/s current aggregate search query with proper ranking: 13.0 i/s - 1.39x (± 0.00) slower ```
2020-07-14 23:41:45 -04:00
.select(
"(ARRAY_AGG(subquery.post_number ORDER BY subquery.rank DESC, subquery.topic_bumped_at DESC))[1] post_number",
"MAX(subquery.rank) rank", "MAX(subquery.topic_bumped_at) topic_bumped_at"
)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
.order("rank DESC", "topic_bumped_at DESC")
2018-06-07 01:28:18 -04:00
else
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
posts = posts.order("#{data_ranking} DESC", "topics.bumped_at DESC")
2018-06-07 01:28:18 -04:00
end
end
if secure_category_ids.present?
posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted) OR (categories.id IN (?))", secure_category_ids).references(:categories)
else
posts = posts.where("(categories.id IS NULL) OR (NOT categories.read_restricted)").references(:categories)
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if aggregate_search
posts = yield(posts) if block_given?
posts = aggregate_relation.from(posts)
end
if @order
advanced_order = Search.advanced_orders&.fetch(@order, nil)
posts = advanced_order.call(posts) if advanced_order
end
posts = posts.offset(offset)
posts.limit(limit)
2014-05-15 10:31:45 -04:00
end
def categories_ignored(posts)
posts.where(<<~SQL, Searchable::PRIORITIES[:ignore])
(categories.search_priority IS NULL OR categories.search_priority IS NOT NULL AND categories.search_priority <> ?)
SQL
end
def self.default_ts_config
"'#{Search.ts_config}'"
2018-06-07 01:28:18 -04:00
end
def default_ts_config
self.class.default_ts_config
2018-06-07 01:28:18 -04:00
end
def self.ts_query(term: , ts_config: nil, joiner: nil, weight_filter: nil)
to_tsquery(
ts_config: ts_config,
term: set_tsquery_weight_filter(term, weight_filter),
joiner: joiner
)
end
def self.to_tsquery(ts_config: nil, term:, joiner: nil)
ts_config = ActiveRecord::Base.connection.quote(ts_config) if ts_config
tsquery = "TO_TSQUERY(#{ts_config || default_ts_config}, '#{term}')"
tsquery = "REPLACE(#{tsquery}::text, '&', '#{PG::Connection.escape_string(joiner)}')::tsquery" if joiner
tsquery
end
def self.set_tsquery_weight_filter(term, weight_filter)
term = term.gsub("'", "''")
"''#{PG::Connection.escape_string(term)}'':*#{weight_filter}"
end
def ts_query(ts_config = nil, weight_filter: nil)
2015-09-18 03:16:37 -04:00
@ts_query_cache ||= {}
@ts_query_cache["#{ts_config || default_ts_config} #{@term} #{weight_filter}"] ||=
Search.ts_query(term: @term, ts_config: ts_config, weight_filter: weight_filter)
2018-06-07 01:28:18 -04:00
end
2015-09-18 03:16:37 -04:00
def wrap_rows(query)
"SELECT *, row_number() over() row_number FROM (#{query.to_sql}) xxx"
2015-09-18 03:49:57 -04:00
end
def aggregate_post_sql(opts)
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
default_opts = {
type_filter: opts[:type_filter]
}
min_id =
if SiteSetting.search_recent_regular_posts_offset_post_id > 0
if %w{all_topics private_message}.include?(opts[:type_filter])
0
else
SiteSetting.search_recent_regular_posts_offset_post_id
end
else
# This is kept around for backwards compatibility.
# TODO: Drop this code path after Discourse 2.7 has been released.
Search.min_post_id
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if @order == :likes
# likes are a pain to aggregate so skip
query = posts_query(limit, **default_opts).select('topics.id', 'posts.post_number')
if min_id > 0
low_set = query.dup.where("post_search_data.post_id < #{min_id}")
high_set = query.where("post_search_data.post_id >= #{min_id}")
{ default: wrap_rows(high_set), remaining: wrap_rows(low_set) }
2018-06-07 01:28:18 -04:00
else
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
{ default: wrap_rows(query) }
end
else
query = posts_query(limit, **default_opts, aggregate_search: true) do |posts|
if min_id > 0
posts.select("post_search_data.post_id post_search_data_post_id")
else
posts
end
end
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
if min_id > 0
low_set = query.dup.where("subquery.post_search_data_post_id < #{min_id}")
high_set = query.where("subquery.post_search_data_post_id >= #{min_id}")
2018-06-07 01:28:18 -04:00
FIX: Ensure that aggregating search shows the post with the higest rank. Previously, we would only take either the `MIN` or `MAX` for `post_number` during aggregation meaning that the ranking is not considered. ``` require 'benchmark/ips' Benchmark.ips do |x| x.config(time: 10, warmup: 2) x.report("current aggregate search query") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT topics.id, min(posts.post_number) post_number FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted)) GROUP BY topics.id ORDER BY MAX(( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) ) DESC, topics.bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.report("current aggregate search query with proper ranking") do DB.exec <<~SQL SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id" FROM "posts" JOIN (SELECT *, row_number() over() row_number FROM (SELECT subquery.topic_id id, (ARRAY_AGG(subquery.post_number))[1] post_number, MAX(subquery.rank) rank, MAX(subquery.bumped_at) bumped_at FROM (SELECT "posts"."id", "posts"."user_id", "posts"."topic_id", "posts"."post_number", "posts"."raw", "posts"."cooked", "posts"."created_at", "posts"."updated_at", "posts"."reply_to_post_number", "posts"."reply_count", "posts"."quote_count", "posts"."deleted_at", "posts"."off_topic_count", "posts"."like_count", "posts"."incoming_link_count", "posts"."bookmark_count", "posts"."score", "posts"."reads", "posts"."post_type", "posts"."sort_order", "posts"."last_editor_id", "posts"."hidden", "posts"."hidden_reason_id", "posts"."notify_moderators_count", "posts"."spam_count", "posts"."illegal_count", "posts"."inappropriate_count", "posts"."last_version_at", "posts"."user_deleted", "posts"."reply_to_user_id", "posts"."percent_rank", "posts"."notify_user_count", "posts"."like_score", "posts"."deleted_by_id", "posts"."edit_reason", "posts"."word_count", "posts"."version", "posts"."cook_method", "posts"."wiki", "posts"."baked_at", "posts"."baked_version", "posts"."hidden_at", "posts"."self_edits", "posts"."reply_quoted", "posts"."via_email", "posts"."raw_email", "posts"."public_version", "posts"."action_code", "posts"."locked_by_id", "posts"."image_upload_id", ( TS_RANK_CD( post_search_data.search_data, TO_TSQUERY('english', '''postgres'':*ABCD'), 1|32 ) * ( CASE categories.search_priority WHEN 2 THEN 0.6 WHEN 3 THEN 0.8 WHEN 4 THEN 1.2 WHEN 5 THEN 1.4 ELSE CASE WHEN topics.closed THEN 0.9 ELSE 1 END END ) ) rank, topics.bumped_at bumped_at FROM "posts" INNER JOIN "post_search_data" ON "post_search_data"."post_id" = "posts"."id" INNER JOIN "topics" ON "topics"."id" = "posts"."topic_id" AND ("topics"."deleted_at" IS NULL) LEFT JOIN categories ON categories.id = topics.category_id WHERE ("posts"."deleted_at" IS NULL) AND "posts"."post_type" IN (1, 2, 3, 4) AND (topics.visible) AND (topics.archetype <> 'private_message') AND (post_search_data.search_data @@ TO_TSQUERY('english', '''postgres'':*ABCD')) AND (categories.id NOT IN ( SELECT categories.id WHERE categories.search_priority = 1 ) ) AND ((categories.id IS NULL) OR (NOT categories.read_restricted))) subquery GROUP BY subquery.topic_id ORDER BY rank DESC, bumped_at DESC LIMIT 51 OFFSET 0) xxx) x ON x.id = posts.topic_id AND x.post_number = posts.post_number WHERE ("posts"."deleted_at" IS NULL) ORDER BY row_number; SQL end x.compare! end ``` ``` Warming up -------------------------------------- current aggregate search query 1.000 i/100ms current aggregate search query with proper ranking 1.000 i/100ms Calculating ------------------------------------- current aggregate search query 17.726 (± 0.0%) i/s - 178.000 in 10.045107s current aggregate search query with proper ranking 17.802 (± 0.0%) i/s - 178.000 in 10.002230s Comparison: current aggregate search query with proper ranking: 17.8 i/s current aggregate search query: 17.7 i/s - 1.00x (± 0.00) slower ```
2020-07-07 03:36:57 -04:00
{ default: wrap_rows(high_set), remaining: wrap_rows(low_set) }
else
{ default: wrap_rows(query) }
end
end
2018-06-07 01:28:18 -04:00
end
def aggregate_posts(post_sql)
return [] unless post_sql
posts_scope(posts_eager_loads(Post))
.joins("JOIN (#{post_sql}) x ON x.id = posts.topic_id AND x.post_number = posts.post_number")
.order('row_number')
2018-06-07 01:28:18 -04:00
end
def aggregate_search(opts = {})
post_sql = aggregate_post_sql(opts)
added = 0
aggregate_posts(post_sql[:default]).each do |p|
@results.add(p)
added += 1
end
if added < limit
aggregate_posts(post_sql[:remaining]).each { |p| @results.add(p) }
end
2018-06-07 01:28:18 -04:00
end
def private_messages_search
raise Discourse::InvalidAccess.new("anonymous can not search PMs") unless @guardian.user
aggregate_search(type_filter: "private_messages")
end
def all_topics_search
aggregate_search(type_filter: "all_topics")
end
2013-02-05 14:16:51 -05:00
def topic_search
if @search_context.is_a?(Topic)
posts = posts_scope(posts_eager_loads(posts_query(limit)))
.where('posts.topic_id = ?', @search_context.id)
posts.each do |post|
@results.add(post)
end
2018-06-07 01:28:18 -04:00
else
aggregate_search
2018-06-07 01:28:18 -04:00
end
end
def posts_eager_loads(query)
query = query.includes(:user, :post_search_data)
topic_eager_loads = [:category]
2018-06-07 01:28:18 -04:00
if SiteSetting.tagging_enabled
topic_eager_loads << :tags
end
Search.custom_topic_eager_loads.each do |custom_loads|
topic_eager_loads.concat(custom_loads.is_a?(Array) ? custom_loads : custom_loads.call(search_pms: @search_pms).to_a)
end
query.includes(topic: topic_eager_loads)
2018-06-07 01:28:18 -04:00
end
PERF: Limit characters used to generate headline for search blurb. We determined using the following benchmark script that limiting to 2500 chars would mean a maximum of 25ms spent generating headlines. ``` require 'benchmark/ips' string = <<~STRING Far far away, behind the word mountains... STRING def sql_excerpt(string, l = 1000000) DB.query_single(<<~SQL) SELECT TS_HEADLINE('english', left('#{string}', #{l}), PLAINTO_TSQUERY('mountains')) SQL end def ruby_excerpt(string) output = DB.query_single("SELECT '#{string}'")[0] Search::GroupedSearchResults::TextHelper.excerpt(output, 'mountains', radius: 100) end puts "Ruby Excerpt: #{ruby_excerpt(string)}" puts "SQL Excerpt: #{sql_excerpt(string)}" puts Benchmark.ips do |x| x.time = 10 [1000, 2500, 5000, 10000, 20000, 50000].each do |l| short_string = string[0..l] x.report("ts_headline excerpt #{l}") do sql_excerpt(short_string, l) end x.report("actionview excerpt #{l}") do ruby_excerpt(short_string) end end x.compare! end ``` ``` actionview excerpt 1000: 20570.7 i/s actionview excerpt 2500: 17863.1 i/s - 1.15x (± 0.00) slower actionview excerpt 5000: 14228.9 i/s - 1.45x (± 0.00) slower actionview excerpt 10000: 10906.2 i/s - 1.89x (± 0.00) slower actionview excerpt 20000: 6255.0 i/s - 3.29x (± 0.00) slower ts_headline excerpt 1000: 4337.5 i/s - 4.74x (± 0.00) slower actionview excerpt 50000: 3222.7 i/s - 6.38x (± 0.00) slower ts_headline excerpt 2500: 2240.4 i/s - 9.18x (± 0.00) slower ts_headline excerpt 5000: 1258.7 i/s - 16.34x (± 0.00) slower ts_headline excerpt 10000: 667.2 i/s - 30.83x (± 0.00) slower ts_headline excerpt 20000: 348.7 i/s - 58.98x (± 0.00) slower ts_headline excerpt 50000: 131.9 i/s - 155.91x (± 0.00) slower ```
2020-08-07 02:36:12 -04:00
# Limited for performance reasons since `TS_HEADLINE` is slow when the text
# document is too long.
MAX_LENGTH_FOR_HEADLINE = 2500
def posts_scope(default_scope = Post.all)
if SiteSetting.use_pg_headlines_for_excerpt
search_term = @term.present? ? PG::Connection.escape_string(@term) : nil
ts_config = default_ts_config
default_scope
.joins("INNER JOIN post_search_data pd ON pd.post_id = posts.id")
.joins("INNER JOIN topics t1 ON t1.id = posts.topic_id")
.select(
"TS_HEADLINE(
#{ts_config},
t1.fancy_title,
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
'StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
) AS topic_title_headline",
"TS_HEADLINE(
#{ts_config},
LEFT(
TS_HEADLINE(
#{ts_config},
LEFT(pd.raw_data, #{MAX_LENGTH_FOR_HEADLINE}),
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
'ShortWord=0, MaxFragments=1, MinWords=50, MaxWords=51, StartSel='''', StopSel='''''
),
#{Search::GroupedSearchResults::BLURB_LENGTH}
),
PLAINTO_TSQUERY(#{ts_config}, '#{search_term}'),
'HighlightAll=true, StartSel=''<span class=\"#{HIGHLIGHT_CSS_CLASS}\">'', StopSel=''</span>'''
) AS headline",
"LEFT(pd.raw_data, 50) AS leading_raw_data",
"RIGHT(pd.raw_data, 50) AS trailing_raw_data",
default_scope.arel.projections
)
else
default_scope
end
end
2013-02-05 14:16:51 -05:00
end