Merge pull request #5576 from discourse/top-tags

PERF: a faster way to count tags used per category
This commit is contained in:
Neil Lalonde 2018-02-12 16:25:18 -05:00 committed by GitHub
commit 1bafbc8c5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 226 additions and 12 deletions

View File

@ -0,0 +1,15 @@
module Jobs
class InitCategoryTagStats < Jobs::Onceoff
def execute_onceoff(args)
CategoryTagStat.exec_sql <<~SQL
INSERT INTO category_tag_stats (category_id, tag_id, topic_count)
SELECT topics.category_id, tags.id, COUNT(topics.id)
FROM tags
INNER JOIN topic_tags ON tags.id = topic_tags.tag_id
INNER JOIN topics ON topics.id = topic_tags.topic_id
AND topics.deleted_at IS NULL
GROUP BY tags.id, topics.category_id
SQL
end
end
end

View File

@ -16,6 +16,7 @@ module Jobs
CategoryUser.ensure_consistency! CategoryUser.ensure_consistency!
UserOption.ensure_consistency! UserOption.ensure_consistency!
Tag.ensure_consistency! Tag.ensure_consistency!
CategoryTagStat.ensure_consistency!
end end
end end
end end

View File

@ -0,0 +1,61 @@
class CategoryTagStat < ActiveRecord::Base
belongs_to :category
belongs_to :tag
def self.topic_moved(topic, from_category_id, to_category_id)
if from_category_id
self.where(tag_id: topic.tags.map(&:id), category_id: from_category_id)
.where('topic_count > 0')
.update_all('topic_count = topic_count - 1')
end
if to_category_id
sql = <<~SQL
UPDATE #{self.table_name}
SET topic_count = topic_count + 1
WHERE tag_id in (:tag_ids)
AND category_id = :category_id
RETURNING tag_id
SQL
tag_ids = topic.tags.map(&:id)
updated_tag_ids = self.exec_sql(sql, tag_ids: tag_ids, category_id: to_category_id).map { |row| row['tag_id'] }
(tag_ids - updated_tag_ids).each do |tag_id|
CategoryTagStat.create!(tag_id: tag_id, category_id: to_category_id, topic_count: 1)
end
end
end
def self.topic_deleted(topic)
topic_moved(topic, topic.category_id, nil)
end
def self.topic_recovered(topic)
topic_moved(topic, nil, topic.category_id)
end
def self.ensure_consistency!
self.update_topic_counts
end
# Recalculate all topic counts if they got out of sync
def self.update_topic_counts
CategoryTagStat.exec_sql <<~SQL
UPDATE category_tag_stats stats
SET topic_count = x.topic_count
FROM (
SELECT COUNT(topics.id) AS topic_count,
tags.id AS tag_id,
topics.category_id as category_id
FROM tags
INNER JOIN topic_tags ON tags.id = topic_tags.tag_id
INNER JOIN topics ON topics.id = topic_tags.topic_id AND topics.deleted_at IS NULL
GROUP BY tags.id, topics.category_id
) x
WHERE stats.tag_id = x.tag_id
AND stats.category_id = x.category_id
AND x.topic_count <> stats.topic_count
SQL
end
end

View File

@ -59,13 +59,19 @@ class Tag < ActiveRecord::Base
scope_category_ids &= ([category.id] + category.subcategories.pluck(:id)) scope_category_ids &= ([category.id] + category.subcategories.pluck(:id))
end end
tags = DiscourseTagging.filter_allowed_tags( return [] if scope_category_ids.empty?
tags_by_count_query(limit: limit).where("topics.category_id in (?)", scope_category_ids),
nil, # Don't pass guardian. You might not be able to use some tags, but should still be able to see where they've been used.
category: category
)
tags.count(COUNT_ARG).map { |name, _| name } tag_names_with_counts = Tag.exec_sql <<~SQL
SELECT tags.name as tag_name, SUM(stats.topic_count) AS sum_topic_count
FROM category_tag_stats stats
INNER JOIN tags ON stats.tag_id = tags.id AND stats.topic_count > 0
WHERE stats.category_id in (#{scope_category_ids.join(',')})
GROUP BY tags.name
ORDER BY sum_topic_count DESC, tag_name ASC
LIMIT #{limit}
SQL
tag_names_with_counts.map { |row| row['tag_name'] }
end end
def self.include_tags? def self.include_tags?

View File

@ -54,14 +54,20 @@ class Topic < ActiveRecord::Base
end end
def trash!(trashed_by = nil) def trash!(trashed_by = nil)
update_category_topic_count_by(-1) if deleted_at.nil? if deleted_at.nil?
update_category_topic_count_by(-1)
CategoryTagStat.topic_deleted(self) if self.tags.present?
end
super(trashed_by) super(trashed_by)
update_flagged_posts_count update_flagged_posts_count
self.topic_embed.trash! if has_topic_embed? self.topic_embed.trash! if has_topic_embed?
end end
def recover! def recover!
update_category_topic_count_by(1) unless deleted_at.nil? unless deleted_at.nil?
update_category_topic_count_by(1)
CategoryTagStat.topic_recovered(self) if self.tags.present?
end
super super
update_flagged_posts_count update_flagged_posts_count
unless (topic_embed = TopicEmbed.with_deleted.find_by_topic_id(id)).nil? unless (topic_embed = TopicEmbed.with_deleted.find_by_topic_id(id)).nil?
@ -122,8 +128,8 @@ class Topic < ActiveRecord::Base
has_many :allowed_users, through: :topic_allowed_users, source: :user has_many :allowed_users, through: :topic_allowed_users, source: :user
has_many :queued_posts has_many :queued_posts
has_many :topic_tags, dependent: :destroy has_many :topic_tags
has_many :tags, through: :topic_tags has_many :tags, through: :topic_tags, dependent: :destroy # dependent destroy applies to the topic_tags records
has_many :tag_users, through: :tags has_many :tag_users, through: :tags
has_one :top_topic has_one :top_topic
@ -226,6 +232,12 @@ class Topic < ActiveRecord::Base
UserActionCreator.log_topic(self) UserActionCreator.log_topic(self)
end end
after_update do
if saved_changes[:category_id] && self.tags.present?
CategoryTagStat.topic_moved(self, *saved_changes[:category_id])
end
end
def initialize_default_values def initialize_default_values
self.bumped_at ||= Time.now self.bumped_at ||= Time.now
self.last_post_user_id ||= user_id self.last_post_user_id ||= user_id

View File

@ -1,6 +1,24 @@
class TopicTag < ActiveRecord::Base class TopicTag < ActiveRecord::Base
belongs_to :topic belongs_to :topic
belongs_to :tag, counter_cache: "topic_count" belongs_to :tag, counter_cache: "topic_count"
after_create do
if topic.category_id
if stat = CategoryTagStat.where(tag_id: tag_id, category_id: topic.category_id).first
stat.increment!(:topic_count)
else
CategoryTagStat.create(tag_id: tag_id, category_id: topic.category_id, topic_count: 1)
end
end
end
after_destroy do
if topic.category_id
if stat = CategoryTagStat.where(tag_id: tag_id, category: topic.category_id).first
stat.topic_count == 1 ? stat.destroy : stat.decrement!(:topic_count)
end
end
end
end end
# == Schema Information # == Schema Information

View File

@ -0,0 +1,12 @@
class CreateCategoryTagStats < ActiveRecord::Migration[5.1]
def change
create_table :category_tag_stats do |t|
t.references :category, null: false
t.references :tag, null: false
t.integer :topic_count, default: 0, null: false
end
add_index :category_tag_stats, [:category_id, :topic_count]
add_index :category_tag_stats, [:category_id, :tag_id], unique: true
end
end

View File

@ -183,3 +183,92 @@ describe "category tag restrictions" do
end end
end end
end end
describe "tag topic counts per category" do
let(:user) { Fabricate(:user) }
let(:admin) { Fabricate(:admin) }
let(:category) { Fabricate(:category) }
let(:category2) { Fabricate(:category) }
let(:tag1) { Fabricate(:tag) }
let(:tag2) { Fabricate(:tag) }
let(:tag3) { Fabricate(:tag) }
before do
SiteSetting.tagging_enabled = true
SiteSetting.min_trust_to_create_tag = 0
SiteSetting.min_trust_level_to_tag_topics = 0
end
it "counts when a topic is created with tags" do
expect {
Fabricate(:topic, category: category, tags: [tag1, tag2])
}.to change { CategoryTagStat.count }.by(2)
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(1)
end
it "counts when tag is added to an existing topic" do
topic = Fabricate(:topic, category: category)
post = Fabricate(:post, user: topic.user, topic: topic)
expect(CategoryTagStat.where(category: category).count).to eq(0)
expect {
PostRevisor.new(post).revise!(topic.user, raw: post.raw, tags: [tag1.name, tag2.name])
}.to change { CategoryTagStat.count }.by(2)
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(1)
end
context "topic with 2 tags" do
let(:topic) { Fabricate(:topic, category: category, tags: [tag1, tag2]) }
let(:post) { Fabricate(:post, user: topic.user, topic: topic) }
it "has correct counts after tag is removed from a topic" do
post
topic2 = Fabricate(:topic, category: category, tags: [tag2])
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(2)
PostRevisor.new(post).revise!(topic.user, raw: post.raw, tags: [])
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(0)
end
it "has correct counts after a topic's category changes" do
PostRevisor.new(post).revise!(topic.user, category_id: category2.id, raw: post.raw, tags: [tag1.name, tag2.name])
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category2, tag: tag1).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category2, tag: tag2).sum(:topic_count)).to eq(1)
end
it "has correct counts after topic's category AND tags changed" do
PostRevisor.new(post).revise!(topic.user, raw: post.raw, tags: [tag2.name, tag3.name], category_id: category2.id)
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category, tag: tag2).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category, tag: tag3).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category2, tag: tag1).sum(:topic_count)).to eq(0)
expect(CategoryTagStat.where(category: category2, tag: tag2).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category2, tag: tag3).sum(:topic_count)).to eq(1)
end
end
context "topic with one tag" do
let(:topic) { Fabricate(:topic, tags: [tag1], category: category) }
let(:post) { Fabricate(:post, user: topic.user, topic: topic) }
it "counts after topic becomes uncategorized" do
PostRevisor.new(post).revise!(topic.user, raw: post.raw, tags: [tag1.name], category_id: SiteSetting.uncategorized_category_id)
expect(CategoryTagStat.where(category: Category.find(SiteSetting.uncategorized_category_id), tag: tag1).sum(:topic_count)).to eq(1)
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(0)
end
it "updates counts after topic is deleted" do
PostDestroyer.new(admin, post).destroy
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(0)
end
it "updates counts after topic is recovered" do
PostDestroyer.new(admin, post).destroy
PostDestroyer.new(admin, post).recover
expect(CategoryTagStat.where(category: category, tag: tag1).sum(:topic_count)).to eq(1)
end
end
end

View File

@ -62,8 +62,8 @@ describe TopicList do
let!(:other_tag) { Fabricate(:tag, topics: [topic], name: "use-anywhere") } let!(:other_tag) { Fabricate(:tag, topics: [topic], name: "use-anywhere") }
let(:topic_list) { TopicList.new('latest', topic.user, [topic], category: category.id, category_id: category.id) } let(:topic_list) { TopicList.new('latest', topic.user, [topic], category: category.id, category_id: category.id) }
it 'should only return tags allowed in the category' do it 'should return tags used in the category' do
expect(topic_list.top_tags).to eq([tag.name]) expect(topic_list.top_tags).to eq([tag.name, other_tag.name].sort)
end end
it "with no category, should return all tags" do it "with no category, should return all tags" do