PERF: Avoid parsing `Post#cooked` with Nokogiri for every search.

parent b979579c1b
commit 181c4eb760
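In short (not part of the original diff): the post index now stores the already HTML-scrubbed body as PostSearchData#raw_data, POST_INDEX_VERSION is bumped to 4 so newly indexed rows can be told apart from old ones, and search blurbs reuse that pre-scrubbed text instead of running Nokogiri over `Post#cooked` for every result. A minimal sketch of the read path, using the names from the diff below (`term` stands in for the caller's search term; this is an illustration, not the verbatim implementation):

    # Pick the blurb source based on how the post was indexed.
    if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
      # raw_data was scrubbed at index time, so Nokogiri can be skipped here.
      Search::GroupedSearchResults.blurb_for(
        cooked: post.post_search_data.raw_data,
        term: term,
        scrub: false
      )
    else
      # Rows indexed before version 4 fall back to scrubbing post.cooked.
      Search::GroupedSearchResults.blurb_for(cooked: post.cooked, term: term)
    end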
@@ -10,7 +10,7 @@ class SimilarTopicsController < ApplicationController
     attr_reader :topic

     def blurb
-      Search::GroupedSearchResults.blurb_for(@topic.try(:blurb))
+      Search::GroupedSearchResults.blurb_for(cooked: @topic.try(:blurb))
     end
   end
@@ -1,7 +1,7 @@
 # frozen_string_literal: true

 class SearchIndexer
-  POST_INDEX_VERSION = 3
+  POST_INDEX_VERSION = 4
+  MIN_POST_REINDEX_VERSION = 3
   TOPIC_INDEX_VERSION = 3
   CATEGORY_INDEX_VERSION = 3
@@ -39,8 +39,6 @@ class SearchIndexer
       setweight(to_tsvector('#{stemmer}', coalesce(:d,'')), 'D')
     SQL

-    indexed_data = search_data.select { |d| d.length > 0 }.join(' ')
-
     ranked_params = {
       a: search_data[0],
       b: search_data[1],
@@ -48,6 +46,13 @@ class SearchIndexer
       d: search_data[3],
     }

+    indexed_data =
+      if table.to_s == "post"
+        ranked_params[:d]
+      else
+        search_data.select { |d| d.length > 0 }.join(' ')
+      end
+
     tsvector = DB.query_single("SELECT #{ranked_index}", ranked_params)[0]
     additional_lexemes = []
@@ -105,7 +110,7 @@ class SearchIndexer
     scrubbed_cooked = scrub_html_for_search(cooked)[0...Topic::MAX_SIMILAR_BODY_LENGTH]

     # a bit inconsitent that we use title as A and body as B when in
-    # the post index body is C
+    # the post index body is D
     update_index(table: 'topic', id: topic_id, raw_data: [title, scrubbed_cooked])
   end
@@ -165,9 +170,11 @@ class SearchIndexer
     end

     category_name = topic.category&.name if topic
+
     if topic
-      tags = topic.tags.select(:id, :name)
-      unless tags.empty?
+      tags = topic.tags.select(:id, :name).to_a
+
+      if tags.present?
        tag_names = (tags.map(&:name) + Tag.where(target_tag_id: tags.map(&:id)).pluck(:name)).join(' ')
      end
    end
@@ -1128,7 +1128,7 @@ class Search
   end

   def posts_eager_loads(query)
-    query = query.includes(:user)
+    query = query.includes(:user, :post_search_data)
    topic_eager_loads = [:category]

    if SiteSetting.tagging_enabled
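Because each blurb now reads post.post_search_data, eager-loading it alongside :user avoids an N+1 query when a page of results is rendered. A rough sketch of the pattern (illustrative only; `result_ids` is a hypothetical list of post ids):

    # Without :post_search_data in the includes, every blurb built below
    # would trigger its own PostSearchData query.
    posts = Post.includes(:user, :post_search_data).where(id: result_ids)
    posts.each { |post| blurb(post) }  # association already loaded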
@@ -58,7 +58,19 @@ class Search
     end

     def blurb(post)
-      GroupedSearchResults.blurb_for(post.cooked, @blurb_term, @blurb_length)
+      opts = {
+        term: @blurb_term,
+        blurb_length: @blurb_length
+      }
+
+      if post.post_search_data.version > SearchIndexer::MIN_POST_REINDEX_VERSION
+        opts[:cooked] = post.post_search_data.raw_data
+        opts[:scrub] = false
+      else
+        opts[:cooked] = post.cooked
+      end
+
+      GroupedSearchResults.blurb_for(**opts)
     end

     def add(object)
@@ -73,9 +85,9 @@ class Search
       end
     end

-    def self.blurb_for(cooked, term = nil, blurb_length = BLURB_LENGTH)
+    def self.blurb_for(cooked: nil, term: nil, blurb_length: BLURB_LENGTH, scrub: true)
       blurb = nil
-      cooked = SearchIndexer.scrub_html_for_search(cooked)
+      cooked = SearchIndexer.scrub_html_for_search(cooked) if scrub

       urls = Set.new
       cooked.scan(URI.regexp(%w{http https})) { urls << $& }
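Note that `scrub: true` remains the default, so callers that only pass `cooked:` (SimilarTopicsController above and the specs below) keep the old behaviour of scrubbing HTML first; only the search-result path opts out. Illustrative call with hypothetical input:

    # Default scrub: true still strips markup before the blurb is built.
    Search::GroupedSearchResults.blurb_for(cooked: "<p>hello <b>world</b></p>")
    # => roughly "hello world" (exact output depends on scrub_html_for_search)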
@@ -38,7 +38,7 @@ describe Search do
      link to a video file: https://somesite.com/content/somethingelse.MOV
    RAW

-    result = Search::GroupedSearchResults.blurb_for(cooked)
+    result = Search::GroupedSearchResults.blurb_for(cooked: cooked)

    expect(result).to eq("link to an external page: https://google.com/?u=bar link to an audio file: #{I18n.t("search.audio")} link to a video file: #{I18n.t("search.video")}")
  end
@@ -51,7 +51,7 @@ describe Search do
      http://localhost/uploads/default/original/1X/90adc0092b30c04b761541bc0322d0dce3d896e7.m4a
    RAW

-    result = Search::GroupedSearchResults.blurb_for(cooked)
+    result = Search::GroupedSearchResults.blurb_for(cooked: cooked)

    expect(result).to eq("Here goes a test cooked with enough characters to hit the blurb limit. Something is very interesting about this audio file. #{I18n.t("search.audio")}")
  end
@@ -59,7 +59,7 @@ describe Search do
    cooked = <<~RAW
      invalid URL: http:error] should not trip up blurb generation.
    RAW
-    result = Search::GroupedSearchResults.blurb_for(cooked)
+    result = Search::GroupedSearchResults.blurb_for(cooked: cooked)
    expect(result).to eq("invalid URL: http:error] should not trip up blurb generation.")
  end
end
@@ -3,10 +3,22 @@
 require 'rails_helper'

 describe SearchController do
+  fab!(:awesome_topic) do
+    topic = Fabricate(:topic)
+    tag = Fabricate(:tag)
+    topic.tags << tag
+    Fabricate(:tag, target_tag_id: tag.id)
+    topic
+  end
+
   fab!(:awesome_post) do
     SearchIndexer.enable
-    Fabricate(:post, raw: 'this is my really awesome post')
+    Fabricate(:post, topic: awesome_topic, raw: 'this is my really awesome post')
+  end
+
+  fab!(:awesome_post_2) do
+    SearchIndexer.enable
+    Fabricate(:post, raw: 'this is my really awesome post 2')
   end

   fab!(:user) do
@@ -95,10 +107,14 @@ describe SearchController do
      data = response.parsed_body

-      expect(data['posts'].length).to eq(1)
-      expect(data['posts'][0]['id']).to eq(awesome_post.id)
-      expect(data['posts'][0]['blurb']).to eq(awesome_post.raw)
-      expect(data['topics'][0]['id']).to eq(awesome_post.topic_id)
+      expect(data['posts'].length).to eq(2)
+      expect(data['posts'][0]['id']).to eq(awesome_post_2.id)
+      expect(data['posts'][0]['blurb']).to eq(awesome_post_2.raw)
+      expect(data['topics'][0]['id']).to eq(awesome_post_2.topic_id)
+
+      expect(data['posts'][1]['id']).to eq(awesome_post.id)
+      expect(data['posts'][1]['blurb']).to eq(awesome_post.raw)
+      expect(data['topics'][1]['id']).to eq(awesome_post.topic_id)
    end

    it "can search correctly with advanced search filters" do
@@ -20,12 +20,13 @@ describe SearchIndexer do
  it 'correctly indexes chinese' do
    SiteSetting.default_locale = 'zh_CN'
    data = "你好世界"
-    expect(data.split(" ").length).to eq(1)

-    SearchIndexer.update_posts_index(post_id, "你好世界", "", "", nil)
+    SearchIndexer.update_posts_index(post_id, "", "", "", data)

-    raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
-    expect(raw_data.split(' ').length).to eq(2)
+    post_search_data = PostSearchData.find_by(post_id: post_id)
+
+    expect(post_search_data.raw_data).to eq("你好 世界")
+    expect(post_search_data.search_data).to eq("'世界':2 '你好':1")
  end

  it 'extract youtube title' do
@@ -104,11 +105,6 @@ describe SearchIndexer do
    expect(raw_data).to eq("This is a test")
    expect(locale).to eq(SiteSetting.default_locale)
    expect(version).to eq(SearchIndexer::POST_INDEX_VERSION)
-
-    SearchIndexer.update_posts_index(post_id, "tester", "", nil, nil)
-
-    raw_data = PostSearchData.where(post_id: post_id).pluck(:raw_data)[0]
-    expect(raw_data).to eq("tester")
  end

  describe '.index' do
@@ -118,10 +114,10 @@ describe SearchIndexer do
      expect { post }.to change { PostSearchData.count }.by(1)

      expect { post.update!(raw: "this is new content") }
-        .to change { post.reload.post_search_data.raw_data }
+        .to change { post.reload.post_search_data.search_data }

      expect { post.update!(topic_id: Fabricate(:topic).id) }
-        .to change { post.reload.post_search_data.raw_data }
+        .to change { post.reload.post_search_data.search_data }
    end

    it 'should not index posts with empty raw' do
@@ -141,7 +137,7 @@ describe SearchIndexer do
      topic = post.topic

      expect(post.post_search_data.raw_data).to eq(
-        "#{topic.title} #{topic.category.name} https://meta.discourse.org/some.png"
+        "https://meta.discourse.org/some.png"
      )
    end
@@ -158,7 +154,7 @@ describe SearchIndexer do
      topic = post.topic

      expect(post.post_search_data.raw_data).to eq(
-        "#{topic.title} #{category.name} a https://cnn.com , http://stuff.com.au b http://abc.net/xyz=1 abc.net/xyz=1"
+        "a https://cnn.com , http://stuff.com.au b http://abc.net/xyz=1 abc.net/xyz=1"
      )

      expect(post.post_search_data.search_data).to eq(
@@ -190,7 +186,7 @@ describe SearchIndexer do
      )

      expect(post.post_search_data.raw_data).to eq(
-        "#{topic.title} #{topic.category.name} Let me see how I can fix this image white walkers GOT"
+        "Let me see how I can fix this image white walkers GOT"
      )
    end
  end
|
Loading…
Reference in New Issue