From 4f8262e0d5d13dbe4ed1d4867aab94d1cb04f668 Mon Sep 17 00:00:00 2001 From: Guo Xiang Tan Date: Tue, 28 Jul 2020 15:20:18 +0800 Subject: [PATCH] FIX: Cook snippet of raw in `Topic.similar_to`. If we don't cook the raw, we end up trying to match uncooked raw against `TopicSearchData#search_data`, which consists of cooked raw. --- app/models/topic.rb | 6 +++++- spec/models/topic_spec.rb | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/app/models/topic.rb b/app/models/topic.rb index caedcd62c30..17fd50c6bef 100644 --- a/app/models/topic.rb +++ b/app/models/topic.rb @@ -587,8 +587,12 @@ class Topic < ActiveRecord::Base ) if raw.present? + cooked = SearchIndexer::HtmlScrubber.scrub( + PrettyText.cook(raw[0...MAX_SIMILAR_BODY_LENGTH].strip) + ) + raw_tsquery = Search.set_tsquery_weight_filter( - Search.prepare_data(raw[0...MAX_SIMILAR_BODY_LENGTH].strip), + Search.prepare_data(cooked), 'B' ) diff --git a/spec/models/topic_spec.rb b/spec/models/topic_spec.rb index 66faccd4497..ccb5a31ce70 100644 --- a/spec/models/topic_spec.rb +++ b/spec/models/topic_spec.rb @@ -542,6 +542,22 @@ describe Topic do expect(Topic.similar_to("unrelated term", "1 2 3 poddle")).to eq([]) end + it 'doesnt match numbered lists against numbers in Post#raw' do + post.update!(raw: <<~RAW) + Internet Explorer 11+ Oct 2013 Google Chrome 32+ Jan 2014 Firefox 27+ Feb 2014 Safari 6.1+ Jul 2012 Safari, iOS 8+ Oct 2014 + RAW + + post.topic.update!(title: 'Where are we with browser support in 2019?') + + topics = Topic.similar_to("Videos broken in composer", <<~RAW) + 1. Do something + 2. Do something else + 3. Do more things + RAW + + expect(topics).to eq([]) + end + context "secure categories" do before do category.update!(read_restricted: true)