From df10a27067056f07f246494e8d9a715d2720f1a1 Mon Sep 17 00:00:00 2001 From: Penar Musaraj Date: Wed, 6 Apr 2022 16:06:45 -0400 Subject: [PATCH] FIX: Exclude automatic anchors from search index (#16396) --- app/services/search_indexer.rb | 4 ++++ spec/services/search_indexer_spec.rb | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb index 1885a4c6f47..33d18b41c9a 100644 --- a/app/services/search_indexer.rb +++ b/app/services/search_indexer.rb @@ -337,6 +337,10 @@ class SearchIndexer if node["href"] == node.text || MENTION_CLASSES.include?(node["class"]) node.remove_attribute("href") end + + if node["class"] == "anchor" && node["href"]&.starts_with?("#") # href may already have been removed by the check above + node.remove_attribute("href") + end end html_scrubber = new diff --git a/spec/services/search_indexer_spec.rb b/spec/services/search_indexer_spec.rb index 180d1d0be82..b132c88080d 100644 --- a/spec/services/search_indexer_spec.rb +++ b/spec/services/search_indexer_spec.rb @@ -42,6 +42,12 @@ describe SearchIndexer do expect(scrubbed).to eq("http://meta.discourse.org/ link") end + it 'ignores autogenerated link anchors' do + html = "something special" + scrubbed = SearchIndexer::HtmlScrubber.scrub(html) + expect(scrubbed).to eq("something special") + end + it 'extracts @username from mentions' do html = '

@狮子 @foo

' scrubbed = SearchIndexer::HtmlScrubber.scrub(html)