Correct stripping of non BasicLatin characters

This commit is contained in:
Kirill Pimenov 2013-02-28 23:52:35 +04:00
parent ee0396198e
commit f639397aff
2 changed files with 19 additions and 3 deletions

View File

@ -100,7 +100,8 @@ module Search
def self.query(term, type_filter=nil) def self.query(term, type_filter=nil)
return nil if term.blank? return nil if term.blank?
sanitized_term = PG::Connection.escape_string(term) #term.gsub(/[^0-9a-zA-Z_ ]/, '') sanitized_term = PG::Connection.escape_string(term.gsub(/[:()&!]/,'')) # Instead of original term.gsub(/[^0-9a-zA-Z_ ]/, '')
# We strip only the characters that have special meaning in full-text search (FTS) queries, i.e. : ( ) & !, and simply escape the rest.
# really short terms are totally pointless # really short terms are totally pointless
return nil if sanitized_term.blank? || sanitized_term.length < self.min_search_term_length return nil if sanitized_term.blank? || sanitized_term.length < self.min_search_term_length

View File

@ -1,3 +1,5 @@
# encoding: utf-8
require 'spec_helper' require 'spec_helper'
require 'search' require 'search'
@ -70,8 +72,7 @@ describe Search do
end end
it 'escapes non alphanumeric characters' do it 'escapes non alphanumeric characters' do
ActiveRecord::Base.expects(:exec_sql).never Search.query(':!$);}]>@\#\"\'').should be_blank # There are at least three levels of sanitation for Search.query!
Search.query(':!$').should be_blank
end end
it 'works when given two terms with spaces' do it 'works when given two terms with spaces' do
@ -123,6 +124,20 @@ describe Search do
end end
# Checks that a query written in Cyrillic (non Basic Latin) still returns a topic result.
context 'cyrillic topic' do
# Topic fabricated with a Cyrillic title of the form "Тестовая запись <i>".
let!(:cyrillic_topic) { Fabricate(:topic) do
user
title { sequence(:title) { |i| "Тестовая запись #{i}" } }
end
}
# A post attached to that topic, authored by the topic's own user.
let!(:post) {Fabricate(:post, topic: cyrillic_topic, user: cyrillic_topic.user)}
# Searches for one word of the title; first_of_type is a helper defined elsewhere in this spec
# (presumably picks the first result group of the given type -- confirm against its definition).
let(:result) { first_of_type(Search.query('запись'), 'topic') }
it 'finds something when given cyrillic query' do
result.should be_present
end
end
context 'categories' do context 'categories' do
let!(:category) { Fabricate(:category) } let!(:category) { Fabricate(:category) }