488 lines
14 KiB
Ruby
488 lines
14 KiB
Ruby
# name: discourse-data-explorer
|
|
# about: Interface for running analysis SQL queries on the live database
|
|
# version: 0.2
|
|
# authors: Riking
|
|
# url: https://github.com/discourse/discourse-data-explorer
|
|
|
|
enabled_site_setting :data_explorer_enabled
|
|
register_asset 'stylesheets/explorer.scss'
|
|
|
|
# route: /admin/plugins/explorer
|
|
add_admin_route 'explorer.title', 'explorer'
|
|
|
|
module ::DataExplorer
|
|
def self.plugin_name
|
|
'discourse-data-explorer'.freeze
|
|
end
|
|
|
|
def self.pstore_get(key)
|
|
PluginStore.get(DataExplorer.plugin_name, key)
|
|
end
|
|
|
|
def self.pstore_set(key, value)
|
|
PluginStore.set(DataExplorer.plugin_name, key, value)
|
|
end
|
|
|
|
def self.pstore_delete(key)
|
|
PluginStore.remove(DataExplorer.plugin_name, key)
|
|
end
|
|
end
|
|
|
|
|
|
after_initialize do
|
|
|
|
module ::DataExplorer
|
|
class Engine < ::Rails::Engine
|
|
engine_name "data_explorer"
|
|
isolate_namespace DataExplorer
|
|
end
|
|
|
|
class ValidationError < StandardError;
|
|
end
|
|
|
|
# Extract :colon-style parameters from the SQL query and replace them with
|
|
# $1-style parameters.
|
|
#
|
|
# @return [Hash] :sql => [String] the new SQL query to run, :names =>
|
|
# [Array] the names of all parameters, in order by their $-style name.
|
|
# (The first name is $0.)
|
|
def self.extract_params(sql)
|
|
names = []
|
|
new_sql = sql.gsub(/(:?):([a-z_]+)/) do |_|
|
|
if $1 == ':' # skip casts
|
|
$&
|
|
else
|
|
names << $2
|
|
"$#{names.length - 1}"
|
|
end
|
|
end
|
|
{sql: new_sql, names: names}
|
|
end
|
|
|
|
# Run a data explorer query on the currently connected database.
|
|
#
|
|
# @param [DataExplorer::Query] query the Query object to run
|
|
# @param [Hash] params the colon-style query parameters to pass to AR
|
|
# @param [Hash] opts hash of options
|
|
# explain - include a query plan in the result
|
|
# @return [Hash]
|
|
# error - any exception that was raised in the execution. Check this
|
|
# first before looking at any other fields.
|
|
# pg_result - the PG::Result object
|
|
# duration_nanos - the query duration, in nanoseconds
|
|
# explain - the query
|
|
def self.run_query(query, params={}, opts={})
|
|
# Safety checks
|
|
if query.sql =~ /;/
|
|
err = DataExplorer::ValidationError.new(I18n.t('js.errors.explorer.no_semicolons'))
|
|
return {error: err, duration_nanos: 0}
|
|
end
|
|
|
|
query_args = (query.qopts[:defaults] || {}).with_indifferent_access.merge(params)
|
|
|
|
# Rudimentary types
|
|
query_args.each do |k, arg|
|
|
if arg =~ /\A\d+\z/
|
|
query_args[k] = arg.to_i
|
|
end
|
|
end
|
|
# If we don't include this, then queries with a % sign in them fail
|
|
# because AR thinks we want percent-based parametes
|
|
query_args[:xxdummy] = 1
|
|
|
|
time_start, time_end, explain, err, result = nil
|
|
begin
|
|
ActiveRecord::Base.connection.transaction do
|
|
# Setting transaction to read only prevents shoot-in-foot actions like SELECT FOR UPDATE
|
|
ActiveRecord::Base.exec_sql "SET TRANSACTION READ ONLY"
|
|
# SQL comments are for the benefits of the slow queries log
|
|
sql = <<SQL
|
|
|
|
/*
|
|
* DataExplorer Query
|
|
* Query: /admin/plugins/explorer?id=#{query.id}
|
|
* Started by: #{opts[:current_user]}
|
|
* :xxdummy
|
|
*/
|
|
WITH query AS (
|
|
#{query.sql}
|
|
) SELECT * FROM query
|
|
LIMIT #{opts[:limit] || 1000}
|
|
SQL
|
|
|
|
time_start = Time.now
|
|
result = ActiveRecord::Base.exec_sql(sql, query_args)
|
|
time_end = Time.now
|
|
|
|
if opts[:explain]
|
|
explain = ActiveRecord::Base.exec_sql("-- :xxdummy \nEXPLAIN #{query.sql}", query_args)
|
|
.map { |row| row["QUERY PLAN"] }.join "\n"
|
|
end
|
|
|
|
# All done. Issue a rollback anyways, just in case
|
|
raise ActiveRecord::Rollback
|
|
end
|
|
rescue Exception => ex
|
|
err = ex
|
|
time_end = Time.now
|
|
end
|
|
|
|
{
|
|
error: err,
|
|
pg_result: result,
|
|
duration_nanos: time_end.nsec - time_start.nsec,
|
|
explain: explain,
|
|
}
|
|
end
|
|
|
|
def self.sensitive_column_names
|
|
%w(
|
|
#_IP_Addresses
|
|
topic_views.ip_address
|
|
users.ip_address
|
|
users.registration_ip_address
|
|
incoming_links.ip_address
|
|
topic_link_clicks.ip_address
|
|
user_histories.ip_address
|
|
|
|
#_Emails
|
|
email_tokens.email
|
|
users.email
|
|
invites.email
|
|
user_histories.email
|
|
email_logs.to_address
|
|
posts.raw_email
|
|
badge_posts.raw_email
|
|
|
|
#_Secret_Tokens
|
|
email_tokens.token
|
|
email_logs.reply_key
|
|
api_keys.key
|
|
site_settings.value
|
|
|
|
users.password_hash
|
|
users.salt
|
|
|
|
#_Authentication_Info
|
|
user_open_ids.email
|
|
oauth2_user_infos.uid
|
|
oauth2_user_infos.email
|
|
facebook_user_infos.facebook_user_id
|
|
facebook_user_infos.email
|
|
twitter_user_infos.twitter_user_id
|
|
github_user_infos.github_user_id
|
|
single_sign_on_records.external_email
|
|
single_sign_on_records.external_id
|
|
google_user_infos.google_user_id
|
|
google_user_infos.email
|
|
)
|
|
end
|
|
|
|
def self.schema
|
|
# refer user to http://www.postgresql.org/docs/9.3/static/datatype.html
|
|
@schema ||= begin
|
|
results = ActiveRecord::Base.exec_sql <<SQL
|
|
select column_name, data_type, character_maximum_length, is_nullable, column_default, table_name
|
|
from INFORMATION_SCHEMA.COLUMNS where table_schema = 'public'
|
|
SQL
|
|
by_table = {}
|
|
# Massage the results into a nicer form
|
|
results.each do |hash|
|
|
if hash['is_nullable'] == "YES"
|
|
hash['is_nullable'] = true
|
|
else
|
|
hash.delete('is_nullable')
|
|
end
|
|
clen = hash.delete 'character_maximum_length'
|
|
dt = hash['data_type']
|
|
if dt == 'character varying'
|
|
hash['data_type'] = "varchar(#{clen.to_i})"
|
|
elsif dt == 'timestamp without time zone'
|
|
hash['data_type'] = 'timestamp'
|
|
elsif dt == 'double precision'
|
|
hash['data_type'] = 'double'
|
|
end
|
|
default = hash['column_default']
|
|
if default.nil? || default =~ /^nextval\(/
|
|
hash.delete 'column_default'
|
|
elsif default =~ /^'(.*)'::(character varying|text)/
|
|
hash['column_default'] = $1
|
|
end
|
|
|
|
if sensitive_column_names.include? "#{hash['table_name']}.#{hash['column_name']}"
|
|
hash['sensitive'] = true
|
|
end
|
|
|
|
tname = hash.delete('table_name')
|
|
by_table[tname] ||= []
|
|
by_table[tname] << hash
|
|
end
|
|
|
|
# this works for now, but no big loss if the tables aren't quite sorted
|
|
sorted_by_table = {}
|
|
by_table.keys.sort.each do |tbl|
|
|
sorted_by_table[tbl] = by_table[tbl]
|
|
end
|
|
sorted_by_table
|
|
end
|
|
end
|
|
end
|
|
|
|
# Reimplement a couple ActiveRecord methods, but use PluginStore for storage instead
|
|
class DataExplorer::Query
|
|
attr_accessor :id, :name, :description, :sql
|
|
attr_reader :qopts
|
|
|
|
def initialize
|
|
@name = 'Unnamed Query'
|
|
@description = 'Enter a description here'
|
|
@sql = 'SELECT 1'
|
|
@qopts = {}
|
|
end
|
|
|
|
def param_names
|
|
param_info = DataExplorer.extract_params sql
|
|
param_info[:names]
|
|
end
|
|
|
|
def slug
|
|
s = Slug.for(name)
|
|
s = "query-#{id}" unless s.present?
|
|
s
|
|
end
|
|
|
|
# saving/loading functions
|
|
# May want to extract this into a library or something for plugins to use?
|
|
def self.alloc_id
|
|
DistributedMutex.synchronize('data-explorer_query-id') do
|
|
max_id = DataExplorer.pstore_get("q:_id")
|
|
max_id = 1 unless max_id
|
|
DataExplorer.pstore_set("q:_id", max_id + 1)
|
|
max_id
|
|
end
|
|
end
|
|
|
|
def qopts=(val)
|
|
case val
|
|
when String
|
|
@qopts = HashWithIndifferentAccess.new(MultiJson.load(val))
|
|
when HashWithIndifferentAccess
|
|
@qopts = val
|
|
when Hash
|
|
@qopts = val.with_indifferent_access
|
|
else
|
|
raise ArgumentError.new('invalid type for qopts')
|
|
end
|
|
end
|
|
|
|
def self.from_hash(h)
|
|
query = DataExplorer::Query.new
|
|
[:name, :description, :sql, :qopts].each do |sym|
|
|
query.send("#{sym}=", h[sym]) if h[sym]
|
|
end
|
|
if h[:id]
|
|
query.id = h[:id].to_i
|
|
end
|
|
query
|
|
end
|
|
|
|
def to_hash
|
|
{
|
|
id: @id,
|
|
name: @name,
|
|
description: @description,
|
|
sql: @sql,
|
|
qopts: @qopts.to_hash,
|
|
}
|
|
end
|
|
|
|
def self.find(id, opts={})
|
|
hash = DataExplorer.pstore_get("q:#{id}")
|
|
unless hash
|
|
return DataExplorer::Query.new if opts[:ignore_deleted]
|
|
raise Discourse::NotFound
|
|
end
|
|
from_hash hash
|
|
end
|
|
|
|
def save
|
|
unless @id && @id > 0
|
|
@id = self.class.alloc_id
|
|
end
|
|
DataExplorer.pstore_set "q:#{id}", to_hash
|
|
end
|
|
|
|
def destroy
|
|
DataExplorer.pstore_delete "q:#{id}"
|
|
end
|
|
|
|
def read_attribute_for_serialization(attr)
|
|
self.send(attr)
|
|
end
|
|
|
|
def self.all
|
|
PluginStoreRow.where(plugin_name: DataExplorer.plugin_name)
|
|
.where("key LIKE 'q:%'")
|
|
.where("key != 'q:_id'")
|
|
.map do |psr|
|
|
DataExplorer::Query.from_hash PluginStore.cast_value(psr.type_name, psr.value)
|
|
end
|
|
end
|
|
end
|
|
|
|
require_dependency 'application_controller'
|
|
class DataExplorer::QueryController < ::ApplicationController
|
|
requires_plugin DataExplorer.plugin_name
|
|
|
|
before_filter :check_enabled
|
|
|
|
def check_enabled
|
|
raise Discourse::NotFound unless SiteSetting.data_explorer_enabled?
|
|
end
|
|
|
|
def index
|
|
# guardian.ensure_can_use_data_explorer!
|
|
queries = DataExplorer::Query.all
|
|
render_serialized queries, DataExplorer::QuerySerializer, root: 'queries'
|
|
end
|
|
|
|
skip_before_filter :check_xhr, only: [:show]
|
|
|
|
def show
|
|
check_xhr unless params[:export]
|
|
|
|
query = DataExplorer::Query.find(params[:id].to_i)
|
|
|
|
if params[:export]
|
|
response.headers['Content-Disposition'] = "attachment; filename=#{query.slug}.dcquery.json"
|
|
response.sending_file = true
|
|
end
|
|
|
|
# guardian.ensure_can_see! query
|
|
render_serialized query, DataExplorer::QuerySerializer, root: 'query'
|
|
end
|
|
|
|
def create
|
|
# guardian.ensure_can_create_explorer_query!
|
|
|
|
query = DataExplorer::Query.from_hash params.require(:query)
|
|
query.id = nil # json import will assign an id, which is wrong
|
|
query.save
|
|
|
|
render_serialized query, DataExplorer::QuerySerializer, root: 'query'
|
|
end
|
|
|
|
def update
|
|
query = DataExplorer::Query.find(params[:id].to_i, ignore_deleted: true)
|
|
hash = params.require(:query)
|
|
|
|
# Undeleting
|
|
unless query.id
|
|
if hash[:id]
|
|
query.id = hash[:id].to_i
|
|
else
|
|
raise Discourse::NotFound
|
|
end
|
|
end
|
|
|
|
[:name, :sql, :description, :qopts].each do |sym|
|
|
query.send("#{sym}=", hash[sym]) if hash[sym]
|
|
end
|
|
query.save
|
|
|
|
render_serialized query, DataExplorer::QuerySerializer, root: 'query'
|
|
end
|
|
|
|
def destroy
|
|
query = DataExplorer::Query.find(params[:id].to_i)
|
|
query.destroy
|
|
|
|
render json: {success: true, errors: []}
|
|
end
|
|
|
|
def schema
|
|
schema_version = ActiveRecord::Base.exec_sql("SELECT max(version) AS tag FROM schema_migrations").first['tag']
|
|
if stale?(public: true, etag: schema_version)
|
|
render json: DataExplorer.schema
|
|
end
|
|
end
|
|
|
|
# Return value:
|
|
# success - true/false. if false, inspect the errors value.
|
|
# errors - array of strings.
|
|
# params - hash. Echo of the query parameters as executed.
|
|
# duration - float. Time to execute the query, in milliseconds, to 1 decimal place.
|
|
# columns - array of strings. Titles of the returned columns, in order.
|
|
# explain - string. (Optional - pass explain=true in the request) Postgres query plan, UNIX newlines.
|
|
# rows - array of array of strings. Results of the query. In the same order as 'columns'.
|
|
def run
|
|
query = DataExplorer::Query.find(params[:id].to_i)
|
|
query_params = MultiJson.load(params[:params])
|
|
opts = {current_user: current_user.username}
|
|
opts[:explain] = true if params[:explain] == "true"
|
|
opts[:limit] = params[:limit].to_i if params[:limit]
|
|
result = DataExplorer.run_query(query, query_params, opts)
|
|
|
|
if result[:error]
|
|
err = result[:error]
|
|
|
|
# Pretty printing logic
|
|
err_class = err.class
|
|
err_msg = err.message
|
|
if err.is_a? ActiveRecord::StatementInvalid
|
|
err_class = err.original_exception.class
|
|
err_msg.gsub!("#{err_class.to_s}:", '')
|
|
else
|
|
err_msg = "#{err_class}: #{err_msg}"
|
|
end
|
|
|
|
render json: {
|
|
success: false,
|
|
errors: [err_msg]
|
|
}
|
|
else
|
|
pg_result = result[:pg_result]
|
|
cols = pg_result.fields
|
|
json = {
|
|
success: true,
|
|
errors: [],
|
|
duration: (result[:duration_nanos].to_f / 1_000_000).round(1),
|
|
columns: cols,
|
|
}
|
|
json[:explain] = result[:explain] if opts[:explain]
|
|
# TODO - special serialization
|
|
# This is dead code in the client right now
|
|
# if cols.any? { |col_name| special_serialization? col_name }
|
|
# json[:relations] = DataExplorer.add_extra_data(pg_result)
|
|
# end
|
|
|
|
json[:rows] = pg_result.values
|
|
|
|
render json: json
|
|
end
|
|
end
|
|
end
|
|
|
|
class DataExplorer::QuerySerializer < ActiveModel::Serializer
|
|
attributes :id, :sql, :name, :description, :qopts, :param_names
|
|
end
|
|
|
|
DataExplorer::Engine.routes.draw do
|
|
root to: "query#index"
|
|
get 'schema' => "query#schema"
|
|
get 'queries' => "query#index"
|
|
post 'queries' => "query#create"
|
|
get 'queries/:id' => "query#show"
|
|
put 'queries/:id' => "query#update"
|
|
delete 'queries/:id' => "query#destroy"
|
|
get 'queries/parse_params' => "query#parse_params"
|
|
post 'queries/:id/run' => "query#run"
|
|
end
|
|
|
|
Discourse::Application.routes.append do
|
|
mount ::DataExplorer::Engine, at: '/admin/plugins/explorer', constraints: AdminConstraint.new
|
|
end
|
|
|
|
end
|
|
|