FIX: More resilient sentiment backfill query (#998)
This commit is contained in:
parent
e7c2cd861a
commit
938d4c018c
|
@ -5,43 +5,43 @@ module DiscourseAi
|
||||||
class PostClassification
|
class PostClassification
|
||||||
def self.backfill_query(from_post_id: nil, max_age_days: nil)
|
def self.backfill_query(from_post_id: nil, max_age_days: nil)
|
||||||
available_classifier_names =
|
available_classifier_names =
|
||||||
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema
|
DiscourseAi::Sentiment::SentimentSiteSettingJsonSchema.values.map { _1.model_name }
|
||||||
.values
|
|
||||||
.map { |mc| mc.model_name.downcase }
|
|
||||||
.sort
|
|
||||||
|
|
||||||
base_query =
|
queries =
|
||||||
Post
|
available_classifier_names.map do |classifier_name|
|
||||||
.includes(:sentiment_classifications)
|
base_query =
|
||||||
.joins("INNER JOIN topics ON topics.id = posts.topic_id")
|
Post
|
||||||
.where(post_type: Post.types[:regular])
|
.includes(:sentiment_classifications)
|
||||||
.where.not(topics: { archetype: Archetype.private_message })
|
.joins("INNER JOIN topics ON topics.id = posts.topic_id")
|
||||||
.where(posts: { deleted_at: nil })
|
.where(post_type: Post.types[:regular])
|
||||||
.where(topics: { deleted_at: nil })
|
.where.not(topics: { archetype: Archetype.private_message })
|
||||||
.joins(<<~SQL)
|
.where(posts: { deleted_at: nil })
|
||||||
LEFT JOIN classification_results crs
|
.where(topics: { deleted_at: nil })
|
||||||
ON crs.target_id = posts.id
|
.joins(<<~SQL)
|
||||||
AND crs.target_type = 'Post'
|
LEFT JOIN classification_results crs
|
||||||
AND crs.classification_type = 'sentiment'
|
ON crs.target_id = posts.id
|
||||||
SQL
|
AND crs.target_type = 'Post'
|
||||||
.group("posts.id")
|
AND crs.classification_type = 'sentiment'
|
||||||
.having(<<~SQL, available_classifier_names)
|
AND crs.model_used = '#{classifier_name}'
|
||||||
COUNT(crs.model_used) = 0
|
SQL
|
||||||
OR array_agg(
|
.where("crs.id IS NULL")
|
||||||
DISTINCT LOWER(crs.model_used) ORDER BY LOWER(crs.model_used)
|
|
||||||
)::text[] IS DISTINCT FROM array[?]
|
|
||||||
SQL
|
|
||||||
|
|
||||||
base_query = base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?
|
base_query =
|
||||||
|
base_query.where("posts.id >= ?", from_post_id.to_i) if from_post_id.present?
|
||||||
|
|
||||||
if max_age_days.present?
|
if max_age_days.present?
|
||||||
base_query =
|
base_query =
|
||||||
base_query.where(
|
base_query.where(
|
||||||
"posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
|
"posts.created_at > current_date - INTERVAL '#{max_age_days.to_i} DAY'",
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
base_query
|
base_query
|
||||||
|
end
|
||||||
|
|
||||||
|
unioned_queries = queries.map(&:to_sql).join(" UNION ")
|
||||||
|
|
||||||
|
Post.from(Arel.sql("(#{unioned_queries}) as posts"))
|
||||||
end
|
end
|
||||||
|
|
||||||
def bulk_classify!(relation)
|
def bulk_classify!(relation)
|
||||||
|
|
Loading…
Reference in New Issue