From 90f4ffa31d4bda16fcda04fb837bf1c01160d417 Mon Sep 17 00:00:00 2001 From: Eugen Rochko Date: Mon, 4 Nov 2024 11:11:06 +0100 Subject: [PATCH] Fix performance of percentile calculation for annual reports (#32765) --- app/lib/annual_report.rb | 10 ++++ app/lib/annual_report/percentiles.rb | 55 +++++-------------- app/lib/annual_report/source.rb | 8 +++ .../statuses_per_account_count.rb | 15 +++++ ...nual_report_statuses_per_account_counts.rb | 13 +++++ db/schema.rb | 9 ++- spec/lib/annual_report/percentiles_spec.rb | 14 +++-- 7 files changed, 77 insertions(+), 47 deletions(-) create mode 100644 app/models/annual_report/statuses_per_account_count.rb create mode 100644 db/migrate/20241104082851_create_annual_report_statuses_per_account_counts.rb diff --git a/app/lib/annual_report.rb b/app/lib/annual_report.rb index cf4297f2a4..275cc4b87d 100644 --- a/app/lib/annual_report.rb +++ b/app/lib/annual_report.rb @@ -17,11 +17,21 @@ class AnnualReport SCHEMA = 1 + def self.table_name_prefix + 'annual_report_' + end + def initialize(account, year) @account = account @year = year end + def self.prepare(year) + SOURCES.each do |klass| + klass.prepare(year) + end + end + def generate return if GeneratedAnnualReport.exists?(account: @account, year: @year) diff --git a/app/lib/annual_report/percentiles.rb b/app/lib/annual_report/percentiles.rb index 0251cb66ad..2b0305c415 100644 --- a/app/lib/annual_report/percentiles.rb +++ b/app/lib/annual_report/percentiles.rb @@ -1,62 +1,37 @@ # frozen_string_literal: true class AnnualReport::Percentiles < AnnualReport::Source + def self.prepare(year) + AnnualReport::StatusesPerAccountCount.connection.exec_query(<<~SQL.squish, nil, [year, Mastodon::Snowflake.id_at(DateTime.new(year).beginning_of_year), Mastodon::Snowflake.id_at(DateTime.new(year).end_of_year)]) + INSERT INTO annual_report_statuses_per_account_counts (year, account_id, statuses_count) + SELECT $1, account_id, count(*) + FROM statuses + WHERE id BETWEEN $2 AND $3 + AND (local OR uri IS NULL) + GROUP BY account_id + ON CONFLICT (year, account_id) DO NOTHING + SQL + end + def generate { percentiles: { - followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100, - statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100, + statuses: 100.0 - ((total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100), }, } end private - def followers_gained - @followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count - end - def statuses_created @statuses_created ||= report_statuses.count end - def total_with_fewer_followers - @total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total - WITH tmp0 AS ( - SELECT follows.target_account_id - FROM follows - INNER JOIN accounts ON accounts.id = follows.target_account_id - WHERE date_part('year', follows.created_at) = :year - AND accounts.domain IS NULL - GROUP BY follows.target_account_id - HAVING COUNT(*) < :comparison - ) - SELECT count(*) AS total - FROM tmp0 - SQL - end - def total_with_fewer_statuses - @total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total - WITH tmp0 AS ( - SELECT statuses.account_id - FROM statuses - INNER JOIN accounts ON accounts.id = statuses.account_id - WHERE statuses.id BETWEEN :min_id AND :max_id - AND accounts.domain IS NULL - GROUP BY statuses.account_id - HAVING count(*) < :comparison - ) - SELECT count(*) AS total - FROM tmp0 - SQL - end - - def total_with_any_followers - @total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id') + @total_with_fewer_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).where(statuses_count: ...statuses_created).count end def total_with_any_statuses - @total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id') + @total_with_any_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).count end end diff --git a/app/lib/annual_report/source.rb b/app/lib/annual_report/source.rb index cb9f7b16e3..86528731f5 100644 --- a/app/lib/annual_report/source.rb +++ b/app/lib/annual_report/source.rb @@ -8,6 +8,14 @@ class AnnualReport::Source @year = year end + def self.prepare(_year) + # Use this method if any pre-calculations must be made before individual annual reports are generated + end + + def generate + raise NotImplementedError + end + protected def report_statuses diff --git a/app/models/annual_report/statuses_per_account_count.rb b/app/models/annual_report/statuses_per_account_count.rb new file mode 100644 index 0000000000..05a2f53c9d --- /dev/null +++ b/app/models/annual_report/statuses_per_account_count.rb @@ -0,0 +1,15 @@ +# frozen_string_literal: true + +# == Schema Information +# +# Table name: annual_report_statuses_per_account_counts +# +# id :bigint(8) not null, primary key +# year :integer not null +# account_id :bigint(8) not null +# statuses_count :bigint(8) not null +# + +class AnnualReport::StatusesPerAccountCount < ApplicationRecord + # This table facilitates percentile calculations +end diff --git a/db/migrate/20241104082851_create_annual_report_statuses_per_account_counts.rb b/db/migrate/20241104082851_create_annual_report_statuses_per_account_counts.rb new file mode 100644 index 0000000000..c8ec30ad57 --- /dev/null +++ b/db/migrate/20241104082851_create_annual_report_statuses_per_account_counts.rb @@ -0,0 +1,13 @@ +# frozen_string_literal: true + +class CreateAnnualReportStatusesPerAccountCounts < ActiveRecord::Migration[7.1] + def change + create_table :annual_report_statuses_per_account_counts do |t| # rubocop:disable Rails/CreateTableWithTimestamps + t.integer :year, null: false + t.bigint :account_id, null: false + t.bigint :statuses_count, null: false + end + + add_index :annual_report_statuses_per_account_counts, [:year, :account_id], unique: true + end +end diff --git a/db/schema.rb b/db/schema.rb index bea9ad46be..ec029366a5 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do +ActiveRecord::Schema[7.1].define(version: 2024_11_04_082851) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -260,6 +260,13 @@ ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do t.bigint "status_ids", array: true end + create_table "annual_report_statuses_per_account_counts", force: :cascade do |t| + t.integer "year", null: false + t.bigint "account_id", null: false + t.bigint "statuses_count", null: false + t.index ["year", "account_id"], name: "idx_on_year_account_id_ff3e167cef", unique: true + end + create_table "appeals", force: :cascade do |t| t.bigint "account_id", null: false t.bigint "account_warning_id", null: false diff --git a/spec/lib/annual_report/percentiles_spec.rb b/spec/lib/annual_report/percentiles_spec.rb index 1d1df3166b..11df81cfb6 100644 --- a/spec/lib/annual_report/percentiles_spec.rb +++ b/spec/lib/annual_report/percentiles_spec.rb @@ -4,17 +4,20 @@ require 'rails_helper' RSpec.describe AnnualReport::Percentiles do describe '#generate' do - subject { described_class.new(account, Time.zone.now.year) } + subject { described_class.new(account, year) } + + let(:year) { Time.zone.now.year } context 'with an inactive account' do let(:account) { Fabricate :account } it 'builds a report for an account' do + described_class.prepare(year) + expect(subject.generate) .to include( percentiles: include( - followers: 0, - statuses: 0 + statuses: 100 ) ) end @@ -25,16 +28,15 @@ RSpec.describe AnnualReport::Percentiles do before do Fabricate.times 2, :status # Others as `account` - Fabricate.times 2, :follow # Others as `target_account` Fabricate.times 2, :status, account: account - Fabricate.times 2, :follow, target_account: account end it 'builds a report for an account' do + described_class.prepare(year) + expect(subject.generate) .to include( percentiles: include( - followers: 50, statuses: 50 ) )