|
| 1 | +# frozen_string_literal: true |
| 2 | + |
module Competitive
  # Scheduled Sidekiq job that orchestrates the full historical backfill pipeline:
  #
  #   1. Triggers the historical backfill on the ProStaff Scraper (Leaguepedia → ES)
  #   2. Polls the scraper's backfill status until it finishes or times out
  #   3. Enqueues a SyncScraperMatchesJob per organization to sync the newly
  #      indexed matches from ES into the Rails DB
  #
  # The scraper's backfill is resumable — re-triggering it skips already-completed
  # tournaments. This means the job is safe to run on a schedule (e.g. daily):
  # first runs import the full history (~8-12h for CBLOL), subsequent runs only
  # process new or previously-failed tournaments (minutes).
  #
  # NOTE: while polling, this job sleeps inside #perform and therefore occupies
  # its worker thread for up to MAX_WAIT_TIME.
  #
  # Configuration (environment variables):
  #   BACKFILL_LEAGUE      — league to backfill (default: 'CBLOL')
  #   BACKFILL_MIN_YEAR    — earliest year to import (default: 2013)
  #   BACKFILL_OUR_TEAM    — team name for the sync step (default: 'paiN Gaming')
  #   BACKFILL_SYNC_LIMIT  — max matches to sync per run (default: 500)
  #
  # Integer settings that are not valid base-10 integers (including the empty
  # string) fall back to their default and log a warning.
  #
  # @example Run manually from console
  #   Competitive::HistoricalBackfillJob.perform_later
  #
  # @example Check backfill progress
  #   ProStaffScraperService.new.historical_backfill_status(league: 'CBLOL')
  #
  class HistoricalBackfillJob < ApplicationJob
    queue_as :low_priority

    # The scraper may be temporarily unavailable — retry after 10 minutes.
    retry_on ProStaffScraperService::UnavailableError, wait: 10.minutes, attempts: 3
    discard_on ProStaffScraperService::UnauthorizedError

    # How often to poll the scraper for backfill progress.
    POLL_INTERVAL = 5.minutes

    # Maximum time to wait for the scraper backfill to finish before
    # proceeding to the sync step anyway. The scraper's backfill is
    # resumable, so the next scheduled run will pick up where it left off.
    MAX_WAIT_TIME = 6.hours

    # Prefix shared by every log line emitted by this job.
    LOG_TAG = '[HistoricalBackfillJob]'

    # Runs the three pipeline steps in order, then records the job heartbeat.
    #
    # @raise [ProStaffScraperService::UnauthorizedError] when the scraper rejects
    #   our credentials; the class-level +discard_on+ then drops the job.
    # @return [void]
    def perform
      league     = ENV.fetch('BACKFILL_LEAGUE', 'CBLOL')
      min_year   = integer_env('BACKFILL_MIN_YEAR', 2013)
      our_team   = ENV.fetch('BACKFILL_OUR_TEAM', 'paiN Gaming')
      sync_limit = integer_env('BACKFILL_SYNC_LIMIT', 500)

      scraper = ProStaffScraperService.new

      trigger_scraper_backfill(scraper, league: league, min_year: min_year)
      await_scraper_backfill(scraper, league: league)
      enqueue_match_syncs(league: league, our_team: our_team, limit: sync_limit)

      record_job_heartbeat

      Rails.logger.info("#{LOG_TAG} Done.")
    end

    private

    # Step 1: asks the scraper to start (or resume) the backfill.
    # Scraper errors are logged and tolerated so that the sync step still runs.
    def trigger_scraper_backfill(scraper, league:, min_year:)
      Rails.logger.info(
        "#{LOG_TAG} Triggering backfill on scraper: " \
        "league=#{league} min_year=#{min_year}"
      )

      result = scraper.trigger_historical_backfill(league: league, min_year: min_year)
      Rails.logger.info("#{LOG_TAG} Scraper responded: #{result.inspect}")
    rescue ProStaffScraperService::UnauthorizedError
      # Must come before the generic rescue below: if UnauthorizedError is a
      # ScraperError subclass (hierarchy not visible from this file), swallowing
      # it here would make the class-level `discard_on` unreachable and leave the
      # job polling with bad credentials until MAX_WAIT_TIME.
      raise
    rescue ProStaffScraperService::ScraperError => e
      Rails.logger.warn(
        "#{LOG_TAG} Scraper trigger failed: #{e.message}. " \
        'Proceeding to sync step (scraper may already be running).'
      )
    end

    # Step 2: polls the scraper every POLL_INTERVAL until it reports zero
    # remaining tournaments or MAX_WAIT_TIME has elapsed. Failed polls are
    # logged and retried on the next tick.
    def await_scraper_backfill(scraper, league:)
      Rails.logger.info(
        "#{LOG_TAG} Polling backfill status (max #{MAX_WAIT_TIME.to_i / 60}min)..."
      )

      deadline    = Time.current + MAX_WAIT_TIME
      last_status = nil

      loop do
        if Time.current > deadline
          Rails.logger.warn(
            "#{LOG_TAG} Max wait time exceeded (#{MAX_WAIT_TIME.to_i / 3600}h). " \
            "Proceeding to sync step. Last status: #{last_status.inspect}"
          )
          break
        end

        begin
          last_status = scraper.historical_backfill_status(league: league)

          # If nothing is pending/in-progress, the backfill is done.
          break if report_progress(last_status).zero?
        rescue ProStaffScraperService::UnauthorizedError
          # Credentials will not fix themselves between polls; let `discard_on`
          # handle it instead of looping until the deadline.
          raise
        rescue ProStaffScraperService::ScraperError => e
          Rails.logger.warn("#{LOG_TAG} Status poll failed: #{e.message}")
        end

        sleep(POLL_INTERVAL)
      end
    end

    # Logs the progress counters from a status payload and returns the number
    # of tournaments still remaining.
    #
    # NOTE(review): a payload without a 'remaining' key is treated as 0 (done),
    # as before — confirm that matches the scraper's status contract.
    def report_progress(status)
      remaining = status['remaining'].to_i
      completed = status['completed'] || 0
      total     = status['total_tournaments'] || 0

      Rails.logger.info(
        "#{LOG_TAG} Progress: #{completed}/#{total} tournaments " \
        "(#{remaining} remaining)"
      )

      remaining
    end

    # Step 3: enqueues one SyncScraperMatchesJob per organization.
    def enqueue_match_syncs(league:, our_team:, limit:)
      Rails.logger.info(
        "#{LOG_TAG} Starting sync step: " \
        "league=#{league} our_team=#{our_team} limit=#{limit}"
      )

      Organization.find_each do |org|
        Rails.logger.info("#{LOG_TAG} Syncing for org=#{org.id} (#{org.name})")
        SyncScraperMatchesJob.perform_later(
          org.id,
          league: league,
          our_team: our_team,
          limit: limit
        )
      end
    end

    # Reads an integer setting from ENV. Returns +default+ when the variable is
    # unset or is not a valid base-10 integer (String#to_i would silently turn
    # such values into 0).
    def integer_env(name, default)
      raw = ENV[name]
      return default if raw.nil?

      Integer(raw, 10)
    rescue ArgumentError
      Rails.logger.warn(
        "#{LOG_TAG} Ignoring invalid #{name}=#{raw.inspect}; using default #{default}"
      )
      default
    end
  end
end
0 commit comments