Skip to content

Commit 033c9d6

Browse files
adding standings pipelines (#247)
1 parent 9cb4299 commit 033c9d6

3 files changed

Lines changed: 170 additions & 0 deletions

File tree

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
import dagster as dg
2+
import polars as pl
3+
import requests
4+
from bs4 import BeautifulSoup
5+
6+
def seasons(year: int, timeout: float = 30.0) -> list[dict]:
    """Scrape the Limitless Labs home page for the tournaments of one season.

    The page is searched for an ``<h2>`` whose text contains ``year``; the
    ``<ul>`` that follows it is treated as that season's tournament list and
    every ``<a href=...>`` inside it becomes one entry.

    Args:
        year: Season year to look for in the ``<h2>`` headings.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        One dict per tournament link with the keys ``name`` (first text
        fragment of the anchor), ``date`` (last text fragment) and ``link``
        (absolute URL).

    Raises:
        requests.RequestException: On connection failure, timeout or a
            non-2xx response.
        ValueError: If no heading for ``year`` or no list after it is found.
    """
    from urllib.parse import urljoin

    base_url = "https://labs.limitlesstcg.com/"

    # A timeout keeps the pipeline from hanging forever on a stalled socket.
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    season_header = soup.find("h2", string=lambda text: text and str(year) in text)
    if season_header is None:
        raise ValueError(f"No season heading containing {year} found at {base_url}")

    tournament_list = season_header.find_next_sibling("ul")
    if tournament_list is None:
        raise ValueError(f"No tournament list follows the {year} heading at {base_url}")

    tournaments = []
    for anchor in tournament_list.find_all("a", href=True):
        parts = list(anchor.stripped_strings)
        if not parts:
            # Anchor without visible text (e.g. icon-only link): nothing to name.
            continue
        tournaments.append(
            {
                "name": parts[0],
                "date": parts[-1],
                # urljoin copes with both site-relative and absolute hrefs.
                "link": urljoin(base_url, anchor["href"]),
            }
        )

    return tournaments
24+
25+
26+
def _standings_cell_value(index: int, td) -> str:
    """Return the string stored for one standings ``<td>`` given its column index."""
    from urllib.parse import urljoin

    if index == 2:  # Country column: the value lives on the flag image.
        flag = td.find("img")
        if flag is None:
            return ""
        return flag.get("alt") or flag.get("title") or ""

    if index == 7:  # Deck column: one image per Pokemon, joined with "/".
        names = [
            img.get("alt")
            for img in td.find_all("img", class_="pokemon")
            if img.get("alt")
        ]
        return "/".join(names)

    if index == 8:  # Decklist column: link to the list, made absolute.
        anchor = td.find("a")
        href = anchor.get("href", "") if anchor is not None else ""
        return urljoin("https://labs.limitlesstcg.com/", href) if href else ""

    return td.get_text(strip=True)


def build_standings(tournament: dict, timeout: float = 30.0) -> pl.DataFrame | None:
    """Scrape the standings table of one tournament into a DataFrame.

    Args:
        tournament: Dict with at least a ``link`` key holding the standings
            URL; the second-to-last path segment of that URL is used as the
            tournament id.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A DataFrame with the columns ``rank``, ``name``, ``country``,
        ``points``, ``record``, ``opp_win_percent``, ``opp_opp_win_percent``,
        ``deck``, ``decklist`` and ``tournament_id`` (``rank`` and ``points``
        cast to Int16, everything else text), or ``None`` when the page has
        no ``data-table striped`` table or the table has no data rows.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    link = tournament["link"]
    # rstrip so a trailing slash does not shift which segment is the id.
    tournament_id = link.rstrip("/").split("/")[-2]

    response = requests.get(link, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    table = soup.find("table", class_="data-table striped")
    if table is None:
        return None

    # Final column names, in page order; the trailing column is discarded.
    columns = [
        "rank",
        "name",
        "country",
        "points",
        "record",
        "opp_win_percent",
        "opp_opp_win_percent",
        "deck",
        "decklist",
        "Unknown",
    ]

    body = table.find("tbody")
    if body is None:
        # Fall back to the table itself when the markup has no <tbody>.
        body = table

    rows = []
    for tr in body.find_all("tr"):
        cells = tr.find_all("td")
        # Single-cell rows are separators; zero-cell rows are <th> headers.
        if len(cells) <= 1:
            continue
        rows.append([_standings_cell_value(i, td) for i, td in enumerate(cells)])

    if not rows:
        # An empty frame would carry Null dtypes and break a later concat.
        return None

    df = pl.DataFrame(rows, schema=columns, orient="row")
    df = df.drop("Unknown")
    df = df.with_columns(pl.lit(tournament_id).alias("tournament_id"))
    df = df.cast({"rank": pl.Int16, "points": pl.Int16})

    return df
104+
105+
106+
@dg.asset(kinds={"Polars"}, name="create_standings_dataframe")
def create_standings_dataframe() -> pl.DataFrame:
    """Build one DataFrame with the standings of every 2026 tournament.

    Calls ``seasons(2026)`` for the tournament list, scrapes each tournament
    with ``build_standings`` and concatenates the frames that came back.
    Tournaments for which ``build_standings`` returns ``None`` are skipped.

    Returns:
        The vertical concatenation of all scraped standings frames.

    Raises:
        ValueError: If no tournament produced a standings frame.
    """
    season_year = 2026
    tournaments = seasons(season_year)

    dfs = []
    for t in tournaments:
        df = build_standings(t)
        if df is not None:
            dfs.append(df)
            print(f"Loaded {df.shape[0]} rows from {t['name']}")

    if not dfs:
        # Fail with context instead of the bare error pl.concat gives on [].
        raise ValueError(
            f"No standings were scraped for the {season_year} season "
            f"({len(tournaments)} tournaments listed)"
        )

    return pl.concat(dfs)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import dagster as dg
2+
import polars as pl
3+
from dagster import Backoff, RetryPolicy
4+
from sqlalchemy.exc import OperationalError
5+
from termcolor import colored
6+
7+
from ....utils.secret_retriever import fetch_secret
8+
9+
10+
@dg.asset(
    kinds={"Supabase", "Postgres"},
    name="load_standings_data",
    retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL),
)
def load_standings_data(create_standings_dataframe: pl.DataFrame) -> None:
    """Write the standings DataFrame to the ``staging.standings`` table.

    The connection string comes from ``fetch_secret()``. An existing table is
    replaced rather than appended to.

    Args:
        create_standings_dataframe: The standings frame to load. The
            parameter name matches the ``create_standings_dataframe`` asset,
            which is presumably how Dagster wires the upstream output in.

    Raises:
        OperationalError: Re-raised after logging, so the asset's retry
            policy can take effect.
    """
    database_url: str = fetch_secret()
    table_name: str = "staging.standings"

    df = create_standings_dataframe
    try:
        df.write_database(
            table_name=table_name, connection=database_url, if_table_exists="replace"
        )
        print(colored(" ✓", "green"), f"Data loaded into {table_name}")
    except OperationalError as e:
        # Message previously named load_card_data(), a different asset.
        print(colored(" ✖", "red"), "Connection error in load_standings_data():", e)
        raise
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{{ config(
    materialized='table',
    post_hook="{{ enable_rls() }}"
) }}

-- One row per player standing, enriched with tournament details and the
-- player's country name.
SELECT
    s.rank,
    s.name,
    s.points,
    s.record,
    s.opp_win_percent,
    s.opp_opp_win_percent,
    s.deck,
    s.decklist,
    c.country_name AS player_country,
    t.location,
    t.start_date,
    t.end_date,
    t.type,
    t.player_quantity
FROM
    {{ source('staging', 'standings') }} AS s
INNER JOIN {{ source('staging', 'tournaments') }} AS t
    ON s.tournament_id = t.tournament_id
-- LEFT JOIN: the scraper stores an empty country when a player has no flag
-- image; an INNER JOIN would silently drop those players from the standings.
-- player_country is NULL for them.
LEFT JOIN {{ source('staging', 'country_codes') }} AS c
    ON s.country = c.code

0 commit comments

Comments
 (0)