|
| 1 | +use crate::commands::fetch_issues::parse_since; |
1 | 2 | use crate::config::Config; |
2 | 3 | use anyhow::{Context, Result}; |
3 | 4 | use reqwest::blocking::Client; |
4 | 5 | use serde::{Deserialize, Serialize}; |
5 | | -use serde_json::json; |
| 6 | +use serde_json::{json, Value}; |
| 7 | +use std::collections::BTreeMap; |
6 | 8 | use std::fs; |
7 | 9 | use std::path::Path; |
8 | 10 |
|
@@ -45,6 +47,42 @@ query($owner: String!, $name: String!, $first: Int!, $after: String) { |
45 | 47 | } |
46 | 48 | "#; |
47 | 49 |
|
| 50 | +const QUERY_SINCE: &str = r#" |
| 51 | +query($owner: String!, $name: String!, $first: Int!, $after: String) { |
| 52 | + repository(owner: $owner, name: $name) { |
| 53 | + discussions(first: $first, after: $after, orderBy: {field: UPDATED_AT, direction: DESC}) { |
| 54 | + pageInfo { |
| 55 | + endCursor |
| 56 | + hasNextPage |
| 57 | + } |
| 58 | + nodes { |
| 59 | + number |
| 60 | + title |
| 61 | + bodyText |
| 62 | + url |
| 63 | + createdAt |
| 64 | + updatedAt |
| 65 | + closedAt |
| 66 | + closed |
| 67 | + stateReason |
| 68 | + isAnswered |
| 69 | + locked |
| 70 | + author { |
| 71 | + login |
| 72 | + } |
| 73 | + category { |
| 74 | + name |
| 75 | + } |
| 76 | + comments { |
| 77 | + totalCount |
| 78 | + } |
| 79 | + upvoteCount |
| 80 | + } |
| 81 | + } |
| 82 | + } |
| 83 | +} |
| 84 | +"#; |
| 85 | + |
48 | 86 | #[derive(Deserialize, Serialize)] |
49 | 87 | #[serde(rename_all = "camelCase")] |
50 | 88 | pub struct Discussion { |
@@ -111,28 +149,129 @@ struct GraphQlResponse { |
111 | 149 | errors: Option<serde_json::Value>, |
112 | 150 | } |
113 | 151 |
|
114 | | -pub fn run(config: &Config) -> Result<()> { |
115 | | - run_with_client(&Client::new(), config) |
| 152 | +pub fn run(config: &Config, since: Option<&str>) -> Result<()> { |
| 153 | + run_with_client(&Client::new(), config, since) |
116 | 154 | } |
117 | 155 |
|
118 | | -pub fn run_with_client(client: &Client, config: &Config) -> Result<()> { |
119 | | - let discussions = fetch_all_discussions(client, config)?; |
| 156 | +pub fn run_with_client(client: &Client, config: &Config, since: Option<&str>) -> Result<()> { |
| 157 | + let since_ts = since.map(parse_since).transpose()?; |
| 158 | + |
| 159 | + let discussions = if let Some(ref ts) = since_ts { |
| 160 | + println!("Fetching discussions updated since {ts}..."); |
| 161 | + fetch_discussions_since(client, config, ts)? |
| 162 | + } else { |
| 163 | + fetch_all_discussions(client, config)? |
| 164 | + }; |
120 | 165 |
|
121 | 166 | println!("Total discussions fetched: {}", discussions.len()); |
122 | 167 |
|
123 | | - let out_dir = Path::new("out/historical/discussions"); |
124 | | - fs::create_dir_all(out_dir)?; |
125 | | - let out_file = out_dir.join(format!( |
126 | | - "{}_{}_discussions.json", |
127 | | - config.repo_owner, config.repo_name |
128 | | - )); |
129 | | - let json = serde_json::to_string_pretty(&discussions)?; |
130 | | - fs::write(&out_file, json)?; |
131 | | - println!("Saved to '{}'", out_file.display()); |
| 168 | + let repo_prefix = format!("{}_{}", config.repo_owner, config.repo_name); |
| 169 | + let out_dir = Path::new("data").join(&repo_prefix).join("discussions"); |
| 170 | + fs::create_dir_all(&out_dir)?; |
| 171 | + |
| 172 | + // Bucket by year based on createdAt |
| 173 | + let mut by_year: BTreeMap<String, Vec<Discussion>> = BTreeMap::new(); |
| 174 | + for d in discussions { |
| 175 | + let year = d.created_at.get(..4).unwrap_or("unknown").to_string(); |
| 176 | + by_year.entry(year).or_default().push(d); |
| 177 | + } |
| 178 | + |
| 179 | + for (year, items) in &by_year { |
| 180 | + let path = out_dir.join(format!("{year}.json")); |
| 181 | + let mut existing: Vec<Value> = if path.exists() { |
| 182 | + let json = fs::read_to_string(&path)?; |
| 183 | + serde_json::from_str(&json).unwrap_or_default() |
| 184 | + } else { |
| 185 | + Vec::new() |
| 186 | + }; |
| 187 | + let new_values: Vec<Value> = items.iter() |
| 188 | + .map(|d| serde_json::to_value(d).unwrap()) |
| 189 | + .collect(); |
| 190 | + existing.extend(new_values); |
| 191 | + let json_str = serde_json::to_string_pretty(&existing)?; |
| 192 | + fs::write(&path, json_str)?; |
| 193 | + println!(" {year}.json: appended {} discussions", items.len()); |
| 194 | + } |
132 | 195 |
|
133 | 196 | Ok(()) |
134 | 197 | } |
135 | 198 |
|
| 199 | +fn fetch_discussions_since(client: &Client, config: &Config, since: &str) -> Result<Vec<Discussion>> { |
| 200 | + let mut discussions = Vec::new(); |
| 201 | + let mut after: Option<String> = None; |
| 202 | + let mut page = 1; |
| 203 | + |
| 204 | + loop { |
| 205 | + println!("Fetching page {page} of discussions..."); |
| 206 | + |
| 207 | + let body = json!({ |
| 208 | + "query": QUERY_SINCE, |
| 209 | + "variables": { |
| 210 | + "owner": config.repo_owner, |
| 211 | + "name": config.repo_name, |
| 212 | + "first": PAGE_SIZE, |
| 213 | + "after": after, |
| 214 | + } |
| 215 | + }); |
| 216 | + |
| 217 | + let response = client |
| 218 | + .post(GRAPHQL_URL) |
| 219 | + .header("Authorization", format!("Bearer {}", config.github_token)) |
| 220 | + .header("Accept", "application/vnd.github+json") |
| 221 | + .header("User-Agent", "github-tools") |
| 222 | + .json(&body) |
| 223 | + .send() |
| 224 | + .context("Failed to send GraphQL request")?; |
| 225 | + |
| 226 | + if !response.status().is_success() { |
| 227 | + let status = response.status(); |
| 228 | + let text = response.text().unwrap_or_default(); |
| 229 | + eprintln!("Warning: GraphQL request failed ({status}): {text}"); |
| 230 | + break; |
| 231 | + } |
| 232 | + |
| 233 | + let result: GraphQlResponse = response |
| 234 | + .json() |
| 235 | + .context("Failed to parse GraphQL response")?; |
| 236 | + |
| 237 | + if let Some(errors) = result.errors { |
| 238 | + eprintln!("Warning: GraphQL errors: {errors}"); |
| 239 | + break; |
| 240 | + } |
| 241 | + |
| 242 | + let page_data = result |
| 243 | + .data |
| 244 | + .context("Missing 'data' in GraphQL response")? |
| 245 | + .repository |
| 246 | + .discussions; |
| 247 | + |
| 248 | + let has_next = page_data.page_info.has_next_page; |
| 249 | + after = page_data.page_info.end_cursor; |
| 250 | + |
| 251 | + let mut hit_boundary = false; |
| 252 | + for d in page_data.nodes { |
| 253 | + if d.updated_at.as_str() < since { |
| 254 | + hit_boundary = true; |
| 255 | + } else { |
| 256 | + discussions.push(d); |
| 257 | + } |
| 258 | + } |
| 259 | + |
| 260 | + println!("Fetched {} discussions so far...", discussions.len()); |
| 261 | + |
| 262 | + if hit_boundary { |
| 263 | + println!("Reached discussions older than --since cutoff."); |
| 264 | + break; |
| 265 | + } |
| 266 | + if !has_next { |
| 267 | + break; |
| 268 | + } |
| 269 | + page += 1; |
| 270 | + } |
| 271 | + |
| 272 | + Ok(discussions) |
| 273 | +} |
| 274 | + |
136 | 275 | fn fetch_all_discussions(client: &Client, config: &Config) -> Result<Vec<Discussion>> { |
137 | 276 | let mut discussions = Vec::new(); |
138 | 277 | let mut after: Option<String> = None; |
|
0 commit comments