diff --git a/.gitignore b/.gitignore index a14702c4..e06d00f8 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,5 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json # Finder (MacOS) folder config .DS_Store + +.qodo \ No newline at end of file diff --git a/agents/agents/credit-assigner.ts b/agents/agents/credit-assigner.ts index 895bfd61..8ac9defd 100644 --- a/agents/agents/credit-assigner.ts +++ b/agents/agents/credit-assigner.ts @@ -10,5 +10,6 @@ export const creditAssignmentAgent = new Agent({ You are given a list of applications reviews and you should assign a score between 0.00 and 1.00 to each review based on how much funding the project deserve. The total score of all reviews should be 1.00. `, - model: google("gemini-2.0-flash-thinking-exp-01-21"), + //model: google("gemini-2.0-flash-thinking-exp-01-21"), + model: openai("gpt-4.1-2025-04-14"), }); diff --git a/scores/credit-assignment-gitcoin-communist.csv b/scores/credit-assignment-gitcoin-communist.csv new file mode 100644 index 00000000..921c4c77 --- /dev/null +++ b/scores/credit-assignment-gitcoin-communist.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.3125 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO,0.2250 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,ÆRTH - Planetary AI,0.0875 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.2500 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.1250 \ No newline at end of file diff --git a/scores/credit-assignment-open-source-capitalist.csv b/scores/credit-assignment-open-source-capitalist.csv new file mode 100644 index 00000000..6fe5960c --- /dev/null +++ b/scores/credit-assignment-open-source-capitalist.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.3500 
+42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO,0.2500 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,ÆRTH - Planetary AI,0.1000 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.1500 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.1500 \ No newline at end of file diff --git a/scores/credit-assignment-regenerator.csv b/scores/credit-assignment-regenerator.csv new file mode 100644 index 00000000..29d23592 --- /dev/null +++ b/scores/credit-assignment-regenerator.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.3800 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO,0.1800 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,ÆRTH - Planetary AI,0.0800 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.2300 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.1300 \ No newline at end of file diff --git a/scores/elo-credit-assignment-gitcoin-communist-from-review-only.csv b/scores/elo-credit-assignment-gitcoin-communist-from-review-only.csv new file mode 100644 index 00000000..fae0c4f7 --- /dev/null +++ b/scores/elo-credit-assignment-gitcoin-communist-from-review-only.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,0.206004 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.187525 
+42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,0.200414 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,0.194112 \ No newline at end of file diff --git a/scores/elo-credit-assignment-gitcoin-communist-review-and-data.csv b/scores/elo-credit-assignment-gitcoin-communist-review-and-data.csv new file mode 100644 index 00000000..21baec7f --- /dev/null +++ b/scores/elo-credit-assignment-gitcoin-communist-review-and-data.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,AERTH,0.187817 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.200694 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.199940 \ No newline at end of file diff --git a/scores/elo-credit-assignment-gitcoin-communist.csv b/scores/elo-credit-assignment-gitcoin-communist.csv new file mode 100644 index 00000000..44db4cf4 --- /dev/null +++ b/scores/elo-credit-assignment-gitcoin-communist.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.187817 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.194294 
+42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.206340 \ No newline at end of file diff --git a/scores/elo-credit-assignment-open-source-capitalist-from-review-only.csv b/scores/elo-credit-assignment-open-source-capitalist-from-review-only.csv new file mode 100644 index 00000000..b54ee566 --- /dev/null +++ b/scores/elo-credit-assignment-open-source-capitalist-from-review-only.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,0.206004 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.193925 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,0.200120 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,0.188006 \ No newline at end of file diff --git a/scores/elo-credit-assignment-open-source-capitalist-review-and-data.csv b/scores/elo-credit-assignment-open-source-capitalist-review-and-data.csv new file mode 100644 index 00000000..21baec7f --- /dev/null +++ b/scores/elo-credit-assignment-open-source-capitalist-review-and-data.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 
+42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,AERTH,0.187817 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.200694 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.199940 \ No newline at end of file diff --git a/scores/elo-credit-assignment-open-source-capitalist.csv b/scores/elo-credit-assignment-open-source-capitalist.csv new file mode 100644 index 00000000..44db4cf4 --- /dev/null +++ b/scores/elo-credit-assignment-open-source-capitalist.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.187817 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.194294 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.206340 \ No newline at end of file diff --git a/scores/elo-credit-assignment-regenerator-from-review-only.csv b/scores/elo-credit-assignment-regenerator-from-review-only.csv new file mode 100644 index 00000000..774afb03 --- /dev/null +++ b/scores/elo-credit-assignment-regenerator-from-review-only.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,0.199898 
+42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.194192 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,0.205938 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,0.188027 \ No newline at end of file diff --git a/scores/elo-credit-assignment-regenerator-review-and-data.csv b/scores/elo-credit-assignment-regenerator-review-and-data.csv new file mode 100644 index 00000000..4a5d7be6 --- /dev/null +++ b/scores/elo-credit-assignment-regenerator-review-and-data.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 +42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,AERTH, 0.187817 +42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.200694 +42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.199940 \ No newline at end of file diff --git a/scores/elo-credit-assignment-regenerator.csv b/scores/elo-credit-assignment-regenerator.csv new file mode 100644 index 00000000..27e2531b --- /dev/null +++ b/scores/elo-credit-assignment-regenerator.csv @@ -0,0 +1,6 @@ +id,name,score +42161-867-0x62f25a11c2ae5a2af563cc5b1f772b3aebe1bd4a0a82e41a78e61e1db972ad7e,GainForest,0.211946 +42161-867-0xd089724cd73c932413bce5c797aee7d2fbcd1ad282f24cff790977e77908fdca,Treegens DAO🌳,0.199604 
+42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,42161-867-0x5a35dc4ee0fd8cf69eb9f227b626c0c093c3efc5a6f1b518a3792d5e8b721860,0.187817
+42161-867-0xe573019b9f23a496663f5944a83c8acdc99792bfc5f5ad603ee8f6cb0f46f9fe,Hydrapad,0.200694
+42161-865-0x9119659eb8173b32bb4423f83702ee30c1e1db49ae0c07b00263bf3ea7f4d4ef,Deep Funding,0.199940
\ No newline at end of file
diff --git a/scripts/credit-assignment-elo.ts b/scripts/credit-assignment-elo.ts
new file mode 100644
index 00000000..fc7e48f1
--- /dev/null
+++ b/scripts/credit-assignment-elo.ts
@@ -0,0 +1,195 @@
+import {
+  fetchModelSpecs,
+  getApplicationId,
+  loadApplicationsFromDirectory,
+  loadReview,
+  saveFile,
+  loadApplication,
+  loadKarmaGap,
+  loadResearch,
+  getProjectName,
+} from "../utils/utils";
+
+import { creditAssignmentAgent } from "../agents/agents/credit-assigner";
+
+// Core Elo scoring parameters
+const BASE_RATING = 1000;
+const K_FACTOR = 32; // Adjust this to control how volatile the scores are
+
+/** Data pre-loaded once per application and reused for every agent's tournament. */
+interface ApplicationData {
+  id: string;
+  name: string;
+  // Raw JSON from the loaders in utils, which return `any`.
+  application: any;
+  research: any;
+  karmaGap: any;
+}
+
+/** An application's pre-loaded data together with one agent's review of it. */
+interface ReviewedApplication extends ApplicationData {
+  review: any;
+}
+
+/**
+ * Expected score of an entrant rated `ratingA` against one rated `ratingB`:
+ * a logistic curve in the rating difference on a 400-point scale.
+ */
+function expectedScore(ratingA: number, ratingB: number): number {
+  return 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
+}
+
+/**
+ * Rating after one matchup: shifts `rating` by `K_FACTOR` times the gap
+ * between the actual result (1 = win, 0 = loss) and the expected score.
+ */
+function updateElo(rating: number, expected: number, actual: number): number {
+  return rating + K_FACTOR * (actual - expected);
+}
+
+/**
+ * Reads the judge's verdict out of a model reply.
+ *
+ * Everything except ASCII letters is ignored, so `"A"`, `A.` and `**b**` are
+ * all accepted; a reply that does not reduce to a lone A or B yields `null`.
+ */
+function parseWinner(text: string): "A" | "B" | null {
+  const verdict = text.toUpperCase().replace(/[^A-Z]/g, "");
+  return verdict === "A" || verdict === "B" ? verdict : null;
+}
+
+/** Quotes a CSV field that contains a comma, double quote, or line break. */
+function csvField(value: string): string {
+  return /[",\r\n]/.test(value) ? `"${value.replace(/"/g, '""')}"` : value;
+}
+
+/** Compact per-project snippet for the prompt (avoids stringifying huge objects). */
+function toPromptData(app: ReviewedApplication) {
+  return {
+    title: app.name,
+    application_summary: app.application?.project?.description,
+    research_summary: app.research?.summary,
+    karmagap_score: app.karmaGap?.score,
+    reviewer_comment: app.review?.comments,
+  };
+}
+
+/**
+ * Runs one round-robin Elo tournament per review agent, with the credit
+ * assignment agent judging every pair, and saves each agent's normalized
+ * ratings to `scores/elo-credit-assignment-<agent>.csv`.
+ */
+async function main() {
+  // Load all applications from the directory
+  const applications = loadApplicationsFromDirectory();
+  console.log(`Processing ${applications.length} applications...`);
+
+  // Pre-load all necessary data for each application
+  console.log("Pre-loading application data (app, research, karmagap)...");
+  const applicationDataMap = new Map<string, ApplicationData>();
+  for (const app of applications) {
+    const id = getApplicationId(app);
+    const name = getProjectName(app) || id;
+    applicationDataMap.set(id, {
+      id,
+      name,
+      application: loadApplication(id),
+      research: loadResearch(id),
+      karmaGap: loadKarmaGap(id),
+    });
+  }
+  console.log("Finished pre-loading data.");
+
+  // Get the available review models/agents
+  const modelSpecs = await fetchModelSpecs();
+
+  // Load all reviews for all agents, associating with pre-loaded data
+  const reviewsByAgent: Record<string, ReviewedApplication[]> = {};
+  for (const agent of modelSpecs) {
+    const agentName = agent?.name;
+    // A spec without a name would otherwise be filed under the key "undefined".
+    if (!agentName) continue;
+
+    const reviewed: ReviewedApplication[] = [];
+    for (const app of applications) {
+      const baseData = applicationDataMap.get(getApplicationId(app));
+      if (!baseData) continue;
+
+      const review = loadReview(baseData.id, agentName);
+      if (!review) continue;
+
+      reviewed.push({ ...baseData, review });
+    }
+    reviewsByAgent[agentName] = reviewed;
+  }
+
+  // Loop through each agent's reviews
+  for (const [agentName, reviewedApps] of Object.entries(reviewsByAgent)) {
+    console.log(`\n🎯 Running Elo tournament for agent: ${agentName}`);
+
+    // Initialize all ratings
+    const ratings: Record<string, number> = {};
+    for (const { id } of reviewedApps) {
+      ratings[id] = BASE_RATING;
+    }
+
+    // Run simulated pairwise matchups (round-robin style)
+    for (let i = 0; i < reviewedApps.length; i++) {
+      for (let j = i + 1; j < reviewedApps.length; j++) {
+        const appA = reviewedApps[i]!;
+        const appB = reviewedApps[j]!;
+
+        const prompt = `
+You are a grant allocator reviewing two projects. Consider all available information.
+
+Choose the one that deserves *more funding*, based on impact, clarity, roadmap, potential, and overall quality presented in the data below.
+
+You are essentially a judge in the tournament which gives a score based on each Agent's review, so it's important you strongly consider reviewer_comment along with the metric data provided.
+Respond ONLY with "A" or "B". Do NOT explain.
+
+--- Project A ---
+${JSON.stringify(toPromptData(appA), null, 2)}
+
+--- Project B ---
+${JSON.stringify(toPromptData(appB), null, 2)}
+`;
+
+        const result = await creditAssignmentAgent.generate(prompt);
+        const winner = parseWinner(result.text);
+        if (winner === null) {
+          // Unreadable verdict: leave both ratings untouched.
+          console.warn(`⚠️ Unexpected response: ${result.text}`);
+          continue;
+        }
+
+        const ratingA = ratings[appA.id] ?? BASE_RATING;
+        const ratingB = ratings[appB.id] ?? BASE_RATING;
+        const expectedA = expectedScore(ratingA, ratingB);
+        const expectedB = expectedScore(ratingB, ratingA);
+        const scoreA = winner === "A" ? 1 : 0;
+        ratings[appA.id] = updateElo(ratingA, expectedA, scoreA);
+        ratings[appB.id] = updateElo(ratingB, expectedB, 1 - scoreA);
+      }
+    }
+
+    // Normalize scores so they sum to 1 (for funding allocation)
+    const totalScore = Object.values(ratings).reduce((sum, score) => sum + score, 0);
+
+    // Prepare output CSV format. Project names are free text, so id and name
+    // are escaped to keep every row at exactly three columns.
+    const rows = Object.entries(ratings).map(([id, score]) => {
+      const name = applicationDataMap.get(id)?.name || id;
+      return [csvField(id), csvField(name), (score / totalScore).toFixed(6)].join(",");
+    });
+    const output = ["id,name,score", ...rows].join("\n");
+
+    // Save results to file
+    saveFile(`scores/elo-credit-assignment-${agentName}.csv`, output);
+    console.log(`✅ Saved results for ${agentName}`);
+  }
+}
+
+main().catch((error) => {
+  console.error("❌ Error:", error);
+  process.exit(1);
+});
diff --git a/utils/utils.ts b/utils/utils.ts
index c2138711..0c48b83a 100644
--- a/utils/utils.ts
+++ b/utils/utils.ts
@@ -135,6 +135,27 @@ export function loadReview(applicationId: string, agent: string): any {
     return null;
   }
 }
+/**
+ * Loads and parses `application.json` from the application's directory.
+ *
+ * @param applicationId - Id handed to `getApplicationPath` to locate the directory.
+ * @returns The parsed JSON, or `null` if the file cannot be read or parsed.
+ */
+export function loadApplication(applicationId: string): any {
+  try {
+    return JSON.parse(
+      readFileSync(getApplicationPath(applicationId) + "/application.json", "utf8")
+    );
+  } catch (error) {
+    // A missing file just means "no data". Anything else (malformed JSON,
+    // permissions) is logged so it is not silently mistaken for a missing
+    // file; callers still get null either way.
+    if ((error as { code?: unknown } | null)?.code !== "ENOENT") {
+      console.warn(`Could not load application.json for ${applicationId}:`, error);
+    }
+    return null;
+  }
+}
 export function loadKarmaGap(applicationId: string): any {
   try {
     return JSON.parse(
@@ -174,6 +195,9 @@ export async function fetchModelSpecs(): Promise<
       constitution: await fetch(
         `${contentURL}/${name}/modelspec/constitution.md`
       ).then((r) => r.text()),
+      scoringRubric: await fetch(
+        `${contentURL}/${name}/modelspec/scoring-rubric.md`
+      ).then((r) => r.text()),
     }))
   );
 }