Remove Evaluate Your Model section and update paper link to arXiv

ymathur-godaddy · ymathur-godaddy · commit 5ada56938e63 · 2026-03-22T22:41:23.000-04:00
diff --git a/docs/index.html b/docs/index.html
@@ -44,7 +44,7 @@ <h1 class="paper-title">
     </p>
 
     <div class="badge-row">
-      <a href="paper.pdf" class="badge-btn paper" target="_blank">
+      <a href="https://arxiv.org/pdf/2505.23126" class="badge-btn paper" target="_blank" rel="noopener noreferrer">
         <span class="icon">📄</span> Paper
       </a>
       <a href="https://huggingface.co/datasets/changelinglab/PBEBench-Lite" class="badge-btn dataset" target="_blank">
@@ -324,61 +324,6 @@ <h3>PBEBench-Lite on HuggingFace 🤗</h3>
   </div>
 </section>
 
-<!-- ===== EVALUATION ===== -->
-<section id="evaluation">
-  <div class="container">
-    <h2 class="section-title">Evaluate Your Model</h2>
-    <p style="color:#4a4a4a;margin-bottom:1.5rem;">
-      Follow these steps to evaluate a new model on PBEBench-Lite and submit results to the leaderboard:
-    </p>
-
-    <div class="eval-steps">
-      <div class="eval-step">
-        <p>
-          <strong>Clone the repository.</strong><br>
-          <code>git clone https://github.com/ymathur/symbolic-library-agent</code>
-          and install dependencies with <code>pip install -r requirements.txt</code>.
-        </p>
-      </div>
-      <div class="eval-step">
-        <p>
-          <strong>Download the dataset.</strong><br>
-          Load PBEBench-Lite from HuggingFace:
-          <code>datasets.load_dataset("changelinglab/PBEBench-Lite")</code>.
-          Both <code>synthesis</code> and <code>reordering</code> splits are available.
-        </p>
-      </div>
-      <div class="eval-step">
-        <p>
-          <strong>Configure your model.</strong><br>
-          Add your model's API key or local path to <code>configs/</code>.
-          Refer to <code>docs/agents.md</code> for the agent interface specification.
-        </p>
-      </div>
-      <div class="eval-step">
-        <p>
-          <strong>Run evaluation.</strong><br>
-          <code>python main.py --model your_model_name --split synthesis</code>.
-          Results are written to <code>outputs/</code> in the standard JSONL format.
-        </p>
-      </div>
-      <div class="eval-step">
-        <p>
-          <strong>Compute metrics and submit.</strong><br>
-          Use <code>scripts/compute_metrics.py</code> to compute Pass@1, EditSim, and other metrics.
-          Open a GitHub issue with your results to be added to the leaderboard.
-        </p>
-      </div>
-    </div>
-
-    <div style="margin-top:1.5rem">
-      <a href="https://github.com/ymathur/symbolic-library-agent" class="badge-btn github" target="_blank">
-        <span class="icon">💻</span> GitHub Repository
-      </a>
-    </div>
-  </div>
-</section>
-
 <!-- ===== CITATION ===== -->
 <section id="citation">
   <div class="container">