switch back to s3df to grab data file

sschmidt23 · sschmidt23 · commit 9dcf3332504c · 2026-02-10T18:00:22.000-08:00
diff --git a/examples/rail_project_example.ipynb b/examples/rail_project_example.ipynb
@@ -0,0 +1,237 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3f31a160-8ce9-4fe6-8805-9df7a97b7adc",
+   "metadata": {},
+   "source": [
+    "## Using `RailProject` \n",
+    "\n",
+    "This notebook will show you the basics using the `RailProject` class to manage an analysis project"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d436cc4a-49c9-4f27-93cf-9a196d251a77",
+   "metadata": {},
+   "source": [
+    "### Setup and teardown scripts to setup a test area"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96bee599-4711-4a07-b182-1cbe257ea4d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from rail.projects import library\n",
+    "\n",
+    "check_dir = os.path.basename(os.path.abspath(os.curdir))\n",
+    "if check_dir == 'examples':\n",
+    "    os.chdir('..')\n",
+    "\n",
+    "setup = library.setup_project_area()\n",
+    "assert setup == 0\n",
+    "\n",
+    "# use this to cleanup\n",
+    "# library.teardown_project_area()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "454c1464-aa61-4a0c-96b9-f48f5ddb94f6",
+   "metadata": {},
+   "source": [
+    "### Load the test project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7136e23a-43f1-4a36-ba77-03b9a649c449",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rail.projects import RailProject\n",
+    "\n",
+    "project = RailProject.load_config(\"tests/ci_project.yaml\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "57c75f4c-23f4-415b-8148-5b39754b5770",
+   "metadata": {},
+   "source": [
+    "### Inspect the test project"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f47f06c0-2718-4cdd-aaa9-156341336907",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "catalog_files_truth = project.get_catalog_files(\"truth\")\n",
+    "print(catalog_files_truth)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d37d65cf-6fbe-4fcd-818f-6eaa6a0aa88d",
+   "metadata": {},
+   "source": [
+    "### Run a data reduction algorithm on the test project data\n",
+    "\n",
+    "This will use the \"roman_rubin\" reducer to apply the \"gold\" selection to the \"truth\" catalog to make a \"reduced\" catalog"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1e4a8135-8131-473b-a132-b9be2109dbe0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project.reduce_data(\n",
+    "    catalog_template=\"truth\",\n",
+    "    output_catalog_template=\"reduced\",\n",
+    "    reducer_class_name=\"roman_rubin\",\n",
+    "    input_selection=\"\",\n",
+    "    selection=\"gold\",\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a70ff5d0-2e93-47e9-b5d4-7d791e883d9f",
+   "metadata": {},
+   "source": [
+    "### Subsample the test project\n",
+    "\n",
+    "This will use the \"random_subsampler\" to apply the \"train_10\" subsample to the \"reduced\" catalog of the baseline flavor with the gold selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e6b79133-b695-433c-a47f-91bfa9fffc97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project.subsample_data(\n",
+    "    catalog_template=\"reduced\",\n",
+    "    file_template=\"train_file_10\",\n",
+    "    subsampler_class_name=\"random_subsampler\",\n",
+    "    subsample_name=\"train_10\",\n",
+    "    flavor=\"baseline\",\n",
+    "    selection=\"gold\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "45aede11-ea93-4dab-b7e2-e2d108079ac3",
+   "metadata": {},
+   "source": [
+    "### Build all the pipelines for the \"baseline\" flavor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90ca72b7-bd09-4774-996b-8f4e6ca2bc6f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "project.build_pipelines(flavor=\"baseline\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "37354004-48a7-48eb-af8f-b87154be1c8a",
+   "metadata": {},
+   "source": [
+    "### Generate the commands to run the \"pz\" pipeline in the \"baseline\" flavor for the \"gold\" selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2dd52439-1f2e-4ca6-91c6-ed2b9ffb859d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "single_ceci_command = project.make_pipeline_single_input_command(\n",
+    "    pipeline_name=\"pz\",\n",
+    "    flavor=\"basline\",\n",
+    "    selection=\"gold\",\n",
+    ")\n",
+    "print(single_ceci_command)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ff29b713-902e-41a5-94ce-f852eb5e461e",
+   "metadata": {},
+   "source": [
+    "### Generate the commands to run the \"spec_selection\" pipeline in the \"baseline\" flavor for the \"gold\" selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c6461441-b51e-42f0-bb86-9a5d3b0513c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ceci_catalog_commands = project.make_pipeline_catalog_commands(\n",
+    "    pipeline_name=\"spec_selection\",\n",
+    "    flavor=\"basline\",\n",
+    "    selection=\"gold\",\n",
+    "    spec_selections = list(project.get_spec_selections().keys()),\n",
+    ")\n",
+    "print(ceci_catalog_commands)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e8f3aee-ae84-4a99-9835-797350b94cbf",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "13d8b9e3-7603-45d2-bc71-4b50c326c102",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "testenv",
+   "language": "python",
+   "name": "testenv"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/src/rail/projects/library.py b/src/rail/projects/library.py
@@ -236,8 +236,8 @@ def setup_project_area() -> int:  # pragma: no cover
 
     if not os.path.exists("tests/ci_test.tgz"):
         urllib.request.urlretrieve(
-            "https://portal.nersc.gov/cfs/lsst/PZ/test_data/ci_test.tgz",
-            # "http://s3df.slac.stanford.edu/people/echarles/xfer/ci_test.tgz",
+            # "https://portal.nersc.gov/cfs/lsst/PZ/test_data/ci_test.tgz",
+            "http://s3df.slac.stanford.edu/people/echarles/xfer/ci_test.tgz",
             "tests/ci_test.tgz",
         )
         if not os.path.exists("tests/ci_test.tgz"):