Anxhela21
diff --git a/‎.github/workflows/e2e_tests.yaml‎
Lines changed: 154 additions & 0 deletions b/‎.github/workflows/e2e_tests.yaml‎
Lines changed: 154 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎tests/integration/lightspeed-stack.yaml‎
Lines changed: 26 additions & 0 deletions b/‎tests/integration/lightspeed-stack.yaml‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎tests/integration/run.yaml‎
Lines changed: 148 additions & 0 deletions b/‎tests/integration/run.yaml‎
Lines changed: 148 additions & 0 deletions
@@ -0,0 +1,154 @@
+# End-to-end integration tests with Lightspeed Core
+name: E2E Lightspeed Evaluation Integration Tests
+
+# Triggered by every push and every pull request.
+on:
+  - push
+  - pull_request
+
+jobs:
+  ##########
+  e2e_tests:
+    name: "E2E Lightspeed Evaluation Test, mode: ${{ matrix.mode }}"
+    # Alternative job name, currently commented out:
+    # name: "Lightspeed-stack setup"
+
+    runs-on: ubuntu-latest
+
+    strategy:
+      # For local testing use a matrix with just one variant:
+      # "act" doesn't separate runs.
+      matrix:
+        mode:
+          - 'query'
+          - 'streaming'
+        eval-data:
+          - 'tests/integration/test_evaluation_data.yaml'
+        lsc_image_path:
+          - 'quay.io/lightspeed-core/lightspeed-stack:latest'
+
+    env:
+      # Name given to the LSC container; differs per matrix mode.
+      LSC_IMAGE_NAME: "lightspeed-stack-test-mode-${{ matrix.mode }}"
+
+    steps:
+      # Checkout logic adapted from the lightspeed-stack repository's workflow.
+      - uses: actions/checkout@v4
+        with:
+          # On pull request events → the fork (or same repo) that opened the PR.
+          # On push                → falls back to the current repository.
+          repository: ${{ github.event.pull_request.head.repo.full_name || github.repository }}
+
+          # On pull request events → the PR head *commit* (reproducible: unlike
+          #                          a branch name it cannot move between the
+          #                          trigger and the checkout, or on a re-run).
+          # On push                → the pushed commit that triggered the workflow.
+          ref: ${{ github.event.pull_request.head.sha || github.sha }}
+
+          # Don't keep credentials when running untrusted PR code under
+          # pull_request_target. That event is not in this workflow's trigger
+          # list, so this guard only matters if the triggers are extended.
+          persist-credentials: ${{ github.event_name != 'pull_request_target' }}
+
+      - name: Verify actual git checkout result
+        run: |
+          # Print the remotes, the current branch/commit and the latest
+          # commits so the job log shows exactly what was checked out.
+          printf '%s\n' "=== Git Status After Checkout ===" "Remote URLs:"
+          git remote -v
+          echo
+          printf 'Current branch: %s\n' "$(git branch --show-current 2>/dev/null || echo 'detached HEAD')"
+          printf 'Current commit: %s\n' "$(git rev-parse HEAD)"
+          printf 'Current commit message: %s\n' "$(git log -1 --oneline)"
+          echo
+          printf '%s\n' "=== Recent commits ==="
+          git log --oneline -5
+
+      # Start the Lightspeed Stack container (LSC) in the background.
+      # This cannot live in a separate one-time job: networking is not
+      # shared between jobs.
+      - name: Run Lightspeed Stack (LSC)
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          echo "==========Running Lightspeed Core======="
+          # "-e OPENAI_API_KEY" without a value forwards the variable from this
+          # step's environment, so the secret never appears on the docker
+          # command line. All expansions are quoted to rule out word splitting.
+          docker run \
+            --name "${LSC_IMAGE_NAME}" \
+            -p 8080:8080 \
+            -v "$(pwd)/tests/integration/lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:Z" \
+            -v "$(pwd)/tests/integration/run.yaml:/app-root/run.yaml:Z" \
+            -e OPENAI_API_KEY \
+            --detach \
+            "${{ matrix.lsc_image_path }}"
+          echo "==========Running Lightspeed Core Done======="
+
+      - name: Show logs from the LSC
+        run: |
+          # Short pause, then list all containers and print what the LSC
+          # container has logged so far.
+          sleep 2
+          docker container ls --all
+          docker logs "${LSC_IMAGE_NAME}"
+
+      # Poll the LSC until its HTTP API answers, or fail the job.
+      - name: Wait for the LSC
+        run: |
+          echo "Waiting for service on port 8080..."
+          # Up to 30 probes, 2 seconds apart. --max-time caps every probe so a
+          # connection that is accepted but never answered cannot stall the
+          # loop; --silent/--show-error drop curl's progress meter while
+          # keeping its error messages in the log.
+          for attempt in {1..30}; do
+            if curl --silent --show-error --fail --max-time 5 \
+                 --output /dev/null http://localhost:8080/v1/models ; then
+              echo "Service is up!"
+              exit 0
+            fi
+            # Show the most recent container output while waiting.
+            docker logs -n 10 "${LSC_IMAGE_NAME}"
+            echo "Still waiting... (attempt ${attempt}/30)"
+            sleep 2
+          done
+
+          echo "Service did not start in time"
+          exit 1
+
+      # Pick the system config matching the matrix mode and export it as
+      # CONFIG for the following steps.
+      - name: Set query mode
+        if: matrix.mode == 'query'
+        run: |
+          printf 'CONFIG=%s\n' "./tests/integration/system-config-query.yaml" >> "${GITHUB_ENV}"
+
+      - name: Set streaming mode
+        if: matrix.mode == 'streaming'
+        run: |
+          printf 'CONFIG=%s\n' "./tests/integration/system-config-streaming.yaml" >> "${GITHUB_ENV}"
+
+      # Install uv with pip, then run "uv sync".
+      - name: Install dependencies for Lightspeed Evaluation
+        env:
+          FORCE_COLOR: "1"
+          TERM: xterm-256color
+        run: |
+          printf '%s\n' "Installing e2e tests dependencies"
+          pip install --break-system-packages uv
+          uv sync
+      
+      # Run lightspeed-eval with the selected system config and the
+      # evaluation data from the matrix.
+      - name: Run the tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          FORCE_COLOR: "1"
+          TERM: xterm-256color
+        run: |
+          banner="============================="
+          echo "${banner}"
+          echo "Running..."
+          echo "  config: ${CONFIG}"
+          echo "  LSC image: ${{ matrix.lsc_image_path }}"
+          echo "${banner}"
+          uv run lightspeed-eval --system-config "${CONFIG}" --eval-data "${{ matrix.eval-data }}"
+      
+      # Check the result: exactly one summary file must exist and its
+      # overall PASS count must equal the expected value.
+      - name: Check test result
+        run: |
+          # nullglob makes an unmatched pattern expand to nothing. Without it
+          # the array would contain the literal pattern, so "no output file"
+          # would look like "exactly one output file".
+          shopt -s nullglob
+          OUT_FILES=( eval_output/evaluation_*_summary.json )
+          shopt -u nullglob
+
+          if [ "${#OUT_FILES[@]}" -eq 0 ] ; then
+            echo "No output file matching eval_output/evaluation_*_summary.json"
+            exit 1
+          fi
+          if [ "${#OUT_FILES[@]}" -ne 1 ] ; then
+            echo "Multiple output files: " "${OUT_FILES[@]}"
+            exit 1
+          fi
+
+          OUT_FILE="${OUT_FILES[0]}"
+          PASS="$( jq .summary_stats.overall.PASS "${OUT_FILE}" )"
+          EXPECTED="1"
+          # Operands are quoted so an empty or multi-word jq result compares
+          # as "not equal" instead of breaking the test expression.
+          if [ "${PASS}" != "${EXPECTED}" ] ; then
+            echo "============"
+            echo "Wrong PASS number in ${OUT_FILE}: got ${PASS}, expected ${EXPECTED}"
+            echo "============"
+            exit 1
+          fi
+
+
+      # Cleanup: runs only when ACT is set in the environment (local
+      # development); always() keeps it running after earlier failures.
+      - name: Stop the LSC if in local devel
+        if: ${{ always() && env.ACT }}
+        run: |
+          printf '%s\n' "Stopping LSC container $LSC_IMAGE_NAME" "++++++++++++++++++++++"
+          docker stop "${LSC_IMAGE_NAME}" || true
+          docker rm "${LSC_IMAGE_NAME}" || true
@@ -189,3 +189,6 @@ eval_output*/
 wip*/
 
 .history/
+
+# Secrets file used when running the e2e tests locally
+.secrets
@@ -0,0 +1,26 @@
+name: 'Lightspeed Core Service (LCS)'
+
+# HTTP service settings; the port matches the one published by the
+# e2e workflow's "docker run -p 8080:8080".
+service:
+  host: '0.0.0.0'
+  port: 8080
+  workers: 1
+  auth_enabled: false
+  access_log: true
+  color_log: true
+llama_stack:
+  # Option 1 (not active here): use a remote llama-stack service.
+  # The instance would have already been started with a llama-stack-run.yaml file.
+  # use_as_library_client: false
+
+  # Option 2 (active): use llama-stack as a library client, configured from
+  # the run.yaml that the e2e workflow mounts at this path in the container.
+  use_as_library_client: true
+  library_client_config_path: /app-root/run.yaml
+  # NOTE(review): url and api_key look like remote-service settings and are
+  # presumably unused in library-client mode — confirm before removing.
+  url: http://localhost:8321
+  api_key: xyzzy
+# Feedback and transcript collection are both switched on and stored
+# under /tmp/data.
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: '/tmp/data/feedback'
+  transcripts_enabled: true
+  transcripts_storage: '/tmp/data/transcripts'
+
+authentication:
+  module: 'noop'
@@ -0,0 +1,148 @@
+version: 2
+
+# APIs covered by this configuration; each one has a matching entry
+# under "providers".
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- safety
+- scoring
+- tool_runtime
+- vector_io
+
+benchmarks: []
+datasets: []
+image_name: 'starter'
+# external_providers_dir: /opt/app-root/src/.llama/providers.d
+
+# Provider definitions, one list per API. Every entry is written in the
+# same key order: provider_id, provider_type, config.
+providers:
+  inference:
+  # This ID is a reference to 'providers.inference'
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY}
+      allowed_models:
+      - "${env.E2E_OPENAI_MODEL:=gpt-4o-mini}"
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+    config: {}
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+      storage_dir: ~/.llama/storage/files
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+    config: {}
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+    config: {}
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: '********'
+  tool_runtime:
+  # Enable the RAG tool
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  vector_io:
+  # Define the storage backend for RAG
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents_state
+          backend: kv_default
+        responses:
+          table_name: agents_responses
+          backend: sql_default
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches_store
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: huggingface_datasetio
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: localfs_datasetio
+        backend: kv_default
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval_store
+        backend: kv_default
+scoring_fns: []
+
+server:
+  port: 8321
+
+storage:
+  backends:
+    # Define the storage backend type for RAG. In this case registry and
+    # RAG are unified, i.e. information on registered resources (e.g.
+    # models, vector_stores) is saved together with the RAG chunks.
+    kv_default:
+      type: kv_sqlite
+      db_path: ${env.KV_STORE_PATH:=~/.llama/storage/rag/kv_store.db}
+    sql_default:
+      type: sql_sqlite
+      db_path: ${env.SQL_STORE_PATH:=~/.llama/storage/sql_store.db}
+  # Each store names the backend it uses, followed by its own settings.
+  stores:
+    metadata:
+      backend: kv_default
+      namespace: registry
+    inference:
+      backend: sql_default
+      table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
+registered_resources:
+  models: []
+  shields:
+  # NOTE(review): the model name is hard-coded here, whereas
+  # providers.inference allowed_models takes it from E2E_OPENAI_MODEL —
+  # confirm the two stay in sync if that variable is ever overridden.
+  - shield_id: llama-guard
+    provider_id: llama-guard
+    provider_shield_id: openai/gpt-4o-mini
+  vector_stores: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  # Register the RAG tool
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+
+vector_stores:
+  default_provider_id: faiss
+  # Define the default embedding model for RAG
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
+
+safety:
+  default_shield_id: llama-guard