Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions aiopslab/config.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@ qualitative_eval: false

# Flag to enable/disable printing the session
print_session: false

# Offline mode configuration (for environments with restricted network access)
# When enabled, images are loaded from local tar files instead of pulling from registries
offline_mode: false
images_dir: ./images # Directory containing pre-downloaded image tar files
20 changes: 20 additions & 0 deletions aiopslab/orchestrator/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from aiopslab.utils.status import *
from aiopslab.utils.critical_section import CriticalSection
from aiopslab.service.telemetry.prometheus import Prometheus
from aiopslab.paths import BASE_DIR
import time
import inspect
import asyncio
Expand All @@ -30,6 +31,25 @@ def __init__(self, results_dir=None):
self.kubectl = KubeCtl()
self.use_wandb = os.getenv("USE_WANDB", "false").lower() == "true"
self.results_dir = results_dir

# Configure offline mode if enabled in config
self._configure_offline_mode()

def _configure_offline_mode(self):
    """Enable offline image loading on Helm when config.yml requests it.

    Reads ``offline_mode`` (default ``False``) and ``images_dir`` (default
    ``"./images"``) from ``BASE_DIR / "config.yml"``. When the file does not
    exist, or ``offline_mode`` is falsy, nothing is configured.

    This step is best-effort: any error is logged and then ignored so that a
    missing or malformed config never prevents the orchestrator from being
    constructed.
    """
    # Local import so this method does not depend on a module-level logger.
    import logging

    try:
        from aiopslab.config import Config

        config_path = BASE_DIR / "config.yml"
        if not config_path.exists():
            return

        config = Config(config_path)
        offline_mode = config.get("offline_mode", False)
        images_dir = config.get("images_dir", "./images")

        if offline_mode:
            Helm.configure_offline_mode(enabled=True, images_dir=images_dir)
    except Exception as e:
        # Deliberately non-fatal, but leave a trace instead of failing silently:
        # otherwise a typo in config.yml looks like "offline mode does nothing".
        logging.getLogger(__name__).warning(
            "Offline mode configuration skipped: %s", e
        )

def init_problem(self, problem_id: str):
"""Initialize a problem instance for the agent to solve.
Expand Down
4 changes: 4 additions & 0 deletions aiopslab/plugins/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""AIOpsLab Plugins Package"""
102 changes: 102 additions & 0 deletions aiopslab/plugins/offline_images/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Offline Images Plugin

This plugin enables AIOpsLab to work in environments with restricted or no internet access by loading pre-downloaded Docker images from local tar files into Kind clusters.

## Why Use This?

- **Network Restrictions**: Some environments don't have access to Docker Hub, GHCR, or other registries
- **Slow Networks**: Pulling large images repeatedly can be time-consuming
- **Reproducibility**: Pre-downloaded images ensure consistent versions across deployments

## Quick Start

### 1. Download Images (with internet access)

```bash
# Run this on a machine with internet access
./scripts/download_images.sh ./images
```

This will download all required images and save them as tar files.

### 2. Transfer Images (if needed)

Copy the `./images` directory to your target machine.

### 3. Configure AIOpsLab

Edit `aiopslab/config.yml`:

```yaml
# Enable offline mode
offline_mode: true
images_dir: ./images
```

### 4. Run as Usual

```bash
python cli.py
# or
python service.py
```

Images will be automatically loaded from local tars before deploying applications.

## Manual Usage

You can also use the ImageLoader programmatically:

```python
from pathlib import Path

from aiopslab.plugins.offline_images import ImageLoader

# Initialize loader
loader = ImageLoader(images_dir="./images", cluster_name="kind")

# Load all images
count = loader.load_all_from_directory()
print(f"Loaded {count} images")

# Or load a specific image
loader.load_image_from_tar(Path("./images/nginx_latest.tar"))
```

## Tar File Naming Convention

Image tar files should be named in the following format, where every `/` in the image reference is replaced by `_`:
```
{registry}_{image_path}_{tag}.tar
```

Examples:
- `ghcr.io_open-telemetry_demo_1.11.1.tar`
- `docker.io_library_nginx_latest.tar`
- `quay.io_prometheus_prometheus_v2.47.2.tar`

The `download_images.sh` script handles this naming automatically.

## Supported Registries

- `ghcr.io` (GitHub Container Registry)
- `docker.io` (Docker Hub)
- `quay.io`
- `registry.k8s.io`
- `gcr.io` (Google Container Registry)

## Troubleshooting

### Images not loading

1. Check that the tar files exist in the images directory
2. Ensure Docker daemon is running
3. Verify Kind cluster is running: `kind get clusters`

### Image name mismatch

If an image fails to load, check that the tar filename follows the naming convention.
You can manually check the image name in a tar:

```bash
docker load -i image.tar
# Output: Loaded image: registry/image:tag
```
20 changes: 20 additions & 0 deletions aiopslab/plugins/offline_images/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
Offline Images Plugin for AIOpsLab

This plugin enables offline/local image loading for Kind clusters,
which is useful in environments with restricted network access.

Usage:
1. Download images: ./scripts/download_images.sh ./images
2. Enable in config.yml:
offline_mode: true
images_dir: ./images
3. Run AIOpsLab as usual - images will be loaded from local tars
"""

from .image_loader import ImageLoader, ensure_images_loaded

__all__ = ["ImageLoader", "ensure_images_loaded"]
213 changes: 213 additions & 0 deletions aiopslab/plugins/offline_images/image_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""
Image Loader for Offline Deployment

Loads pre-downloaded Docker images from local tar files into Kind clusters.
This enables AIOpsLab to work in environments without internet access.
"""

import subprocess
from pathlib import Path
from typing import List, Optional, Set
import logging

logger = logging.getLogger(__name__)


class ImageLoader:
    """
    Loads Docker images from local tar files into Kind clusters.

    Each tar is first imported into the local Docker daemon (``docker load``)
    and the resulting image(s) are then copied onto the cluster nodes
    (``kind load docker-image``).

    Tar files should be named in the format: registry_image_tag.tar
    (every "/" of the image reference replaced by "_").
    Examples:
        - ghcr.io_open-telemetry_demo_1.0.0.tar
        - docker.io_library_nginx_latest.tar
    """

    # Registry hosts recognised at the start of a tar filename.
    _KNOWN_REGISTRIES = (
        'ghcr.io',
        'quay.io',
        'registry.k8s.io',
        'docker.io',
        'gcr.io',
    )

    def __init__(self, images_dir: str, cluster_name: str = "kind"):
        """
        Initialize ImageLoader.

        Args:
            images_dir: Directory containing pre-downloaded image tar files
            cluster_name: Name of the Kind cluster (default: "kind")
        """
        self.images_dir = Path(images_dir)
        self.cluster_name = cluster_name
        # Names of images that were successfully pushed into the cluster.
        self.loaded_images: Set[str] = set()

    def _tar_name_to_image(self, tar_path: Path) -> str:
        """
        Convert tar filename back to Docker image name.

        The mapping is only a heuristic: the last "_" is taken as the tag
        separator and every other "_" becomes "/", so image names or tags that
        themselves contain "_" cannot be reconstructed exactly.

        Args:
            tar_path: Path to the tar file

        Returns:
            Docker image name (e.g., "ghcr.io/open-telemetry/demo:1.0.0").
            When the filename carries no tag part, the name is returned
            without a ":tag" suffix.
        """
        name = tar_path.stem  # Remove .tar extension

        registry = ''
        for host in self._KNOWN_REGISTRIES:
            prefix = host + '_'
            if name.startswith(prefix):
                # Exactly one "/" between registry host and image path.
                registry = host + '/'
                name = name[len(prefix):]
                break

        # The last underscore separates the image path from the tag.
        image_path, separator, tag = name.rpartition('_')
        if not separator:
            # No tag in the filename; keep the registry that was recognised.
            return f"{registry}{name}"

        image_path = image_path.replace('_', '/')
        return f"{registry}{image_path}:{tag}"

    def _load_into_kind(self, image_name: str) -> bool:
        """Push one image from the local Docker daemon into the Kind cluster."""
        kind_cmd = ["kind", "load", "docker-image", image_name, "--name", self.cluster_name]
        try:
            kind_result = subprocess.run(kind_cmd, capture_output=True, text=True)
        except OSError as exc:
            # Raised when the kind executable is missing or not runnable.
            logger.warning(f"Failed to load {image_name} to Kind: {exc}")
            return False

        if kind_result.returncode != 0:
            # A non-zero exit that only reports the image as already present
            # on the nodes is treated as success.
            if "already present" not in kind_result.stderr.lower():
                logger.warning(f"Failed to load {image_name} to Kind: {kind_result.stderr[:100]}")
                return False

        self.loaded_images.add(image_name)
        logger.info(f"Loaded image: {image_name}")
        return True

    def load_image_from_tar(self, tar_path: Path) -> bool:
        """
        Load the image(s) of one tar file into the Kind cluster.

        Args:
            tar_path: Path to the tar file

        Returns:
            True if every image of the archive was loaded, False otherwise
            (missing file, docker/kind not available, or a command failed)
        """
        if not tar_path.exists():
            logger.warning(f"Tar file not found: {tar_path}")
            return False

        # Step 1: Load into local Docker
        load_cmd = ["docker", "load", "-i", str(tar_path)]
        try:
            result = subprocess.run(load_cmd, capture_output=True, text=True)
        except OSError as exc:
            # Raised when the docker executable is missing or not runnable.
            logger.warning(f"Failed to load {tar_path.name}: {exc}")
            return False

        if result.returncode != 0:
            logger.warning(f"Failed to load {tar_path.name}: {result.stderr[:100]}")
            return False

        # An archive may hold several tagged images; docker prints one
        # "Loaded image: <name>" line for each of them.
        marker = 'Loaded image:'
        reported = [
            line.split(marker)[-1].strip()
            for line in result.stdout.splitlines()
            if marker in line
        ]
        image_names = [image for image in reported if image]

        if not image_names:
            # Infer from filename
            image_names = [self._tar_name_to_image(tar_path)]

        # Step 2: Load into Kind cluster
        # NOTE(review): a reviewer asked whether this only works for Kind.
        # As written here it does -- images are pushed with
        # `kind load docker-image`. The PR author replied that a later
        # revision also supports localhost (plain `docker load`) and remote
        # (SSH + `docker load`) deployments driven by k8s_host, k8s_user and
        # ssh_key_path from config.yml; that code is not part of this version.
        all_loaded = True
        for image_name in image_names:
            if not self._load_into_kind(image_name):
                all_loaded = False
        return all_loaded

    def load_all_from_directory(self) -> int:
        """
        Load all tar files from the images directory into Kind cluster.

        Files are processed in sorted order so the progress log is
        reproducible between runs.

        Returns:
            Number of successfully loaded tar files
        """
        if not self.images_dir.exists():
            logger.warning(f"Images directory not found: {self.images_dir}")
            return 0

        tar_files = sorted(self.images_dir.glob("*.tar"))
        if not tar_files:
            logger.warning(f"No tar files found in {self.images_dir}")
            return 0

        logger.info(f"Loading {len(tar_files)} images from {self.images_dir}...")

        success_count = 0
        for i, tar_file in enumerate(tar_files, 1):
            logger.info(f"[{i}/{len(tar_files)}] Loading {tar_file.name}...")
            if self.load_image_from_tar(tar_file):
                success_count += 1

        logger.info(f"Loaded {success_count}/{len(tar_files)} images successfully")
        return success_count

    def is_image_loaded(self, image_name: str) -> bool:
        """
        Check if an image has been loaded by this loader instance.

        Args:
            image_name: Docker image name

        Returns:
            True if loaded, False otherwise
        """
        return image_name in self.loaded_images


# Global instance (lazily initialized)
_loader: Optional[ImageLoader] = None


def get_loader() -> Optional[ImageLoader]:
    """
    Get the global ImageLoader instance.

    Returns:
        The loader currently stored in the module-level ``_loader``
        variable, or None if no loader has been assigned yet.
    """
    return _loader


def init_loader(images_dir: str, cluster_name: str = "kind") -> ImageLoader:
    """
    Create a fresh ImageLoader and install it as the global instance.

    Any previously stored global loader is replaced.

    Args:
        images_dir: Directory containing image tar files
        cluster_name: Name of the Kind cluster

    Returns:
        The newly created ImageLoader, which is now also the global one
    """
    global _loader
    new_loader = ImageLoader(images_dir, cluster_name)
    _loader = new_loader
    return new_loader


def ensure_images_loaded(images_dir: Optional[str] = None, cluster_name: str = "kind") -> bool:
    """
    Load every image tar of the configured directory into the Kind cluster.

    Main entry point of the offline images plugin; call it before deploying
    applications. The global loader is reused when it already exists (the
    arguments are then not consulted); otherwise it is created from the
    given arguments.

    Args:
        images_dir: Directory containing image tar files (optional if the
            global loader is already initialized)
        cluster_name: Name of the Kind cluster

    Returns:
        True if at least one tar file was loaded, False otherwise
        (including when no loader exists and no images_dir was given)
    """
    loader = _loader

    if loader is None:
        if images_dir is None:
            logger.warning("ImageLoader not initialized and no images_dir provided")
            return False
        # Creates the loader and stores it as the global instance.
        loader = init_loader(images_dir, cluster_name)

    return loader.load_all_from_directory() > 0
Loading