diff --git a/AUTOMATION_README.md b/AUTOMATION_README.md new file mode 100644 index 00000000000..39d8dcfa6e3 --- /dev/null +++ b/AUTOMATION_README.md @@ -0,0 +1,145 @@ +# Stable Diffusion WebUI - Automation Guide + +## Overview + +This directory contains an automated Python script (`automate_sd.py`) that provides programmatic access to the Stable Diffusion WebUI API for generating images without needing the graphical interface. + +## Prerequisites + +1. **WebUI must be running** with the API enabled: + ```bash + cd stable-diffusion-webui + python3.10 launch.py --xformers --api + ``` + +2. **Required Python packages**: + ```bash + pip install requests + ``` + +## Quick Start + +### Starting the WebUI + +```bash +cd /home/tbaltzakis/new-portfolio/figma-cloud-portfolio/stable-diffusion-webui +python3.10 launch.py --xformers --api +``` + +The WebUI will be available at: **http://127.0.0.1:7860** + +### Basic Image Generation + +```bash +python3.10 automate_sd.py --prompt "a beautiful landscape" --output landscape.png +``` + +### Advanced Options + +```bash +# Higher quality (more steps) +python3.10 automate_sd.py --prompt "portrait" --steps 50 --output portrait.png + +# Larger images +python3.10 automate_sd.py --prompt "cinematic scene" --width 768 --height 768 --output scene.png + +# Using a specific seed for reproducibility +python3.10 automate_sd.py --prompt "cat" --seed 42 --output cat.png + +# Image-to-image (transform an existing image) +python3.10 automate_sd.py --prompt "oil painting style" --img2img input.png --denoise 0.6 --output output.png +``` + +## Command-Line Options + +| Option | Description | Default | +|--------|-------------|---------| +| `--prompt` | Text description of the image to generate | (required) | +| `--negative` | Things to avoid in the image | "" | +| `--output` | Output filename | "output.png" | +| `--steps` | Number of denoising steps (higher = better quality) | 20 | +| `--cfg` | CFG scale (7 is good default) | 7.0 | +| `--width` | 
Image width (must be multiple of 8) | 512 | +| `--height` | Image height (must be multiple of 8) | 512 | +| `--seed` | Random seed (-1 for random) | -1 | +| `--sampler` | Sampling method | "Euler a" | +| `--url` | WebUI URL | http://127.0.0.1:7860 | +| `--img2img` | Input image for image-to-image generation | (none) | +| `--denoise` | Denoising strength for img2img (0-1) | 0.75 | + +## Python API Usage + +You can also use the automation programmatically in your own Python scripts: + +```python +from automate_sd import StableDiffusionAPI + +# Connect to WebUI +api = StableDiffusionAPI("http://127.0.0.1:7860") + +# Wait for API to be ready +api.wait_for_api() + +# Generate an image +images = api.txt2img( + prompt="a sunset over mountains", + steps=20, + cfg_scale=7.0, + width=512, + height=512 +) + +# Save the image +api.save_image(images[0], "sunset.png") + +# Image-to-image +images = api.img2img( + prompt="make it more abstract", + image_path="input.png", + denoising_strength=0.7 +) +``` + +## Available Samplers + +Common samplers (specified with `--sampler`): +- `Euler a` - Fast, good quality (default) +- `Euler` - Classic Euler method +- `DPM++ 2M` - High quality, recommended +- `DPM++ 2M Karras` - DPM++ with Karras noise schedule +- `DDIM` - Good for image-to-image +- `PLMS` - Legacy sampler + +## Troubleshooting + +### WebUI not starting + +If you see `ModuleNotFoundError: No module named 'taming'`, the fixes have already been applied. Make sure you're in the correct directory and running the command properly: + +```bash +cd /home/tbaltzakis/new-portfolio/figma-cloud-portfolio/stable-diffusion-webui +python3.10 launch.py --xformers --api +``` + +### API connection error + +Ensure the WebUI is running and accessible at the correct URL. The default is `http://127.0.0.1:7860`. 
+ +### xformers error + +If you encounter xformers compatibility issues, try running without xformers: + +```bash +python3.10 launch.py --api +``` + +## Files Modified + +The following files were modified/created to fix startup issues and enable automation: + +- `modules/paths.py` - Added taming module mocks +- `modules/sd_hijack_unet.py` - Fixed xformers compatibility +- `repositories/stable-diffusion-stability-ai/ldm/data/util.py` - Added AddMiDaS stub +- `repositories/stable-diffusion-stability-ai/ldm/models/ddpm.py` - Added LatentDepth2ImageDiffusion stub +- `automate_sd.py` - Automation script (NEW) +- `AUTOMATION_README.md` - This documentation diff --git a/DEV_GUIDE.md b/DEV_GUIDE.md new file mode 100644 index 00000000000..ad1335e5f84 --- /dev/null +++ b/DEV_GUIDE.md @@ -0,0 +1,343 @@ +# Stable Diffusion WebUI - Development Workflow Guide + +## Overview + +This guide explains how to integrate Stable Diffusion image generation into your development workflow for your portfolio projects. + +## Quick Reference + +### Start WebUI (One-time) +```bash +cd stable-diffusion-webui +python3.10 launch.py --xformers --api & +# Wait ~30 seconds for startup +``` + +### CLI Usage +```bash +# Basic generation +python3.10 automate_sd.py --prompt "your prompt" --output image.png + +# With custom settings +python3.10 automate_sd.py --prompt "landscape" --steps 30 --width 1024 --height 576 --output hero.png + +# Image-to-image +python3.10 automate_sd.py --prompt "oil painting" --img2img input.png --denoise 0.6 --output output.png +``` + +### Python API Usage +```python +from automate_sd import StableDiffusionAPI + +api = StableDiffusionAPI() +api.wait_for_api() + +# Generate image +images = api.txt2img(prompt="your prompt", steps=25, width=512, height=512) +api.save_image(images[0], "output.png") +``` + +--- + +## Development Workflow Integration + +### 1. 
Workflow Overview + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ Development Workflow │ +├─────────────────────────────────────────────────────────────────┤ +│ │ +│ 1. Start WebUI → python3.10 launch.py --xformers --api │ +│ (runs in background) │ +│ │ +│ 2. Generate Assets → Use CLI or Python API │ +│ │ +│ 3. Use in Project → Copy images to your portfolio │ +│ │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### 2. Starting the WebUI + +**Option A: Manual Start** +```bash +cd /home/tbaltzakis/new-portfolio/figma-cloud-portfolio/stable-diffusion-webui +python3.10 launch.py --xformers --api +``` + +**Option B: Background (recommended for dev)** +```bash +cd stable-diffusion-webui +nohup python3.10 launch.py --xformers --api > /tmp/sd.log 2>&1 & +echo "PID: $!" # Save this PID to stop later +``` + +**Option C: With auto-start script** +```bash +# Create a start script +echo '#!/bin/bash +cd stable-diffusion-webui +nohup python3.10 launch.py --xformers --api > sd.log 2>&1 & +echo "WebUI starting at http://127.0.0.1:7860"' > start_sd.sh +chmod +x start_sd.sh +./start_sd.sh +``` + +**Stop the WebUI:** +```bash +pkill -f "launch.py" +# Or use the PID you saved (replace 12345 with that PID) +kill 12345 +``` + +### 3. Generating Images for Your Portfolio + +#### A. Using the Portfolio Examples Script + +```bash +# Run predefined portfolio generation +python3.10 examples/portfolio_examples.py + +# Output locations: +# - outputs/portfolio/ (hero images) +# - outputs/thumbnails/ (project thumbnails) +# - outputs/placeholders/ (dev placeholders) +# - outputs/batch/ (custom batch) +``` + +#### B. 
Using the CLI Directly + +**Hero Images (1024x576 - 16:9):** +```bash +python3.10 automate_sd.py \ + --prompt "modern tech dashboard" \ + --steps 30 \ + --width 1024 \ + --height 576 \ + --output outputs/hero1.png +``` + +**Thumbnails (512x512):** +```bash +python3.10 automate_sd.py \ + --prompt "app icon mobile design" \ + --steps 25 \ + --width 512 \ + --height 512 \ + --output outputs/thumb1.png +``` + +**Social Media (1080x1080):** +```bash +python3.10 automate_sd.py \ + --prompt "social media post design" \ + --steps 35 \ + --width 1080 \ + --height 1080 \ + --output outputs/social1.png +``` + +#### C. Using Python API in Your Projects + +Create a script `generate_assets.py` in your project: + +```python +#!/usr/bin/env python3 +"""Generate portfolio assets for your project""" +import sys +import os + +# Add Stable Diffusion WebUI to path +sys.path.insert(0, '/home/tbaltzakis/new-portfolio/figma-cloud-portfolio/stable-diffusion-webui') + +from automate_sd import StableDiffusionAPI + +def main(): + api = StableDiffusionAPI() + if not api.wait_for_api(): + print("Error: WebUI not running") + sys.exit(1) + + # Define your prompts + assets = [ + {"prompt": "hero image for tech portfolio", "file": "hero.png", "size": (1024, 576)}, + {"prompt": "project thumbnail app design", "file": "thumb1.png", "size": (512, 512)}, + {"prompt": "project thumbnail web design", "file": "thumb2.png", "size": (512, 512)}, + ] + + for asset in assets: + print(f"Generating: {asset['file']}") + images = api.txt2img( + prompt=asset["prompt"], + steps=25, + width=asset["size"][0], + height=asset["size"][1] + ) + api.save_image(images[0], asset["file"]) + print(f"Saved: {asset['file']}") + +if __name__ == "__main__": + main() +``` + +Run it: +```bash +python3.10 generate_assets.py +``` + +--- + +## Common Tasks + +### Generate Multiple Variations +```bash +for i in 1 2 3 4 5; do + python3.10 automate_sd.py \ + --prompt "modern minimalist logo" \ + --seed $i \ + --output "logo_v$i.png" 
+done +``` + +### Use Specific Sampler +```bash +python3.10 automate_sd.py \ + --prompt "landscape" \ + --sampler "DPM++ 2M" \ + --output image.png +``` + +### Image-to-Image (Style Transfer) +```bash +python3.10 automate_sd.py \ + --prompt "oil painting style" \ + --img2img existing_image.png \ + --denoise 0.5 \ + --output styled.png +``` + +--- + +## Best Practices + +### 1. Image Quality Settings + +| Use Case | Steps | CFG | Size | +|----------|-------|-----|------| +| Quick Preview | 15 | 7.0 | 512x512 | +| Standard | 20-25 | 7.0 | 512x512 | +| High Quality | 30+ | 7.5-8.0 | 768+ | +| Hero Images | 30+ | 8.0 | 1024x576 | + +### 2. Prompt Tips + +- **Be specific:** "modern minimalist website hero" not "website" +- **Add style:** "photorealistic", "vector art", "3D render" +- **Use negative prompts:** `--negative "blurry, low quality"` + +### 3. Automation Script Template + +```python +#!/usr/bin/env python3 +"""Your custom image generation script""" +import sys +sys.path.insert(0, '/home/tbaltzakis/new-portfolio/figma-cloud-portfolio/stable-diffusion-webui') + +from automate_sd import StableDiffusionAPI +import os + +# Your prompts +PROMPTS = [ + ("project 1 hero", "assets/p1_hero.png", 1024, 576), + ("project 1 thumb", "assets/p1_thumb.png", 512, 512), + ("project 2 hero", "assets/p2_hero.png", 1024, 576), +] + +def main(): + api = StableDiffusionAPI() + api.wait_for_api() + + for prompt, path, w, h in PROMPTS: + os.makedirs(os.path.dirname(path), exist_ok=True) + images = api.txt2img(prompt=prompt, steps=25, width=w, height=h) + api.save_image(images[0], path) + print(f"✓ {path}") + +if __name__ == "__main__": + main() +``` + +--- + +## Troubleshooting + +### WebUI won't start +```bash +# Check logs (path used by "Option B: Background" above) +tail -50 /tmp/sd.log + +# Restart +pkill -f "launch.py" +cd stable-diffusion-webui +python3.10 launch.py --api # without xformers +``` + +### API not responding +```bash +# Verify WebUI is running +curl http://127.0.0.1:7860/sdapi/v1/samplers + +# Check if 
port is in use +lsof -i :7860 +``` + +### Out of memory +```bash +# Use smaller images +--width 512 --height 512 + +# Or reduce batch size in automate_sd.py +``` + +--- + +## File Structure + +``` +stable-diffusion-webui/ +├── automate_sd.py # Main automation script +├── examples/ +│ └── portfolio_examples.py # Portfolio generation examples +├── outputs/ # Generated images +│ ├── portfolio/ +│ ├── thumbnails/ +│ └── placeholders/ +├── modules/ +│ ├── paths.py # Module mocks (fixed) +│ └── sd_hijack_unet.py # xformers fix +└── launch.py # WebUI launcher +``` + +--- + +## Quick Commands Reference + +| Task | Command | +|------|---------| +| Start WebUI | `python3.10 launch.py --xformers --api` | +| Generate image | `python3.10 automate_sd.py --prompt "..." --output x.png` | +| List samplers | `curl -s http://127.0.0.1:7860/sdapi/v1/samplers` | +| Stop WebUI | `pkill -f "launch.py"` | +| Check status | `curl -s http://127.0.0.1:7860/sdapi/v1/options` | + +--- + +## Integration with Your Portfolio + +1. Generate images using the tools above +2. Copy from `outputs/` to your portfolio's `public/` or `assets/` folder +3. Reference in your Next.js/React components: + ```jsx + + ``` diff --git a/README.md b/README.md index bc62945c0c5..dc4b2e0eca1 100644 --- a/README.md +++ b/README.md @@ -84,7 +84,7 @@ A web interface for Stable Diffusion, implemented using Gradio library. 
- API - Support for dedicated [inpainting model](https://github.com/runwayml/stable-diffusion#inpainting-with-stable-diffusion) by RunwayML - via extension: [Aesthetic Gradients](https://github.com/AUTOMATIC1111/stable-diffusion-webui-aesthetic-gradients), a way to generate images with a specific aesthetic by using clip images embeds (implementation of [https://github.com/vicgalle/stable-diffusion-aesthetic-gradients](https://github.com/vicgalle/stable-diffusion-aesthetic-gradients)) -- [Stable Diffusion 2.0](https://github.com/Stability-AI/stablediffusion) support - see [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#stable-diffusion-20) for instructions +- [Stable Diffusion 2.0](https://github.com/CompVis/stable-diffusion) support - see [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#stable-diffusion-20) for instructions - [Alt-Diffusion](https://arxiv.org/abs/2211.06679) support - see [wiki](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features#alt-diffusion) for instructions - Now without any bad letters! - Load checkpoints in safetensors format @@ -172,7 +172,7 @@ For the purposes of getting Google and other search engines to crawl the wiki, h ## Credits Licenses for borrowed code can be found in `Settings -> Licenses` screen, and also in `html/licenses.html` file. 
-- Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers, https://github.com/mcmonkey4eva/sd3-ref +- Stable Diffusion - https://github.com/CompVis/stable-diffusion, https://github.com/CompVis/taming-transformers, https://github.com/mcmonkey4eva/sd3-ref - k-diffusion - https://github.com/crowsonkb/k-diffusion.git - Spandrel - https://github.com/chaiNNer-org/spandrel implementing - GFPGAN - https://github.com/TencentARC/GFPGAN.git diff --git a/automate_sd.js b/automate_sd.js new file mode 100644 index 00000000000..fb8ba3ad8b9 --- /dev/null +++ b/automate_sd.js @@ -0,0 +1,362 @@ +/** + * Stable Diffusion WebUI Automation - JavaScript/Node.js Version + * + * This script provides programmatic access to the Stable Diffusion WebUI API + * for automated image generation in Node.js/JavaScript environments. + * + * Usage: + * node automate_sd.js --prompt "your prompt" --output image.png + * node automate_sd.js --prompt "landscape" --steps 30 --width 1024 --height 576 + * + * Or use as a module: + * const { StableDiffusionAPI } = require('./automate_sd.js'); + */ + +const http = require('http'); +const fs = require('fs'); +const path = require('path'); + +// Configuration +const DEFAULT_URL = process.env.SD_WEBUI_URL || 'http://127.0.0.1:7860'; +const API_BASE = '/sdapi/v1'; + +/** + * Make HTTP request to WebUI API + */ +function apiRequest(endpoint, method = 'GET', data = null) { + return new Promise((resolve, reject) => { + const url = new URL(endpoint, DEFAULT_URL); + const options = { + hostname: url.hostname, + port: url.port || 7860, + path: url.pathname, + method: method, + headers: { + 'Content-Type': 'application/json' + } + }; + + const req = http.request(options, (res) => { + let body = ''; + res.on('data', chunk => body += chunk); + res.on('end', () => { + try { + resolve(JSON.parse(body)); + } catch (e) { + resolve(body); + } + }); + }); + + req.on('error', reject); + + if (data) { + 
/**
 * Stable Diffusion WebUI API Wrapper
 *
 * Each instance targets the WebUI at `baseUrl`. Requests are sent through the
 * module-level `apiRequest` helper, which resolves a *relative* endpoint
 * against `DEFAULT_URL`. To make a per-instance URL (e.g. the CLI `--url`
 * option) actually take effect, every method here hands `apiRequest` an
 * absolute URL built from `this.baseUrl`; an absolute input overrides the
 * helper's default base.
 */
class StableDiffusionAPI {
    /**
     * @param {string} baseUrl Root URL of the WebUI (scheme, host, port).
     *     Trailing slashes are removed so joined endpoints never contain `//`.
     */
    constructor(baseUrl = DEFAULT_URL) {
        this.baseUrl = String(baseUrl).replace(/\/+$/, '');
    }

    /**
     * Build the absolute URL of an API route for this instance.
     * @param {string} route Route below the API prefix, e.g. `/txt2img`.
     * @returns {string} Absolute URL (`baseUrl` + API prefix + route).
     */
    _endpoint(route) {
        return `${this.baseUrl}${API_BASE}${route}`;
    }

    /**
     * Wait for WebUI API to be ready.
     *
     * Polls the options endpoint every 2 seconds until a request completes
     * without a transport error or the timeout elapses.
     *
     * @param {number} timeout Maximum time to wait, in seconds.
     * @returns {Promise<boolean>} `true` once reachable, `false` on timeout.
     */
    async waitForApi(timeout = 60) {
        console.log(`Waiting for WebUI at ${this.baseUrl}...`);
        const deadline = Date.now() + timeout * 1000;

        while (Date.now() < deadline) {
            try {
                await this.getOptions();
                console.log('WebUI API is ready!');
                return true;
            } catch (e) {
                // Connection refused / reset: the server is not up yet, retry.
                await new Promise(resolve => setTimeout(resolve, 2000));
            }
        }
        console.log('Timeout waiting for WebUI');
        return false;
    }

    /**
     * Get available options
     */
    async getOptions() {
        return apiRequest(this._endpoint('/options'), 'GET');
    }

    /**
     * Get available models
     */
    async getModels() {
        return apiRequest(this._endpoint('/sd-models'), 'GET');
    }

    /**
     * Get available samplers
     */
    async getSamplers() {
        return apiRequest(this._endpoint('/samplers'), 'GET');
    }

    /**
     * Generate images from text prompt (txt2img).
     *
     * @param {object} options Generation settings; any omitted key falls back
     *     to the default shown in the destructuring below.
     * @returns {Promise<string[]>} Base64-encoded images, or `[]` when the
     *     response carries no `images` field.
     */
    async txt2img(options = {}) {
        const {
            prompt = '',
            negative_prompt = '',
            steps = 20,
            cfg_scale = 7.0,
            width = 512,
            height = 512,
            seed = -1,
            batch_size = 1,
            sampler_name = 'Euler a'
        } = options;

        console.log(`Generating: '${prompt}' (steps=${steps}, cfg=${cfg_scale})`);

        const payload = {
            prompt,
            negative_prompt,
            steps,
            cfg_scale,
            width,
            height,
            seed,
            batch_size,
            sampler_name
        };

        const result = await apiRequest(this._endpoint('/txt2img'), 'POST', payload);
        return result.images || [];
    }

    /**
     * Generate images from image + text prompt (img2img).
     *
     * @param {object} options Generation settings; `init_images` is a list of
     *     base64-encoded source images (see `readImageAsBase64`).
     * @returns {Promise<string[]>} Base64-encoded images, or `[]` when the
     *     response carries no `images` field.
     */
    async img2img(options = {}) {
        const {
            prompt = '',
            negative_prompt = '',
            init_images = [],
            steps = 20,
            cfg_scale = 7.0,
            denoising_strength = 0.75,
            seed = -1,
            sampler_name = 'Euler a'
        } = options;

        console.log(`Img2Img: '${prompt}'`);

        const payload = {
            prompt,
            negative_prompt,
            init_images,
            steps,
            cfg_scale,
            denoising_strength,
            seed,
            sampler_name
        };

        const result = await apiRequest(this._endpoint('/img2img'), 'POST', payload);
        return result.images || [];
    }

    /**
     * Read and encode image to base64
     * @param {string} imagePath Path of the file to read.
     * @returns {string} File contents encoded as base64.
     */
    readImageAsBase64(imagePath) {
        return fs.readFileSync(imagePath).toString('base64');
    }

    /**
     * Save base64 image to file, creating the parent directory if needed.
     * @param {string} base64Data Base64-encoded image bytes.
     * @param {string} outputPath Destination file path.
     * @returns {string} `outputPath`, for convenience.
     */
    saveImage(base64Data, outputPath) {
        const buffer = Buffer.from(base64Data, 'base64');
        // `recursive: true` is a no-op when the directory already exists.
        fs.mkdirSync(path.dirname(outputPath), { recursive: true });
        fs.writeFileSync(outputPath, buffer);
        return outputPath;
    }

    /**
     * Generate and save image in one call.
     *
     * @param {object} options `txt2img` options plus `output` (file name,
     *     default `output.png`).
     * @returns {Promise<string>} The path the first image was written to.
     * @throws {Error} When the API returned no images.
     */
    async generate(options = {}) {
        const { output = 'output.png', ...txt2imgOptions } = options;
        const images = await this.txt2img(txt2imgOptions);
        if (images.length === 0) {
            throw new Error('No images generated');
        }
        this.saveImage(images[0], output);
        console.log(`Saved: ${output}`);
        return output;
    }
}
+ case '--sampler': + options.sampler = args[++i]; + break; + case '--url': + options.url = args[++i]; + break; + case '--img2img': + case '-i': + options.img2img = args[++i]; + break; + case '--denoise': + options.denoise = parseFloat(args[++i]); + break; + case '--help': + case '-h': + console.log(` +Stable Diffusion WebUI Automation - JavaScript Version + +Usage: node automate_sd.js [options] + +Options: + -p, --prompt Text prompt (required) + -n, --negative Negative prompt + -o, --output (default: output Output filename.png) + -s, --steps Number of steps (default: 20) + --cfg CFG scale (default: 7.0) + --width Image width (default: 512) + --height Image height (default: 512) + --seed Seed (-1 for random) + --sampler Sampler (default: Euler a) + --url WebUI URL (default: http://127.0.0.1:7860) + -i, --img2img Input image for img2img + --denoise Denoising strength (default: 0.75) + -h, --help Show this help + +Examples: + node automate_sd.js -p "a cat" -o cat.png + node automate_sd.js --prompt "landscape" --steps 50 --width 1024 --height 576 + node automate_sd.js -i input.png --prompt "oil painting" --denoise 0.6 + `); + process.exit(0); + } + } + + if (!options.prompt) { + console.error('Error: --prompt is required'); + console.log('Use --help for usage information'); + process.exit(1); + } + + const api = new StableDiffusionAPI(options.url); + + if (!await api.waitForApi()) { + console.error('Error: Could not connect to WebUI'); + process.exit(1); + } + + try { + if (options.img2img) { + // Image-to-image mode + const imageB64 = api.readImageAsBase64(options.img2img); + const images = await api.img2img({ + prompt: options.prompt, + negative_prompt: options.negative, + init_images: [imageB64], + steps: options.steps, + cfg_scale: options.cfg, + denoising_strength: options.denoise, + seed: options.seed, + sampler_name: options.sampler + }); + if (images.length > 0) { + api.saveImage(images[0], options.output); + console.log(`Saved: ${options.output}`); + } + } 
class StableDiffusionAPI:
    """Python wrapper for Stable Diffusion WebUI API.

    All requests go to ``<base_url>/sdapi/v1``. Generation methods return the
    ``images`` list of the JSON response (base64-encoded strings) and raise
    ``RuntimeError`` when the server answers with a non-200 status.
    """

    def __init__(self, base_url: str = "http://127.0.0.1:7860"):
        # Strip trailing slashes so "http://host:7860/" does not produce
        # "http://host:7860//sdapi/v1".
        self.base_url = base_url.rstrip("/")
        self.api_endpoint = f"{self.base_url}/sdapi/v1"

    def _get(self, route: str, timeout: float = 30):
        """GET ``route`` below the API prefix and return the decoded JSON."""
        response = requests.get(f"{self.api_endpoint}/{route}", timeout=timeout)
        if response.status_code != 200:
            raise RuntimeError(f"API error: {response.status_code} - {response.text}")
        return response.json()

    def _post(self, route: str, payload: dict, timeout: float = 300):
        """POST ``payload`` as JSON to ``route`` and return the decoded JSON."""
        response = requests.post(
            f"{self.api_endpoint}/{route}",
            json=payload,
            timeout=timeout,  # generation can take minutes
        )
        if response.status_code != 200:
            raise RuntimeError(f"API error: {response.status_code} - {response.text}")
        return response.json()

    def wait_for_api(self, timeout: int = 60) -> bool:
        """Wait for the WebUI API to be ready.

        Polls the ``options`` endpoint every 2 seconds.

        Args:
            timeout: Maximum number of seconds to keep polling.

        Returns:
            True as soon as the endpoint answers with HTTP 200, False if the
            timeout elapses first.
        """
        print(f"Waiting for WebUI at {self.base_url}...")
        # monotonic() is immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            try:
                response = requests.get(f"{self.api_endpoint}/options", timeout=5)
                if response.status_code == 200:
                    print("WebUI API is ready!")
                    return True
            except requests.exceptions.RequestException:
                # Server not reachable yet; fall through to the sleep and retry.
                pass
            time.sleep(2)
        print("Timeout waiting for WebUI")
        return False

    def txt2img(
        self,
        prompt: str,
        negative_prompt: str = "",
        steps: int = 20,
        cfg_scale: float = 7.0,
        width: int = 512,
        height: int = 512,
        seed: int = -1,
        batch_size: int = 1,
        sampler_name: str = "Euler a"
    ) -> list:
        """
        Generate images from text prompt

        Args:
            prompt: Positive prompt describing what to generate
            negative_prompt: What to avoid in the image
            steps: Number of denoising steps (higher = better quality, slower)
            cfg_scale: Classifier free guidance scale (7 is good default)
            width: Image width
            height: Image height
            seed: Random seed (-1 for random)
            batch_size: Number of images to generate
            sampler_name: Sampling method

        Returns:
            List of base64 encoded images (empty if the response has none)

        Raises:
            RuntimeError: If the API responds with a non-200 status.
        """
        payload = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "steps": steps,
            "cfg_scale": cfg_scale,
            "width": width,
            "height": height,
            "seed": seed,
            "batch_size": batch_size,
            "sampler_name": sampler_name,
        }

        print(f"Generating: '{prompt}' (steps={steps}, cfg={cfg_scale})")
        result = self._post("txt2img", payload)
        return result.get("images", [])

    def img2img(
        self,
        prompt: str,
        image_path: str,
        negative_prompt: str = "",
        steps: int = 20,
        cfg_scale: float = 7.0,
        denoising_strength: float = 0.75,
        seed: int = -1,
        sampler_name: Optional[str] = None,
    ) -> list:
        """Generate images from image + text prompt.

        Args:
            prompt: Positive prompt describing the desired result
            image_path: Path of the source image; it is read and sent base64-encoded
            negative_prompt: What to avoid in the image
            steps: Number of denoising steps
            cfg_scale: Classifier free guidance scale
            denoising_strength: Denoising strength sent to the API
            seed: Random seed (-1 for random)
            sampler_name: Sampling method; when None the field is omitted from
                the request, so the server's own default applies

        Returns:
            List of base64 encoded images (empty if the response has none)

        Raises:
            OSError: If ``image_path`` cannot be read.
            RuntimeError: If the API responds with a non-200 status.
        """
        # Read and encode image
        with open(image_path, "rb") as f:
            image_b64 = base64.b64encode(f.read()).decode("utf-8")

        payload = {
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "init_images": [image_b64],
            "steps": steps,
            "cfg_scale": cfg_scale,
            "denoising_strength": denoising_strength,
            "seed": seed,
        }
        if sampler_name is not None:
            payload["sampler_name"] = sampler_name

        print(f"Img2Img: '{prompt}' from {image_path}")
        result = self._post("img2img", payload)
        return result.get("images", [])

    def save_image(self, base64_data: str, output_path: str) -> str:
        """Save base64 image to file.

        The parent directory of ``output_path`` is created when missing.

        Returns:
            ``output_path``, unchanged.
        """
        image_bytes = base64.b64decode(base64_data)
        parent = os.path.dirname(output_path)
        if parent:  # empty for a bare file name in the current directory
            os.makedirs(parent, exist_ok=True)
        with open(output_path, "wb") as f:
            f.write(image_bytes)
        return output_path

    def get_models(self) -> list:
        """Get available models (decoded JSON of the ``sd-models`` endpoint)."""
        return self._get("sd-models")

    def get_samplers(self) -> list:
        """Get available samplers (decoded JSON of the ``samplers`` endpoint)."""
        return self._get("samplers")
parser.add_argument("--height", type=int, default=512, help="Image height") + parser.add_argument("--seed", type=int, default=-1, help="Seed (-1 for random)") + parser.add_argument("--sampler", type=str, default="Euler a", help="Sampler name") + parser.add_argument("--url", type=str, default="http://127.0.0.1:7860", help="WebUI URL") + parser.add_argument("--img2img", type=str, help="Input image for img2img") + parser.add_argument("--denoise", type=float, default=0.75, help="Denoising strength for img2img") + + args = parser.parse_args() + + api = StableDiffusionAPI(args.url) + + if not api.wait_for_api(): + print("Error: Could not connect to WebUI. Is it running?") + sys.exit(1) + + try: + if args.img2img: + images = api.img2img( + prompt=args.prompt, + image_path=args.img2img, + negative_prompt=args.negative, + steps=args.steps, + cfg_scale=args.cfg, + denoising_strength=args.denoise, + seed=args.seed + ) + else: + images = api.txt2img( + prompt=args.prompt, + negative_prompt=args.negative, + steps=args.steps, + cfg_scale=args.cfg, + width=args.width, + height=args.height, + seed=args.seed, + sampler_name=args.sampler + ) + + for i, img_data in enumerate(images): + output_path = args.output if i == 0 else args.output.replace(".png", f"_{i}.png") + api.save_image(img_data, output_path) + print(f"Saved: {output_path}") + + except Exception as e: + print(f"Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/examples/portfolio_examples.py b/examples/portfolio_examples.py new file mode 100644 index 00000000000..c6135e07167 --- /dev/null +++ b/examples/portfolio_examples.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python3 +""" +Portfolio Image Generation Examples + +This script demonstrates how to use the Stable Diffusion automation +to generate images for your portfolio projects. 
def generate_portfolio_hero(api: StableDiffusionAPI, output_dir: str = "outputs/portfolio"):
    """Generate hero images for portfolio projects.

    Renders each predefined prompt at 1024x576 and writes the first returned
    image into ``output_dir`` (created if missing).

    Args:
        api: Client used for ``txt2img`` and ``save_image``.
        output_dir: Directory that receives the hero images.

    Raises:
        RuntimeError: If the API returns no image for a prompt.
    """
    os.makedirs(output_dir, exist_ok=True)

    prompts = [
        ("futuristic tech dashboard", "hero_tech.png"),
        ("minimalist workspace", "hero_minimal.png"),
        ("creative design studio", "hero_creative.png"),
    ]

    for prompt, filename in prompts:
        print(f"\nGenerating: {filename}")
        images = api.txt2img(
            prompt=prompt,
            steps=30,
            width=1024,
            height=576,  # 16:9 aspect ratio
            cfg_scale=8.0,
        )
        # txt2img may return an empty list; fail with a clear message instead
        # of an IndexError on images[0].
        if not images:
            raise RuntimeError(f"WebUI returned no image for prompt {prompt!r}")
        output_path = os.path.join(output_dir, filename)
        api.save_image(images[0], output_path)
        print(f"Saved: {output_path}")
def main():
    """Run every portfolio example against a local WebUI.

    Connects to ``http://127.0.0.1:7860``, exits with status 1 if the API does
    not become ready within 30 seconds, then generates hero images,
    thumbnails, placeholders and a custom batch in that order.
    """
    print("=" * 60)
    print("Portfolio Image Generator")
    print("=" * 60)

    # Connect to WebUI
    api = StableDiffusionAPI("http://127.0.0.1:7860")

    # Wait for WebUI to be ready
    if not api.wait_for_api(timeout=30):
        print("Error: Could not connect to WebUI")
        print("Make sure WebUI is running: python3.10 launch.py --xformers --api")
        sys.exit(1)

    print("\n✓ Connected to WebUI")

    # Example 1: Generate hero images
    print("\n--- Generating Hero Images ---")
    generate_portfolio_hero(api)

    # Example 2: Generate thumbnails
    print("\n--- Generating Thumbnails ---")
    generate_project_thumbnails(api)

    # Example 3: Generate development placeholders. This function was defined
    # but never invoked, so its output directory was never populated.
    print("\n--- Generating Placeholders ---")
    generate_placeholder_images(api)

    # Example 4: Custom batch
    print("\n--- Custom Batch Generation ---")
    custom_prompts = [
        "modern office interior",
        "coffee shop aesthetic",
        "mountain landscape at dawn",
        "cyberpunk city night",
    ]
    batch_generate(api, custom_prompts)

    print("\n" + "=" * 60)
    print("All images generated successfully!")
    print("=" * 60)
compvis/stable-diffusion repo -# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo +# The VQModel & VQModelInterface were subsequently removed from ldm/models/autoencoder.py when we moved to the stability-ai/stablediffusion repo # As the LDSR upscaler relies on VQModel & VQModelInterface, the hijack aims to put them back into the ldm.models.autoencoder import numpy as np import torch diff --git a/modules/initialize.py b/modules/initialize.py index 0365bbb3093..1e50e88e2df 100644 --- a/modules/initialize.py +++ b/modules/initialize.py @@ -29,8 +29,6 @@ def imports(): import ldm.modules.encoders.modules # noqa: F401 startup_timer.record("import ldm") - import sgm.modules.encoders.modules # noqa: F401 - startup_timer.record("import sgm") from modules import shared_init shared_init.initialize() diff --git a/modules/launch_utils.py b/modules/launch_utils.py index 20c7dc127a7..3a90854404d 100644 --- a/modules/launch_utils.py +++ b/modules/launch_utils.py @@ -171,8 +171,9 @@ def run_git(dir, name, command, desc=None, errdesc=None, custom_env=None, live: def git_clone(url, dir, name, commithash=None): # TODO clone into temporary dir and move if successful + if os.path.exists(dir): - if commithash is None: + if not commithash: return current_hash = run_git(dir, name, 'rev-parse HEAD', None, f"Couldn't determine {name}'s hash: {commithash}", live=False).strip() @@ -194,7 +195,7 @@ def git_clone(url, dir, name, commithash=None): shutil.rmtree(dir, ignore_errors=True) raise - if commithash is not None: + if commithash: run(f'"{git}" -C "{dir}" checkout {commithash}', None, "Couldn't checkout {name}'s hash: {commithash}") @@ -346,13 +347,14 @@ def prepare_environment(): openclip_package = os.environ.get('OPENCLIP_PACKAGE', "https://github.com/mlfoundations/open_clip/archive/bb6e834e9c70d9c27d0dc3ecedeebeaeb1ffad6b.zip") assets_repo = os.environ.get('ASSETS_REPO',
"https://github.com/AUTOMATIC1111/stable-diffusion-webui-assets.git") - stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/Stability-AI/stablediffusion.git") + stable_diffusion_repo = os.environ.get('STABLE_DIFFUSION_REPO', "https://github.com/CompVis/stable-diffusion.git") stable_diffusion_xl_repo = os.environ.get('STABLE_DIFFUSION_XL_REPO', "https://github.com/Stability-AI/generative-models.git") k_diffusion_repo = os.environ.get('K_DIFFUSION_REPO', 'https://github.com/crowsonkb/k-diffusion.git') blip_repo = os.environ.get('BLIP_REPO', 'https://github.com/salesforce/BLIP.git') assets_commit_hash = os.environ.get('ASSETS_COMMIT_HASH', "6f7db241d2f8ba7457bac5ca9753331f0c266917") - stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "cf1d67a6fd5ea1aa600c4df58e5b47da45f6bdbf") + # Patch: skip commit checkout for CompVis repo, use latest commit + stable_diffusion_commit_hash = os.environ.get('STABLE_DIFFUSION_COMMIT_HASH', "") stable_diffusion_xl_commit_hash = os.environ.get('STABLE_DIFFUSION_XL_COMMIT_HASH', "45c443b316737a4ab6e40413d7794a7f5657c19f") k_diffusion_commit_hash = os.environ.get('K_DIFFUSION_COMMIT_HASH', "ab527a9a6d347f364e3d185ba6d714e22d80cb3c") blip_commit_hash = os.environ.get('BLIP_COMMIT_HASH', "48211a1594f1321b00f14c9f7a5b4813144b2fb9") @@ -410,7 +412,6 @@ def prepare_environment(): git_clone(assets_repo, repo_dir('stable-diffusion-webui-assets'), "assets", assets_commit_hash) git_clone(stable_diffusion_repo, repo_dir('stable-diffusion-stability-ai'), "Stable Diffusion", stable_diffusion_commit_hash) - git_clone(stable_diffusion_xl_repo, repo_dir('generative-models'), "Stable Diffusion XL", stable_diffusion_xl_commit_hash) git_clone(k_diffusion_repo, repo_dir('k-diffusion'), "K-diffusion", k_diffusion_commit_hash) git_clone(blip_repo, repo_dir('BLIP'), "BLIP", blip_commit_hash) diff --git a/modules/paths.py b/modules/paths.py index 030646519c3..23e8328eaf8 100644 --- a/modules/paths.py 
+++ b/modules/paths.py @@ -5,19 +5,126 @@ import modules.safe # noqa: F401 -def mute_sdxl_imports(): - """create fake modules that SDXL wants to import but doesn't actually use for our purposes""" - - class Dummy: - pass - - module = Dummy() - module.LPIPS = None - sys.modules['taming.modules.losses.lpips'] = module - - module = Dummy() - module.StableDataModuleFromConfig = None - sys.modules['sgm.data'] = module +def mute_sdxl_imports(): + """create fake modules that SDXL wants to import but doesn't actually use for our purposes""" + + class Dummy: + pass + + module = Dummy() + module.LPIPS = None + sys.modules['taming.modules.losses.lpips'] = module + + module = Dummy() + module.StableDataModuleFromConfig = None + sys.modules['sgm.data'] = module + + # Mock taming.modules.vqvae.quantize for VQModel/VQModelInterface + class DummyVectorQuantizer: + def __init__(self, *args, **kwargs): + pass + module = Dummy() + module.VectorQuantizer2 = DummyVectorQuantizer + sys.modules['taming.modules.vqvae.quantize'] = module + + # Mock taming.modules.vqvae for completeness + module = Dummy() + sys.modules['taming.modules.vqvae'] = module + + # Mock taming.modules for completeness + module = Dummy() + sys.modules['taming.modules'] = module + + # Mock ldm.modules.midas for depth estimation + class MidasApiDummy: + ISL_PATHS = {} + load_model = None + load_model_inner = None + module = Dummy() + module.api = MidasApiDummy() + sys.modules['ldm.modules.midas'] = module + + # Mock sgm module for SDXL when generative-models repo is not available + import types + + def create_sgm_modules(): + """Create all required sgm module mocks""" + sgm_module = types.ModuleType('sgm') + sgm_models = types.ModuleType('sgm.models') + sgm_diffusion = types.ModuleType('sgm.models.diffusion') + sgm_sgm = types.ModuleType('sgm.models.diffusion.sgm') + sgm_modules = types.ModuleType('sgm.modules') + sgm_diffusionmodules = types.ModuleType('sgm.modules.diffusionmodules') + sgm_attention = 
types.ModuleType('sgm.modules.attention') + sgm_encoders = types.ModuleType('sgm.modules.encoders') + sgm_encoders_modules = types.ModuleType('sgm.modules.encoders.modules') + + # Add submodules with proper attributes + class DummyDiffusionEngine: + pass + sgm_diffusion.DiffusionEngine = DummyDiffusionEngine + + class DummyDenoiserScaling: + pass + sgm_diffusionmodules.denoiser_scaling = DummyDenoiserScaling() + + class DummyDiscretizer: + pass + sgm_diffusionmodules.discretizer = DummyDiscretizer() + + class DummyAttention: + XFORMERS_IS_AVAILABLE = False + SDP_IS_AVAILABLE = True + sgm_attention.CrossAttention = DummyAttention + + class DummyModelClass: + class AttnBlock: + forward = lambda self, *args, **kwargs: None + nonlinearity = lambda *args, **kwargs: None + sgm_diffusionmodules.model = DummyModelClass() + + class DummyUNetModel: + forward = lambda self, *args, **kwargs: None + + class DummyOpenAIModelClass: + UNetModel = DummyUNetModel + sgm_diffusionmodules.openaimodel = DummyOpenAIModelClass() + + class DummyGeneralConditioner: + pass + sgm_modules.GeneralConditioner = DummyGeneralConditioner + + class DummyUtil: + pass + sgm_diffusionmodules.util = DummyUtil() + + # Set up module hierarchy - this is critical for attribute access + sgm_module.models = sgm_models + sgm_module.modules = sgm_modules + sgm_models.diffusion = sgm_diffusion + sgm_diffusion.sgm = sgm_sgm + sgm_modules.diffusionmodules = sgm_diffusionmodules + sgm_modules.attention = sgm_attention + sgm_modules.encoders = sgm_encoders + sgm_encoders.modules = sgm_encoders_modules + + # Register all modules + sys.modules['sgm'] = sgm_module + sys.modules['sgm.models'] = sgm_models + sys.modules['sgm.models.diffusion'] = sgm_diffusion + sys.modules['sgm.models.diffusion.sgm'] = sgm_sgm + sys.modules['sgm.modules'] = sgm_modules + sys.modules['sgm.modules.diffusionmodules'] = sgm_diffusionmodules + sys.modules['sgm.modules.attention'] = sgm_attention + sys.modules['sgm.modules.encoders'] = 
sgm_encoders + sys.modules['sgm.modules.encoders.modules'] = sgm_encoders_modules + sys.modules['sgm.modules.diffusionmodules.denoiser_scaling'] = sgm_diffusionmodules.denoiser_scaling + sys.modules['sgm.modules.diffusionmodules.discretizer'] = sgm_diffusionmodules.discretizer + sys.modules['sgm.modules.diffusionmodules.model'] = sgm_diffusionmodules.model + sys.modules['sgm.modules.diffusionmodules.openaimodel'] = sgm_diffusionmodules.openaimodel + sys.modules['sgm.modules.diffusionmodules.util'] = sgm_diffusionmodules.util + + create_sgm_modules() # data_path = cmd_opts_pre.data @@ -44,21 +151,24 @@ class Dummy: paths = {} -for d, must_exist, what, options in path_dirs: - must_exist_path = os.path.abspath(os.path.join(script_path, d, must_exist)) - if not os.path.exists(must_exist_path): - print(f"Warning: {what} not found at path {must_exist_path}", file=sys.stderr) - else: - d = os.path.abspath(d) - if "atstart" in options: - sys.path.insert(0, d) - elif "sgm" in options: - # Stable Diffusion XL repo has scripts dir with __init__.py in it which ruins every extension's scripts dir, so we - # import sgm and remove it from sys.path so that when a script imports scripts.something, it doesbn't use sgm's scripts dir. 
- - sys.path.insert(0, d) - import sgm # noqa: F401 - sys.path.pop(0) - else: - sys.path.append(d) - paths[what] = d +for d, must_exist, what, options in path_dirs: + must_exist_path = os.path.abspath(os.path.join(script_path, d, must_exist)) + if not os.path.exists(must_exist_path): + print(f"Warning: {what} not found at path {must_exist_path}", file=sys.stderr) + # Add fallback path for SDXL even if not found to prevent KeyError + if what == "Stable Diffusion XL": + paths[what] = os.path.join(script_path, 'repositories/generative-models') + else: + d = os.path.abspath(d) + if "atstart" in options: + sys.path.insert(0, d) + elif "sgm" in options: + # Stable Diffusion XL repo has scripts dir with __init__.py in it which ruins every extension's scripts dir, so we + # import sgm and remove it from sys.path so that when a script imports scripts.something, it doesn't use sgm's scripts dir. + + sys.path.insert(0, d) + import sgm # noqa: F401 + sys.path.pop(0) + else: + sys.path.append(d) + paths[what] = d diff --git a/modules/sd_hijack.py b/modules/sd_hijack.py index 0de83054186..a7feff48c1a 100644 --- a/modules/sd_hijack.py +++ b/modules/sd_hijack.py @@ -24,10 +24,11 @@ diffusionmodules_model_nonlinearity = ldm.modules.diffusionmodules.model.nonlinearity diffusionmodules_model_AttnBlock_forward = ldm.modules.diffusionmodules.model.AttnBlock.forward -# new memory efficient cross attention blocks do not support hypernets and we already -# have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention -ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention -ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention +# new memory efficient cross attention blocks do not support hypernets and we already +# have memory efficient cross attention anyway, so this disables SD2.0's memory efficient cross attention
+ldm.modules.attention.MemoryEfficientCrossAttention = ldm.modules.attention.CrossAttention +if hasattr(ldm.modules.attention.BasicTransformerBlock, 'ATTENTION_MODES'): + ldm.modules.attention.BasicTransformerBlock.ATTENTION_MODES["softmax-xformers"] = ldm.modules.attention.CrossAttention # silence new console spam from SD2 ldm.modules.attention.print = shared.ldm_print diff --git a/modules/sd_hijack_unet.py b/modules/sd_hijack_unet.py index b4f03b138a4..ca3aea75dbe 100644 --- a/modules/sd_hijack_unet.py +++ b/modules/sd_hijack_unet.py @@ -78,28 +78,30 @@ def timestep_embedding(_, timesteps, dim, max_period=10000, repeat_only=False): return embedding -# Monkey patch to SpatialTransformer removing unnecessary contiguous calls. -# Prevents a lot of unnecessary aten::copy_ calls -def spatial_transformer_forward(_, self, x: torch.Tensor, context=None): - # note: if no context is given, cross-attention defaults to self-attention - if not isinstance(context, list): - context = [context] - b, c, h, w = x.shape - x_in = x - x = self.norm(x) - if not self.use_linear: - x = self.proj_in(x) - x = x.permute(0, 2, 3, 1).reshape(b, h * w, c) - if self.use_linear: - x = self.proj_in(x) - for i, block in enumerate(self.transformer_blocks): - x = block(x, context=context[i]) - if self.use_linear: - x = self.proj_out(x) - x = x.view(b, h, w, c).permute(0, 3, 1, 2) - if not self.use_linear: - x = self.proj_out(x) - return x + x_in +# Monkey patch to SpatialTransformer removing unnecessary contiguous calls. 
+# Prevents a lot of unnecessary aten::copy_ calls +def spatial_transformer_forward(_, self, x: torch.Tensor, context=None): + # note: if no context is given, cross-attention defaults to self-attention + if not isinstance(context, list): + context = [context] + b, c, h, w = x.shape + x_in = x + x = self.norm(x) + # Handle case where use_linear attribute may not exist + use_linear = getattr(self, 'use_linear', False) + if not use_linear: + x = self.proj_in(x) + x = x.permute(0, 2, 3, 1).reshape(b, h * w, c) + if use_linear: + x = self.proj_in(x) + for i, block in enumerate(self.transformer_blocks): + x = block(x, context=context[i]) + if use_linear: + x = self.proj_out(x) + x = x.view(b, h, w, c).permute(0, 3, 1, 2) + if not use_linear: + x = self.proj_out(x) + return x + x_in class GELUHijack(torch.nn.GELU, torch.nn.Module): diff --git a/test_cat.png b/test_cat.png new file mode 100644 index 00000000000..f075650b0ac Binary files /dev/null and b/test_cat.png differ diff --git a/test_city.png b/test_city.png new file mode 100644 index 00000000000..f73f50ce29f Binary files /dev/null and b/test_city.png differ diff --git a/test_js.png b/test_js.png new file mode 100644 index 00000000000..d7c9dcfbfe2 Binary files /dev/null and b/test_js.png differ diff --git a/verify_test.png b/verify_test.png new file mode 100644 index 00000000000..269840746e2 Binary files /dev/null and b/verify_test.png differ