CompCon/generate_prompt_descriptions.py at main · adobe-research/CompCon · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
# Copyright 2025 Adobe Research. All rights reserved.
# To view a copy of the license, visit LICENSE.md.

import argparse
import os
import random
import re
import string
import time
from typing import Dict, List

import numpy as np
import pandas as pd
import seaborn as sns
import torch
from matplotlib import pyplot as plt
from scipy.stats import ttest_ind
from sklearn.metrics import roc_auc_score
from torch import nn
from tqdm import tqdm, trange

import weave
import wandb
from serve.utils_clip import get_embeddings
# from serve.utils_diffusion import generate_images
from serve.utils_llm import get_llm_output
from serve.utils_gpu import initialize_model_with_gpu, get_gpu_status, cleanup_gpu_memory
# from compcon_utils import get_cosine_similarity_batch, parse_string, plot_prompts, timeit
from compcon_utils import get_cosine_similarity_batch, plot_prompts, timeit

from diffusers import (
    AutoPipelineForText2Image,
    EulerDiscreteScheduler,
    PixArtAlphaPipeline,
    StableDiffusionXLPipeline,
    UNet2DConditionModel,
    DiffusionPipeline,
    EDMDPMSolverMultistepScheduler,
    FluxPipeline,
    StableDiffusion3Pipeline,
)
from flask import Flask, jsonify, request
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# Embedding model name passed to get_embeddings for both the images and the attribute text.
CLIP_MODEL = "ViT-bigG-14"
# Alternative embedding model (disabled):
# CLIP_MODEL = "SIGLIP"

from diffusers import BitsAndBytesConfig, SD3Transformer2DModel
from diffusers import StableDiffusion3Pipeline
import torch

# Module-level model setup. NOTE: everything below runs at import time, so importing
# this module already loads the "playground" and "sd3.5-large" pipelines.
print("Initializing models with automatic GPU distribution...")
gpu_status = get_gpu_status()
print(f"GPU Status: {gpu_status}")

# Each call returns a (pipeline, call-parameters, device) triple; the pipeline and the
# parameters are read by generate_images() further down.
playground_pipe, playground_params, playground_device = initialize_model_with_gpu("playground")
# "dreamlike" is not loaded eagerly; generate_images() loads it on first use instead.
# dreamlike_pipe, dreamlike_params, dreamlike_device = initialize_model_with_gpu("dreamlike")
sd3_5_large_pipe, sd3_5_large_params, sd3_5_large_device = initialize_model_with_gpu("sd3.5-large")

# Model name -> device it was placed on. generate_images() adds an entry here whenever
# it lazily loads another model.
model_devices = {
    "playground": playground_device,
    "sd3.5-large": sd3_5_large_device,
}

print("Model initialization complete!")
print(f"Model devices: {model_devices}")

def _lazy_pipeline(registry_key, var_prefix):
    """
    Return ``(pipe, params)`` for a model that is only loaded on first use.

    On the first request the model is created with ``initialize_model_with_gpu`` and
    stored in the module globals ``<var_prefix>_pipe``, ``<var_prefix>_params`` and
    ``<var_prefix>_device``; its device is also recorded in ``model_devices``.
    Later requests reuse the stored objects.
    """
    module_vars = globals()
    pipe_name = f"{var_prefix}_pipe"
    params_name = f"{var_prefix}_params"
    if pipe_name not in module_vars:
        pipe, params, device = initialize_model_with_gpu(registry_key)
        module_vars[pipe_name] = pipe
        module_vars[params_name] = params
        module_vars[f"{var_prefix}_device"] = device
        model_devices[registry_key] = device
    return module_vars[pipe_name], module_vars[params_name]


def generate_images(prompt, model, num_images=1, negative_prompt=None):
    """
    Generate images for a text prompt with one of the supported diffusion pipelines.

    Args:
        prompt (str): The text prompt to generate images from
        model (str): One of 'playground', 'dreamlike', 'sd3.5-large', 'flux',
            'sd-lightning' or 'pixart'. 'playground' and 'sd3.5-large' use the
            pipelines created at module import; the others are loaded on first use.
        num_images (int): Number of images to generate for the prompt
        negative_prompt (str, optional): Only forwarded to the 'dreamlike' pipeline;
            ignored by every other model.

    Returns:
        The ``.images`` attribute of the pipeline output.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    # Keyword arguments shared by every pipeline call; model-specific ones are added below.
    call_kwargs = {"prompt": prompt, "num_images_per_prompt": num_images}

    if model == "playground":
        pipe = playground_pipe
        # A fresh default-configured scheduler is installed on every call.
        pipe.scheduler = EDMDPMSolverMultistepScheduler()
        params = playground_params
    elif model == "dreamlike":
        pipe, params = _lazy_pipeline("dreamlike", "dreamlike")
        call_kwargs["negative_prompt"] = negative_prompt
    elif model == "sd3.5-large":
        pipe = sd3_5_large_pipe
        params = sd3_5_large_params
    elif model == "flux":
        pipe, params = _lazy_pipeline("flux", "flux")
    elif model == "sd-lightning":
        pipe, params = _lazy_pipeline("sd-lightning", "sd_lightning")
    elif model == "pixart":
        pipe, params = _lazy_pipeline("pixart", "pixart")
    else:
        raise ValueError(f"Model {model} not supported")

    images = pipe(**call_kwargs, **params).images
    if model == "sd3.5-large":
        # Debug output that exists only for this model.
        print(images)
    return images

def parse_string(input_string):
    """
    Parse an LLM reply laid out as "Thought Process: / Description: / New Prompts:".

    The reply is cut at the FIRST occurrence of ``"Description:"`` and, within the
    remainder, at the first occurrence of ``"New Prompts:"``. Cutting at the first
    occurrence only means a marker that reappears later (for example inside a
    proposed prompt) no longer truncates the prompt list.

    Args:
        input_string (str): Raw text returned by the LLM.

    Returns:
        tuple: ``(thought_process, description, key_concepts, prompts)`` where
            * ``thought_process`` is the text before ``"Description:"`` with the
              ``"Thought Process:"`` label removed,
            * ``description`` is everything between ``"Description:"`` and
              ``"New Prompts:"`` (a ``"Key Concepts: [...]"`` paragraph, when the
              reply has one, therefore stays inside the description),
            * ``key_concepts`` is always the empty string,
            * ``prompts`` is the list of numbered items, each reduced to its first
              line and stripped.
        Missing sections yield empty strings / an empty list.
    """
    head, _, after_description = input_string.partition("Description:")
    thought_process = head.replace("Thought Process:", "").strip()

    description_text, _, prompts_text = after_description.partition("New Prompts:")
    description = description_text.strip()

    # An item starts at "<number>." and runs until the next "\n<number>." or the end.
    numbered_items = re.findall(r"\d+\.\s*(.*?)(?=\n\d+\.|\Z)", prompts_text, re.DOTALL)
    # Anything after the first line of an item (e.g. trailing commentary) is dropped.
    prompts = [item.split("\n", 1)[0].strip() for item in numbered_items]

    return thought_process, description, "", prompts

description_prompt_format = """I am a machine learning engineer comparing 2 text-2-image models, which we will call A and B. I have discovered that for the following set of prompts (separable prompts), images generated by model A contain an unintended artifact of "{attribute}" while images generated by model B with the same prompt does not contain this. Here are the seperable prompts:
{set_1_prompts}

Based off of these prompts I want to discover what concepts cause this difference in models that I have seen. For reference, I here is a set of prompts for which this difference is not seen (inseparable prompts):
{set_2_prompts}

Please describe the concepts shared across many separable prompts that are largely not seen in inseperable prompts. Note that I am NOT interested in concepts that are directly related to "{attribute}". For instance, if the attribute is "reflections on water", the description may reference water but not water and reflections. If the attribute is "creepy", the descrtipion should not reference concepts like "unsettling", "horrific", etc. This description should be clear, objective, and human interpretable such that a human could construct a set of separable prompts from this description (AKA the images generated by model A contain {attribute} while the images generated by model B using the same prompt do not contain this). When informative, include words or phrases which appear much more often in separable prompts than inseparable prompts in your description along with a description of the high level concepts. Do NOT provide analysis as to why this artifact may be seen, only provide the d (line truncated to 1000 characters)

Please think step by step and explain your through process before you come up with your short description, and provide 25 new prompts which are likely to be separable. As a reminder these prompts should cover a diverse range of concepts, should contain different subjects, phrasing, and context from the prompts you have already provided, and should NOT contain references to "{attribute}". For example, if the artifact is "man with a beard", prompts can mention men, but cannot mention both men and beards. Your new prompts will be evaluated to refine the description of what concepts are indicative of separable prompts. Ensure that your new prompts are diverse and noticably different from prompts you have already seen. The new prompts must align with the description you have provided.

Your response should be in the following format. Please ensure your though process and description are in two seperate paragraphs as shown:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{a description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Initial-proposal template for a score-based ("soft assignment") variant.
# Placeholders: {prompts}, {attribute}, {num_prompts}. Not referenced in the visible part
# of this file. The template text is a runtime string and is intentionally left as-is.
description_prompt_iterative_soft_assignment = """I am a machine learning engineer comparing two text-to-image models, A and B. In my analysis, I've computed the "seperability score" of  a set of text-2-image prompts.

Below are the prompts and their seperability scores:
{prompts}

Your task is to identify and describe the shared concepts among prompts with a high seperability score that are not present in prompts with a low seperability score. This analysis should focus on underlying concepts and exclude any direct reference to "{attribute}".

Please think step by step and explain your through process before you come up with your short description, and provide {num_prompts} new prompts which are likely to be separable. As a reminder these prompts should cover a diverse range of concepts, should contain different subjects, phrasing, and context from the prompts you have already provided, and should NOT contain references to "{attribute}". Your new prompts will be evaluated to refine the description of what concepts are indicative of separable prompts. Ensure that your new prompts are diverse and cover a wide range of concepts, art styles, and themes that are not seen in any of the prompts you have already seen.

Your response should be in the following format. Please ensure your though process and description are in two seperate paragraphs as shown:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{a description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Follow-up template used by iterate_prompts() when args.iterative_prompt is off and at
# least one separable prompt was found. Placeholders: {separable_prompts},
# {inseparable_prompts}. The template text is a runtime string and is left as-is.
iterate_prompts_format = """I have run the prompts you suggested and here are the results on the prompts which were separable and inseparable.
Separable Prompts:
{separable_prompts}

Inseparable Prompts:
{inseparable_prompts}

Given this feedback, please update your short description and key concepts of what concepts are indicative of all the separable prompts you have seen so far and provide 25 new prompts which are likely separable. As stated previously, this description should be short, objective, and human interpretable. The new prompts you provide should be diverse and cover a wide range of concepts, as I will use them to further refine my understanding of the concepts that cause the difference in models A and B. As before, please think step by step and explain your through process before you come up with your short description, and provide 25 new prompts which are likely to be separable.

Your response should be in the following the same format as before. Please strictly follow this format as it will be pased into a string parser:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{your revised description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

Key Concepts: [{{seperable concept 1}}, {{seperable concept 2}}, ..]

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Follow-up template for the score-based ("soft assignment") variant.
# Placeholder: {prompts_with_scores}. Not referenced in the visible part of this file.
# The template text is a runtime string and is left as-is.
iterate_prompts_format_iterative_soft_assignment = """I have run the prompts you suggested and here are the seperability scores for the prompts you have seen so far.

{prompts_with_scores}

Given this feedback, please update your short description and key concepts of what concepts are indicative of all the separable prompts you have seen so far and provide 25 new prompts which are likely separable. As stated previously, this description should be short, objective, and human interpretable. The new prompts you provide should be diverse and cover a wide range of concepts, as I will use them to further refine my understanding of the concepts that cause the difference in models A and B. As before, please think step by step and explain your through process before you come up with your short description, and provide 25 new prompts which are likely to be separable.

Your response should be in the following format. Please ensure your though process and description are in two seperate paragraphs as shown:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{a description of what concepts are indicative of separable prompts, should be 1 sentence}}

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Follow-up template used by iterate_prompts() when no prompt of the last round was
# separable. It contains no format placeholders, so the keyword arguments passed to
# .format() by iterate_prompts() have no effect on the text.
# The template text is a runtime string and is left as-is.
iterate_prompts_format_no_sep = """I have run the prompts you suggested and but none of these prompts are seperable

Given this feedback, please update your short description and key concepts of what concepts are indicative of all the separable prompts you have seen so far and provide 25 new prompts which are likely separable. As stated previously, this description should be short, objective, and human interpretable. The new prompts you provide should be diverse and cover a wide range of concepts, as I will use them to further refine my understanding of the concepts that cause the difference in models A and B. As before, please think step by step and explain your through process before you come up with your short description, and provide 25 new prompts which are likely to be separable.

Your response should be in the following the same format as before. Please strictly follow this format as it will be pased into a string parser:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{your revised description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

Key Concepts: [{{seperable concept 1}}, {{seperable concept 2}}, ..]

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Initial-proposal template used by propose_prompts() when args.iterative_prompt is set;
# it asks for small edits of existing separable prompts instead of entirely new ones.
# Placeholders: {attribute}, {set_1_prompts}, {set_2_prompts}.
# The template text is a runtime string and is left as-is.
description_prompt_iterative = """I am a machine learning engineer comparing 2 text-2-image models, which we will call A and B. I have discovered that for the following set of prompts (separable prompts), images generated by model A contain an unintended artifact of "{attribute}" while images generated by model B with the same prompt does not contain this. Here are the seperable prompts:
{set_1_prompts}

Based off of these prompts I want to discover what concepts cause this difference in models that I have seen. For reference, I here is a set of prompts for which this difference is not seen (inseparable prompts):
{set_2_prompts}

Please describe the concepts shared across many separable prompts that are largely not seen in inseperable prompts. Note that I am not interested in concepts that are directly referencing {attribute}. I would like both a free form decription and a list of 1-3 word concepts which are defining features of seperable prompts. The description should be short, objective, human interpretable such that a human could construct a set of separable prompts from this description. Please think step by step and explain your through process before you come up with your short description, and make small changes to the existing seperable prompts to develop 25 new prompts which will likely be separable. Your new prompts will be evaluated to refine the description of what concepts are indicative of separable prompts.

Your response should be in the following format. Please ensure your thought process, description, key concepts, and new prompts are in the following format:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{a description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

Key Concepts: [{{seperable concept 1}}, {{seperable concept 2}}, ..]

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

# Follow-up template used by iterate_prompts() when args.iterative_prompt is set.
# Placeholders: {separable_prompts}, {inseparable_prompts}.
# NOTE: the doubled "i" in the variable name is how iterate_prompts() refers to it, so the
# name must stay in sync with that function. The template text is a runtime string and is
# left as-is.
iiterate_prompts_format_iterative = """I have run the prompts you suggested and here are the results on the prompts which were separable and inseparable.
Separable Prompts:
{separable_prompts}

Inseparable Prompts:
{inseparable_prompts}

Given this feedback, please update your short description and key concepts of what concepts are indicative of all the separable prompts you have seen so far and provide 25 new prompts which are likely separable. As stated previously, this description should be short, objective and human interpretable. The new prompts you provide should be diverse and cover a wide range of concepts, as I will use them to further refine my understanding of the concepts that cause the difference in models A and B. As before, please think step by step and explain your through process before you come up with your short description, and provide 25 new prompts which are likely to be separable by making small changes to the existing prompts.

Your response should be in the following the same format as before. Please strictly follow this format as it will be pased into a string parser:
Thought Process: {{your thought process on the differences between separable and inseperable prompts}}

Description: {{your revised description of what concepts are indicative of separable prompts, should be 1 detailed sentence}}

Key Concepts: [{{seperable concept 1}}, {{seperable concept 2}}, ..]

New Prompts:
1. {{a new prompt}}
2. {{another new prompt}}
3. {{another new prompt}}
"""

@timeit
def find_prompts(prompts: List[Dict], attribute: str, threshold: float, delta: float):
    """
    Split prompts into separable and non-separable sets with respect to ``attribute``.

    Every image path of every prompt is embedded with ``CLIP_MODEL`` and compared
    (cosine similarity) with the text embedding of ``attribute``. For each prompt the
    first two of its images are scored: ``cos_sim_1`` belongs to ``paths[0]`` and
    ``cos_sim_2`` to ``paths[1]``. A prompt is separable when
    ``cos_sim_1 >= threshold`` and ``cos_sim_2 + delta < cos_sim_1``.

    Args:
        prompts: Prompt dictionaries with at least the keys ``"prompt"`` and ``"paths"``
            (list of image paths). Each dictionary is mutated: a ``"cos_sims"`` entry
            holding ``[cos_sim_1, cos_sim_2]`` is added.
            NOTE(review): presumably ``paths[0]`` is model A's image and ``paths[1]``
            model B's (compcon() appends paths in ``models`` order) - confirm for
            prompts loaded from disk.
        attribute: Text description of the attribute to look for in the images.
        threshold: Minimum similarity the first image must reach.
        delta: Margin by which the first image must beat the second one.

    Returns:
        Tuple ``(separable_prompts, not_separable_prompts, details)``; ``details`` holds
        one dictionary per prompt with the keys ``prompt``, ``cos_sim_1``, ``cos_sim_2``,
        ``delta`` and ``separable``. All three are empty lists for empty input.

    Raises:
        ValueError: If a prompt has fewer than two image paths.
    """
    print(f"Finding separable prompts for {len(prompts)} prompts")
    if not prompts:
        # Nothing to embed; np.vstack would fail on an empty list of batches.
        return [], [], []

    for prompt in prompts:
        if len(prompt["paths"]) < 2:
            raise ValueError(
                f"Prompt {prompt['prompt']!r} has {len(prompt['paths'])} image path(s); "
                "two are required to compare the models"
            )

    separable_prompts = []
    not_separable_prompts = []
    details = []

    # Prepare batches of image paths
    batch_size = 32  # Adjust this based on your GPU memory
    all_paths = [path for prompt in prompts for path in prompt["paths"]]

    # Get embeddings in batches
    all_embeddings = []
    for i in tqdm(range(0, len(all_paths), batch_size), desc="Getting embeddings"):
        batch_paths = all_paths[i : i + batch_size]
        all_embeddings.append(get_embeddings(batch_paths, CLIP_MODEL, "image"))

    img_emb = np.vstack(all_embeddings)
    attr_emb = get_embeddings([attribute], CLIP_MODEL, "text")
    cos_sims = get_cosine_similarity_batch(img_emb, attr_emb)

    # Walk the flat similarity array with a running offset so that a prompt carrying
    # more than two images does not shift the scores of every prompt after it.
    offset = 0
    for prompt in prompts:
        cos_sim_1 = cos_sims[offset]
        cos_sim_2 = cos_sims[offset + 1]
        offset += len(prompt["paths"])

        is_separable = cos_sim_1 >= threshold and cos_sim_2 + delta < cos_sim_1

        details.append(
            {
                "prompt": prompt["prompt"],
                "cos_sim_1": cos_sim_1,
                "cos_sim_2": cos_sim_2,
                "delta": delta,
                "separable": is_separable,
            }
        )

        if is_separable:
            separable_prompts.append(prompt)
        else:
            not_separable_prompts.append(prompt)

        # Add cosine similarities to the original prompt dictionary for later use
        prompt["cos_sims"] = [cos_sim_1, cos_sim_2]

    print(
        f"Separable prompts: {len(separable_prompts)}, Not separable prompts: {len(not_separable_prompts)}"
    )

    # Plotting is best-effort; a plotting failure must not lose the computed split.
    try:
        plot_prompts(separable_prompts, attribute, min(5, len(separable_prompts)), "Separable")
        plot_prompts(
            not_separable_prompts, attribute, min(5, len(not_separable_prompts)), "Not Separable"
        )
    except Exception as e:
        print(f"Error plotting prompts: {e}")

    return separable_prompts, not_separable_prompts, details


def propose_prompts(
    attribute: str,
    separable_prompts: List[Dict],
    inseparable_prompts: List[Dict],
    max_num_prompts=25,
):
    """
    Ask the LLM for a description of what makes prompts separable, plus new prompts.

    Up to ``max_num_prompts`` prompts are sampled at random from each input list and
    inserted into the initial-proposal template (``description_prompt_iterative`` when
    the module-level ``args.iterative_prompt`` is set, ``description_prompt_format``
    otherwise). The request is attempted up to three times; after a failed attempt the
    following attempts pass ``cache=False`` to ``get_llm_output``.

    Args:
        attribute: Attribute the comparison is about.
        separable_prompts: Prompt dictionaries (key ``"prompt"``) judged separable.
        inseparable_prompts: Prompt dictionaries judged not separable.
        max_num_prompts: Upper bound on how many prompts of each kind are shown.

    Returns:
        Tuple ``(description, new_prompts, log, history)``: the parsed description, the
        proposed prompts as ``{"prompt", "paths", "models"}`` dictionaries, a dictionary
        of loggable strings, and the user/assistant message pair for later follow-ups.
        If no attempt yields exactly 25 prompts, the last successfully parsed reply is
        used as-is.

    Raises:
        RuntimeError: If none of the attempts produced a parseable reply.
    """
    separable_prompts_str = "\n".join(
        p["prompt"]
        for p in random.sample(separable_prompts, min(max_num_prompts, len(separable_prompts)))
    )
    inseparable_prompts_str = "\n".join(
        p["prompt"]
        for p in random.sample(inseparable_prompts, min(max_num_prompts, len(inseparable_prompts)))
    )
    prompt_format = description_prompt_iterative if args.iterative_prompt else description_prompt_format
    description_prompt = prompt_format.format(
        attribute=attribute,
        set_1_prompts=separable_prompts_str,
        set_2_prompts=inseparable_prompts_str,
    )

    expected_count = 25  # number of new prompts the templates ask for
    # The flag lives outside the loop: resetting it per attempt would make every retry
    # use cache=True again even after a failure.
    use_cache = True
    description_response = None
    description = key_concepts = new_prompts = None
    for _ in range(3):
        try:
            response = get_llm_output(description_prompt, "gpt-4o", cache=use_cache)
            _, parsed_description, parsed_concepts, parsed_prompts = parse_string(response)
        except Exception as e:
            print(f"Error parsing description response: {e}")
            use_cache = False
            continue

        # Keep the reply and its parse together so the log never mixes two attempts.
        description_response = response
        description, key_concepts, new_prompts = parsed_description, parsed_concepts, parsed_prompts
        if len(new_prompts) == expected_count:
            break
        print(
            f"Error parsing description response: Expected {expected_count} new prompts, got {len(new_prompts)}"
        )
        use_cache = False

    if description_response is None:
        raise RuntimeError("Could not obtain a parseable LLM response after 3 attempts")

    new_prompts = [{"prompt": p, "paths": [], "models": []} for p in new_prompts]

    history = [
        {"role": "user", "content": [{"type": "text", "text": description_prompt}]},
        {"role": "assistant", "content": [{"type": "text", "text": description_response}]},
    ]

    log = {
        "description_prompt": description_prompt,
        "description_response": description_response,
        "description": description,
        "key_concepts": str(key_concepts),
        "new_prompts": [n["prompt"] for n in new_prompts],
    }
    print("----------------------------")
    print(description)
    print("----------------------------")
    return description, new_prompts, log, history


def iterate_prompts(
    attribute: str,
    chat_history: List[Dict],
    separable_prompts: List[Dict],
    inseparable_prompts: List[Dict],
    max_num_prompts=15,
):
    """
    Send the results of the last round back to the LLM and get a refined proposal.

    Up to ``max_num_prompts`` prompts are sampled from each list and reported with a
    follow-up template: ``iterate_prompts_format_no_sep`` when ``separable_prompts`` is
    empty, otherwise ``iiterate_prompts_format_iterative`` when the module-level
    ``args.iterative_prompt`` is set, else ``iterate_prompts_format``. The request is
    attempted up to three times; after a failed attempt the following attempts pass
    ``cache=False`` to ``get_llm_output``.

    Args:
        attribute: Attribute the comparison is about.
        chat_history: Earlier messages, passed to ``get_llm_output`` as ``history``.
            The list is not modified here.
        separable_prompts: Prompt dictionaries found separable in the last round.
        inseparable_prompts: Prompt dictionaries found not separable in the last round.
        max_num_prompts: Upper bound on how many prompts of each kind are reported.

    Returns:
        Tuple ``(updated_description, new_prompts, log)``. If no attempt yields exactly
        25 prompts, the last successfully parsed reply is used as-is.

    Raises:
        RuntimeError: If none of the attempts produced a parseable reply.
    """
    separable_prompts_str = "\n".join(
        p["prompt"]
        for p in random.sample(separable_prompts, min(max_num_prompts, len(separable_prompts)))
    )
    inseparable_prompts_str = "\n".join(
        p["prompt"]
        for p in random.sample(inseparable_prompts, min(max_num_prompts, len(inseparable_prompts)))
    )
    prompt_format = iiterate_prompts_format_iterative if args.iterative_prompt else iterate_prompts_format

    if len(separable_prompts) == 0:
        description_prompt = iterate_prompts_format_no_sep.format(
            attribute=attribute,
            inseparable_prompts=inseparable_prompts_str,
        )
    else:
        description_prompt = prompt_format.format(
            attribute=attribute,
            separable_prompts=separable_prompts_str,
            inseparable_prompts=inseparable_prompts_str,
        )

    expected_count = 25  # number of new prompts the templates ask for
    # The flag lives outside the loop: resetting it per attempt would make every retry
    # use cache=True again even after a failure.
    use_cache = True
    description_response = None
    updated_description = key_concepts = new_prompts = None
    for _ in range(3):
        try:
            response = get_llm_output(
                description_prompt, "gpt-4o", history=chat_history, cache=use_cache
            )
            _, parsed_description, parsed_concepts, parsed_prompts = parse_string(response)
        except Exception as e:
            print(f"Error parsing description response: {e}")
            use_cache = False
            continue

        # Keep the reply and its parse together so the log never mixes two attempts.
        description_response = response
        updated_description, key_concepts = parsed_description, parsed_concepts
        new_prompts = [{"prompt": p, "paths": [], "models": []} for p in parsed_prompts]
        if len(new_prompts) == expected_count:
            break
        print(
            f"Error parsing description response: Expected {expected_count} new prompts, got {len(new_prompts)}"
        )
        use_cache = False

    if description_response is None:
        raise RuntimeError("Could not obtain a parseable LLM response after 3 attempts")

    log = {
        "description_prompt": description_prompt,
        "description_response": description_response,
        "description": updated_description,
        "key_concepts": str(key_concepts),
        "new_prompts": [n["prompt"] for n in new_prompts],
    }

    print("----------------------------")
    print(updated_description)
    print("----------------------------")

    return updated_description, new_prompts, log

def remove_punctuation(input_string):
    """Return ``input_string`` with every character of ``string.punctuation`` deleted."""
    # A code point mapped to None is dropped by str.translate.
    drop_table = dict.fromkeys(map(ord, string.punctuation))
    return input_string.translate(drop_table)


def _generate_image_with_retries(prompt_text: str, model_name: str, attempts: int = 3):
    """Call generate_images up to ``attempts`` times; return its result, or None if every attempt raised."""
    for _ in range(attempts):
        try:
            return generate_images(prompt_text, model_name)
        except Exception as e:
            print(f"Error generating image: {e}")
    return None


def compcon(prompts: List[Dict], models: List[str], attribute: str, image_save_dir: str, no_history: bool = False, threshold: float = 0.2, delta: float = 0.05):
    """
    Iteratively discover which prompts make ``attribute`` show up for one model only.

    Steps:
      1. Score the given ``prompts`` with find_prompts() and ask the LLM
         (propose_prompts) for a description and a first batch of new prompts.
      2. For three iterations: render every proposed prompt with each model, save the
         images under ``<image_save_dir>/iteration_<i>/``, score them with
         find_prompts(), log the results to wandb, and request the next proposal
         (propose_prompts when ``no_history`` is set, iterate_prompts otherwise).

    Args:
        prompts: Already-rendered prompt dictionaries (keys ``"prompt"``, ``"paths"``).
        models: Model names; ``model.lower()`` is passed to generate_images while the
            name as given is used for file names and table columns. find_prompts()
            compares the first two images of a prompt, i.e. the first two models.
        attribute: Attribute the comparison is about.
        image_save_dir: Directory that receives the generated images.
        no_history: If True, every round starts a fresh proposal instead of continuing
            the chat; short result lists are topped up with random earlier prompts.
        threshold: Passed to find_prompts().
        delta: Passed to find_prompts().

    Returns:
        Tuple ``(P_a, P_a_new, d_P_a)``: the accumulated prompt list, the most recent
        proposal batch, and the most recent description.

    Raises:
        ValueError: If ``prompts`` is empty.
    """
    if not prompts:
        raise ValueError("compcon requires at least one prompt")

    P_a_new = []
    d_P_a = ""
    chat_history = []
    wandb_logs = []
    proposal_logs = []
    best_prop, best_iter = 0, 0

    # Initial cheap evaluation
    P_a, P_a_not_seperable, details = find_prompts(prompts, attribute, threshold, delta)

    # Initial proposal
    d_P_a, P_a_new, logs, chat = propose_prompts(attribute, P_a, P_a_not_seperable)
    proposal_logs.append(logs)
    # NOTE(review): chat_history only ever holds this first exchange; later rounds are
    # not appended, so iterate_prompts() never sees its own earlier follow-ups.
    chat_history += chat

    wandb_logs.append(
        {
            "description": d_P_a,
            "num_separable_prompts": len(P_a) / len(prompts),
            "separable_prompts": "\n".join([p["prompt"] for p in P_a]),
            "inseparable_prompts": "\n".join([p["prompt"] for p in P_a_not_seperable]),
            "iteration": 0,
        }
    )

    wandb.log({"prop_separable_prompts": len(P_a) / len(prompts), "iteration": 0})

    os.makedirs(image_save_dir, exist_ok=True)

    for i in range(3):
        iteration_dir = f"{image_save_dir}/iteration_{i}"
        os.makedirs(iteration_dir, exist_ok=True)

        # Generate new images
        I_new = {}
        image_generation_times = []
        evaluated_prompts = []
        for prompt in P_a_new:
            file_stem = remove_punctuation(prompt["prompt"])[:100].replace(" ", "_")
            # (model, save_path) pairs; committed only if every model produced an image,
            # so a failed prompt never ends up with a stale or partial set of paths.
            generated = []
            for model in models:
                start_time = time.time()
                images = _generate_image_with_retries(prompt["prompt"], model.lower())
                image_generation_times.append(time.time() - start_time)
                if images is None:
                    break
                save_path = f"{iteration_dir}/{file_stem}_{model}.png"
                images[0].save(save_path)
                generated.append((model, save_path))

            if len(generated) != len(models):
                # if generation fails, skip the prompt
                print(f"Skipping prompt after repeated generation failures: {prompt['prompt']}")
                continue

            for model, save_path in generated:
                I_new.setdefault(prompt["prompt"], {})[model] = save_path
                prompt["paths"].append(save_path)
                if model not in prompt["models"]:
                    prompt["models"].append(model)
            evaluated_prompts.append(prompt)

        # Only prompts with a complete set of images are evaluated and reported.
        P_a_new = evaluated_prompts
        if not P_a_new:
            print("No proposed prompt could be rendered by every model; stopping early.")
            break

        # save the generated images to wandb table
        I_data = {"prompt": list(I_new.keys())}
        for model in models:
            I_data[model] = [I_new[prompt_text][model] for prompt_text in I_new]
        image_df = pd.DataFrame(I_data)
        # cast the image paths to wandb media
        for model in models:
            image_df[model] = image_df[model].apply(lambda x: wandb.Image(x))

        # Find separable prompts in new set
        P_a_new_separable, P_a_new_inseparable, details = find_prompts(
            P_a_new, attribute, threshold, delta
        )

        image_df = image_df.set_index("prompt")
        image_df['prompt'] = image_df.index
        details_df = pd.DataFrame(details).set_index("prompt")
        image_df = image_df.join(details_df, how="left")
        wandb.log({f"generated_images-iter-{i}": wandb.Table(dataframe=image_df)})

        new_sep_prop = len(P_a_new_separable) / len(P_a_new)

        print("iter", i, len(prompts), len(P_a), len(P_a_new_separable), len(P_a_new_inseparable))
        wandb.log({"prop_new_sep_prompts": new_sep_prop, "prop_total_separable_prompts": len(P_a) / len(prompts), "iteration": i + 1, "total_image_generation_time": np.sum(image_generation_times)})
        wandb_logs.append(
            {
                "description": d_P_a,
                "num_separable_prompts": new_sep_prop,
                "separable_prompts": "\n".join([p["prompt"] for p in P_a_new_separable]),
                "inseparable_prompts": "\n".join([p["prompt"] for p in P_a_new_inseparable]),
                "iteration": i + 1,
            }
        )

        # NOTE(review): no early stopping is applied - all three iterations always run.
        # Stopping once best_prop reaches 1.0, or when it stays below 0.1, may have been
        # intended; confirm before adding it.
        if new_sep_prop > best_prop:
            best_prop = new_sep_prop
            best_iter = i + 1
            wandb.summary["best_prop"] = new_sep_prop
            wandb.summary["best_iter"] = best_iter

        # Iterate prompts
        if no_history:
            if len(P_a_new_separable) < 25:
                # supplement with random prompts from P_a
                P_a_new_separable += random.sample(P_a, min(25 - len(P_a_new_separable), len(P_a)))
            if len(P_a_new_inseparable) < 25:
                # supplement with random prompts from P_a_not_seperable; min() keeps the
                # sample size within the population so random.sample cannot raise
                P_a_new_inseparable += random.sample(
                    P_a_not_seperable,
                    min(25 - len(P_a_new_inseparable), len(P_a_not_seperable)),
                )
            d_P_a, P_a_new, logs, _ = propose_prompts(attribute, P_a_new_separable, P_a_new_inseparable)
        else:
            d_P_a, P_a_new, logs = iterate_prompts(
                attribute, chat_history, P_a_new_separable, P_a_new_inseparable
            )
        proposal_logs.append(logs)

        # Update P_a
        # NOTE(review): at this point P_a_new is the freshly proposed, not yet evaluated
        # batch, and it is added to BOTH lists. Extending with P_a_new_separable /
        # P_a_new_inseparable looks like the intent - confirm with the callers before
        # changing, since it alters the returned P_a.
        P_a_prompts = {p["prompt"] for p in P_a}
        P_a_not_seperable_prompts = {p["prompt"] for p in P_a_not_seperable}
        P_a.extend([p for p in P_a_new if p["prompt"] not in P_a_prompts])
        P_a_not_seperable.extend([p for p in P_a_new if p["prompt"] not in P_a_not_seperable_prompts])

        wandb.log(
            {
                "results table": wandb.Table(dataframe=pd.DataFrame(wandb_logs)),
                "proposals": wandb.Table(dataframe=pd.DataFrame(proposal_logs)),
            }
        )

    return P_a, P_a_new, d_P_a


def print_gpu_status():
    """Write a human-readable GPU report to stdout.

    The report is built from the dictionary returned by ``get_gpu_status()``,
    reading its ``available_gpus``, ``memory_usage`` and ``model_assignments``
    entries. When ``available_gpus`` is empty, only the banner and a
    CPU-fallback notice are printed.
    """
    gpu_report = get_gpu_status()
    rule = "=" * 50
    print("\n" + rule, "GPU STATUS", rule, sep="\n")

    devices = gpu_report["available_gpus"]
    if not devices:
        print("No GPUs available - using CPU")
        return

    for device_id, device in devices.items():
        print(f"GPU {device_id}: {device['name']}")
        print(f"  Total Memory: {device['memory_total_gb']:.1f} GB")
        print(f"  Available Memory: {device['memory_available_gb']:.1f} GB")
        # Devices without a usage entry are reported as 0 GB used.
        print(f"  Used Memory: {gpu_report['memory_usage'].get(device_id, 0):.1f} GB")

        # Names of the models whose assignment points at this device.
        resident = [
            model_name
            for model_name, target_gpu in gpu_report["model_assignments"].items()
            if target_gpu == device_id
        ]
        listing = ", ".join(resident) if resident else "None"
        print(f"  Models: {listing}")
        print()

    print("Model Assignments:")
    for model_name, target_gpu in gpu_report["model_assignments"].items():
        print(f"  {model_name} -> GPU {target_gpu}")
    print(f"{rule}\n")


if __name__ == "__main__":
    # Command-line entry point: parse options, load the prompt/image table,
    # pair up one image per model for every prompt, and hand the pairs to
    # compcon() for the requested attribute.
    parser = argparse.ArgumentParser(description="CoolMethod")
    parser.add_argument(
        "--attribute",
        type=str,
        default="special effects resembling fire and electricity",
        help="Attribute to compare models on",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="data/compcon_images",
        help="Output directory for images",
    )
    parser.add_argument("--num_prompts", type=int, help="Number of prompts to sample")
    parser.add_argument("--name", type=str, default="compcon", help="Name of the wandb run")
    parser.add_argument(
        "--data_file", type=str, default="data/TemplatesExpanded/results.csv", help="Path to the data file"
    )
    parser.add_argument("--no_history", action="store_true", help="Do not use chat history")
    # NOTE(review): this help text duplicates --no_history and the flag is not
    # read anywhere in this block -- confirm its intended meaning.
    parser.add_argument("--iterative_prompt", action="store_true", help="Do not use chat history")
    parser.add_argument("--models", nargs="+", default=["PixArt", "SD-Lightning"], help="Models to compare")
    parser.add_argument("--threshold", type=float, default=0.2, help="Threshold for cosine similarity")
    # NOTE(review): help text is identical to --threshold; confirm what delta controls.
    parser.add_argument("--delta", type=float, default=0.05, help="Threshold for cosine similarity")
    args = parser.parse_args()

    # The pairing loop below reads models[0] and models[1]; report a usage
    # error up front instead of failing later with a bare IndexError.
    if len(args.models) < 2:
        parser.error("--models requires at least two model names")

    # Print GPU status before starting
    print_gpu_status()

    wandb.init(project="cool-method", name=args.name, config=vars(args))
    prompts = pd.read_csv(args.data_file)
    models = args.models

    prompt_data = []
    if args.num_prompts:
        unique_prompts = prompts["Prompt"].unique().tolist()
        # random.sample raises ValueError when asked for more items than the
        # population holds, so cap the request at the number of prompts available.
        prompts_sample = random.sample(unique_prompts, min(args.num_prompts, len(unique_prompts)))
        prompts = prompts[prompts["Prompt"].isin(prompts_sample)]

    # The per-model row selection does not depend on the prompt, so do it once
    # here rather than re-filtering the whole table for every prompt.
    model_1_rows = prompts[prompts["group_name"] == models[0]]
    model_2_rows = prompts[prompts["group_name"] == models[1]]

    for prompt in prompts["Prompt"].unique():
        model_1_sample = model_1_rows[model_1_rows["Prompt"] == prompt]["path"].tolist()
        model_2_sample = model_2_rows[model_2_rows["Prompt"] == prompt]["path"].tolist()
        # A prompt can only be compared when both models have at least one
        # image for it; indexing [0] on an empty list would raise IndexError.
        if not model_1_sample or not model_2_sample:
            print(f"Skipping prompt without an image for both models: {prompt!r}")
            continue
        prompt_data.append(
            {"prompt": prompt, "paths": [model_1_sample[0], model_2_sample[0]], "models": models}
        )

    attribute = args.attribute
    compcon(
        prompt_data,
        models,
        attribute,
        f"{args.output_dir}/{attribute.replace(' ', '_')}",
        no_history=args.no_history,
        threshold=args.threshold,
        delta=args.delta,
    )

    # Print final GPU status
    print_gpu_status()