-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathnodes.py
More file actions
75 lines (66 loc) · 2.3 KB
/
nodes.py
File metadata and controls
75 lines (66 loc) · 2.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import io
import os
import openai
import base64
import numpy as np
from PIL import Image
from dotenv import load_dotenv
load_dotenv()
# Model names offered as the choices for the "model" input of
# OpenAICaptionImage; the selected value is passed unchanged as the
# `model` argument of the chat-completions request.
MODELS = [
    "gpt-4o",
    "gpt-4o-mini",
    "chatgpt-4o-latest",
    "gpt-4-turbo",
]
class OpenAICaptionImage:
    """Node that asks an OpenAI chat model to describe an image.

    The class-level attributes (``INPUT_TYPES``, ``RETURN_TYPES``,
    ``RETURN_NAMES``, ``CATEGORY``, ``FUNCTION``) declare the node's inputs,
    its single string output and the method to invoke.
    NOTE(review): this looks like the ComfyUI custom-node convention —
    confirm against the host application.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the declaration of the node's required inputs."""
        return {
            "required": {
                "image_in": ("IMAGE", {}),
                "model": (MODELS,),
                "system_prompt": ("STRING", {"default": "You are a helpful assistant."}),
                "caption_prompt": ("STRING", {"default": "What's in this image?"}),
                "max_tokens": ("INT", {"default": 300}),
                "temperature": ("FLOAT", {"default": 0.5}),
            },
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("text_out",)
    CATEGORY = "openai"
    FUNCTION = "caption"

    @staticmethod
    def _to_uint8_array(image_in):
        """Convert an image tensor/array to a single uint8 image array.

        Args:
            image_in: A tensor exposing ``.cpu().numpy()`` or anything
                ``np.asarray`` accepts. Presumably laid out as
                (batch, height, width, channels) with floats in [0, 1] —
                TODO confirm against the caller.

        Returns:
            A ``np.uint8`` array with values scaled by 255 and clipped to
            [0, 255]. A 4-D input is reduced to its first batch item, and a
            trailing single-channel axis is dropped so the result is 2-D.
        """
        if hasattr(image_in, "cpu"):
            array = image_in.cpu().numpy()
        else:
            array = np.asarray(image_in)

        # Drop only the batch axis. A blanket squeeze() would leave batches
        # of more than one image 4-D (which cannot become a single picture)
        # and would also delete a height/width axis of size 1.
        if array.ndim == 4:
            array = array[0]
        # A lone channel axis must go: (H, W, 1) is not a layout the image
        # constructor understands, while (H, W) is read as grayscale.
        if array.ndim == 3 and array.shape[-1] == 1:
            array = array[..., 0]

        return np.clip(255.0 * array, 0, 255).astype(np.uint8)

    def caption(self, image_in, model, system_prompt, caption_prompt, max_tokens, temperature):
        """Send the image and prompts to OpenAI and return the reply text.

        Args:
            image_in: Image tensor; see ``_to_uint8_array`` for the accepted
                layouts. Only the first image of a batch is captioned.
            model: Model name forwarded to the chat-completions request.
            system_prompt: Content of the system message.
            caption_prompt: Text part of the user message sent with the image.
            max_tokens: Forwarded as ``max_tokens`` of the request.
            temperature: Forwarded as ``temperature`` of the request.

        Returns:
            A 1-tuple holding the stripped text of the first choice.

        Raises:
            ValueError: If the response has no choices or the first choice
                carries no content.
        """
        # Encode the image as a base64 PNG so it can be inlined as a data URL.
        pil_image = Image.fromarray(self._to_uint8_array(image_in))
        buffered = io.BytesIO()
        pil_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

        # The key is read from the environment on every call.
        api_key = os.getenv("OPENAI_API_KEY")
        client = openai.OpenAI(api_key=api_key)

        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "system",
                    "content": system_prompt,
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": caption_prompt},
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
                    ],
                },
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )

        # Guard the index below: an empty list would otherwise surface as a
        # bare IndexError with no hint about the cause.
        if not response.choices:
            raise ValueError("No choices in response")
        content = response.choices[0].message.content
        if content is None:
            raise ValueError("No content in response")

        return (content.strip(),)