Skip to content

Commit 308c69a

Browse files
committed
Enhance Vision Model Training with Robust Dataset Conversion and Memory Optimization
- Significantly improved train_vision.py with a more robust dataset conversion process
- Added detailed debugging and logging for dataset loading and conversion
- Implemented advanced memory management techniques for vision model training
- Enhanced tokenizer handling with fallback mechanisms
- Updated version to 2.0.76 across all relevant files
1 parent 666027d commit 308c69a

7 files changed

Lines changed: 78 additions & 38 deletions

File tree

docker/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
FROM python:3.11-slim
22
WORKDIR /app
33
COPY . .
4-
RUN pip install flask praisonai==2.0.75 gunicorn markdown
4+
RUN pip install flask praisonai==2.0.76 gunicorn markdown
55
EXPOSE 8080
66
CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]

docs/api/praisonai/deploy.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ <h2 id="raises">Raises</h2>
110110
file.write(&#34;FROM python:3.11-slim\n&#34;)
111111
file.write(&#34;WORKDIR /app\n&#34;)
112112
file.write(&#34;COPY . .\n&#34;)
113-
file.write(&#34;RUN pip install flask praisonai==2.0.75 gunicorn markdown\n&#34;)
113+
file.write(&#34;RUN pip install flask praisonai==2.0.76 gunicorn markdown\n&#34;)
114114
file.write(&#34;EXPOSE 8080\n&#34;)
115115
file.write(&#39;CMD [&#34;gunicorn&#34;, &#34;-b&#34;, &#34;0.0.0.0:8080&#34;, &#34;api:app&#34;]\n&#39;)
116116

praisonai.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ class Praisonai < Formula
33

44
desc "AI tools for various AI applications"
55
homepage "https://github.com/MervinPraison/PraisonAI"
6-
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/2.0.75.tar.gz"
6+
url "https://github.com/MervinPraison/PraisonAI/archive/refs/tags/2.0.76.tar.gz"
77
sha256 "1828fb9227d10f991522c3f24f061943a254b667196b40b1a3e4a54a8d30ce32" # Replace with actual SHA256 checksum
88
license "MIT"
99

praisonai/deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def create_dockerfile(self):
5656
file.write("FROM python:3.11-slim\n")
5757
file.write("WORKDIR /app\n")
5858
file.write("COPY . .\n")
59-
file.write("RUN pip install flask praisonai==2.0.75 gunicorn markdown\n")
59+
file.write("RUN pip install flask praisonai==2.0.76 gunicorn markdown\n")
6060
file.write("EXPOSE 8080\n")
6161
file.write('CMD ["gunicorn", "-b", "0.0.0.0:8080", "api:app"]\n')
6262

praisonai/train_vision.py

Lines changed: 71 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,14 @@
1212
import torch
1313
import shutil
1414
import subprocess
15+
import gc # For garbage collection
1516

16-
from datasets import load_dataset, concatenate_datasets
17+
from datasets import load_dataset, concatenate_datasets, Dataset
1718
from unsloth import FastVisionModel, is_bf16_supported
1819
from unsloth.trainer import UnslothVisionDataCollator
19-
from trl import SFTTrainer, SFTConfig
20+
from transformers import TrainingArguments
21+
from trl import SFTTrainer
22+
from tqdm import tqdm # Add progress bar
2023

2124

2225
class TrainVisionModel:
@@ -62,11 +65,21 @@ def prepare_model(self):
6265
use_gradient_checkpointing="unsloth"
6366
)
6467
print("DEBUG: Vision model and original tokenizer loaded.")
65-
if original_tokenizer.pad_token is None:
66-
original_tokenizer.pad_token = original_tokenizer.eos_token
67-
original_tokenizer.model_max_length = self.config.get("max_seq_length", 2048)
68+
69+
# Use the full processor that supports image inputs.
6870
self.hf_tokenizer = original_tokenizer
6971

72+
# Set pad token if needed
73+
if not hasattr(self.hf_tokenizer, 'pad_token') or self.hf_tokenizer.pad_token is None:
74+
if hasattr(self.hf_tokenizer, 'eos_token'):
75+
self.hf_tokenizer.pad_token = self.hf_tokenizer.eos_token
76+
elif hasattr(self.hf_tokenizer, 'bos_token'):
77+
self.hf_tokenizer.pad_token = self.hf_tokenizer.bos_token
78+
79+
# Set max length
80+
if hasattr(self.hf_tokenizer, 'model_max_length'):
81+
self.hf_tokenizer.model_max_length = self.config.get("max_seq_length", 2048)
82+
7083
# Add vision-specific LoRA adapters
7184
self.model = FastVisionModel.get_peft_model(
7285
self.model,
@@ -85,38 +98,62 @@ def prepare_model(self):
8598
print("DEBUG: Vision LoRA adapters added.")
8699

87100
def convert_sample(self, sample):
88-
# Use a default instruction or one from config
89-
instr = self.config.get("vision_instruction", "You are an expert radiographer. Describe accurately what you see in this image.")
101+
102+
instruction = self.config.get(
103+
"vision_instruction",
104+
"You are an expert radiographer. Describe accurately what you see in this image."
105+
)
90106
conversation = [
91-
{"role": "user", "content": [
92-
{"type": "text", "text": instr},
93-
{"type": "image", "image": sample["image"]}
94-
]},
95-
{"role": "assistant", "content": [
96-
{"type": "text", "text": sample["caption"]}
97-
]}
107+
{
108+
"role": "user",
109+
"content": [
110+
{"type": "text", "text": instruction},
111+
{"type": "image", "image": sample["image"]}
112+
]
113+
},
114+
{
115+
"role": "assistant",
116+
"content": [
117+
{"type": "text", "text": sample["caption"]}
118+
]
119+
},
98120
]
121+
99122
return {"messages": conversation}
100123

101124
def load_datasets(self):
102-
datasets = []
125+
all_converted = []
103126
for dataset_info in self.config["dataset"]:
104-
print("DEBUG: Loading vision dataset:", dataset_info)
105-
ds = load_dataset(dataset_info["name"], split=dataset_info.get("split_type", "train"))
106-
print("DEBUG: Converting dataset to vision conversation format...")
107-
ds = ds.map(self.convert_sample)
108-
datasets.append(ds)
109-
combined = concatenate_datasets(datasets)
110-
print("DEBUG: Combined vision dataset has", len(combined), "examples.")
111-
return combined
127+
print("\nDEBUG: Loading vision dataset:", dataset_info)
128+
ds = load_dataset(
129+
dataset_info["name"],
130+
split=dataset_info.get("split_type", "train")
131+
)
132+
print("DEBUG: Dataset size:", len(ds))
133+
print("DEBUG: First raw sample:", ds[0])
134+
print("DEBUG: Dataset features:", ds.features)
135+
136+
print("\nDEBUG: Converting dataset to vision conversation format...")
137+
converted_ds = [self.convert_sample(sample) for sample in ds]
138+
139+
# Debug first converted sample
140+
print("\nDEBUG: First converted sample structure:")
141+
first = converted_ds[0]
142+
print("DEBUG: Message keys:", first["messages"][0]["content"][1].keys())
143+
print("DEBUG: Image type in converted:", type(first["messages"][0]["content"][1].get("image")))
144+
145+
all_converted.extend(converted_ds)
146+
147+
print("\nDEBUG: Combined vision dataset has", len(all_converted), "examples.")
148+
return all_converted
112149

113150
def train_model(self):
114151
print("DEBUG: Starting vision training...")
115152
raw_dataset = self.load_datasets()
116153

117-
# Build training arguments using SFTConfig for vision tasks
118-
sft_config = SFTConfig(
119-
per_device_train_batch_size=self.config.get("per_device_train_batch_size", 2),
154+
# Build training arguments using TrainingArguments
155+
training_args = TrainingArguments(
156+
per_device_train_batch_size=self.config.get("per_device_train_batch_size", 1),
120157
gradient_accumulation_steps=self.config.get("gradient_accumulation_steps", 4),
121158
warmup_steps=self.config.get("warmup_steps", 5),
122159
max_steps=self.config.get("max_steps", 30),
@@ -131,18 +168,21 @@ def train_model(self):
131168
output_dir=self.config.get("output_dir", "outputs"),
132169
report_to="none" if not os.getenv("PRAISON_WANDB") else "wandb",
133170
remove_unused_columns=False,
134-
dataset_text_field="",
135-
dataset_kwargs={"skip_prepare_dataset": True},
136-
dataset_num_proc=self.config.get("dataset_num_proc", 4),
137-
max_seq_length=self.config.get("max_seq_length", 2048)
171+
# Add memory optimization settings
172+
gradient_checkpointing=True,
173+
max_grad_norm=1.0,
138174
)
139175

140176
trainer = SFTTrainer(
141177
model=self.model,
142178
tokenizer=self.hf_tokenizer,
143179
data_collator=UnslothVisionDataCollator(self.model, self.hf_tokenizer),
144180
train_dataset=raw_dataset,
145-
args=sft_config
181+
args=training_args,
182+
max_seq_length=self.config.get("max_seq_length", 2048),
183+
dataset_text_field="", # Required for vision training
184+
dataset_kwargs={"skip_prepare_dataset": True}, # Required for vision training
185+
packing=False # Explicitly set packing to False
146186
)
147187
print("DEBUG: Beginning vision trainer.train() ...")
148188
trainer.train()

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "PraisonAI"
3-
version = "2.0.75"
3+
version = "2.0.76"
44
description = "PraisonAI is an AI Agents Framework with Self Reflection. PraisonAI application combines PraisonAI Agents, AutoGen, and CrewAI into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customisation, and efficient human-agent collaboration."
55
readme = "README.md"
66
license = ""
@@ -84,7 +84,7 @@ autogen = ["pyautogen>=0.2.19", "praisonai-tools>=0.0.7", "crewai"]
8484

8585
[tool.poetry]
8686
name = "PraisonAI"
87-
version = "2.0.75"
87+
version = "2.0.76"
8888
description = "PraisonAI is an AI Agents Framework with Self Reflection. PraisonAI application combines PraisonAI Agents, AutoGen, and CrewAI into a low-code solution for building and managing multi-agent LLM systems, focusing on simplicity, customisation, and efficient human–agent collaboration."
8989
authors = ["Mervin Praison"]
9090
license = ""

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)