Skip to content

Commit da9e076

Browse files
committed
update
1 parent 1471256 commit da9e076

37 files changed

Lines changed: 867 additions & 1124 deletions

editscore/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@ def __init__(
2626
num_pass: int=1,
2727
reduction: str="average_last",
2828
seed: int=42,
29-
enable_lora: bool=False,
30-
lora_path: str="",
29+
lora_path: Optional[str]=None,
3130
cache_dir: Optional[str]=None,
3231
) -> None:
3332
self.backbone = backbone
@@ -45,7 +44,6 @@ def __init__(
4544
vlm_model=model_name_or_path,
4645
temperature=temperature,
4746
seed=seed,
48-
enable_lora=enable_lora,
4947
lora_path=lora_path,
5048
)
5149
elif self.backbone == "qwen25vl_vllm":
@@ -58,7 +56,6 @@ def __init__(
5856
max_num_batched_tokens=max_num_batched_tokens,
5957
temperature=temperature,
6058
seed=seed,
61-
enable_lora=enable_lora,
6259
lora_path=lora_path,
6360
cache_dir=cache_dir,
6461
)

editscore/mllm_tools/qwen25vl.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,16 +36,12 @@ def __init__(
3636
vlm_model,
3737
temperature: float = 0.7,
3838
seed: Optional[int] = None,
39-
enable_lora: bool = False,
40-
lora_path: str = "",
39+
lora_path: Optional[str] = None,
4140
) -> None:
42-
self.enable_lora = enable_lora
43-
self.lora_path = lora_path
44-
4541
self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
4642
vlm_model, torch_dtype=torch.bfloat16, device_map="auto"
4743
)
48-
if enable_lora:
44+
if lora_path:
4945
self.model = PeftModel.from_pretrained(self.model, lora_path)
5046
self.model = self.model.merge_and_unload()
5147

editscore/mllm_tools/qwen25vl_vllm.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,10 @@ def __init__(
4848
max_num_batched_tokens=1536,
4949
temperature: float = 0.7,
5050
seed: Optional[int] = None,
51-
enable_lora: bool = False,
52-
lora_path: str = "",
51+
lora_path: Optional[str] = None,
5352
cache_dir: Optional[str] = None,
5453
) -> None:
55-
self.enable_lora = enable_lora
56-
self.lora_path = lora_path
57-
58-
if self.enable_lora:
54+
if lora_path:
5955
if cache_dir is None:
6056
root_dir = torch.hub.get_dir() # default: ~/.cache/torch/hub
6157

examples/OmniGen2-RL/README.md

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -68,31 +68,78 @@ bash evaluation/GEdit-Bench/omnigen2_16samples_select_best_editscore_pass4_eval.
6868
By comparing these results to the baseline performance of the original model, you will see the benefits of applying EditScore as a reranker.
6969

7070
## Application 2: Reinforcement Fine-Tuning
71-
Use EditScore to provide a high-quality reward signal to train models for significantly better image editing performance. We employ the FlowGRPO algorithm combined with EditScore's accurate evaluation capabilities to achieve end-to-end reinforcement learning fine-tuning.
71+
Beyond evaluation, **EditScore** can be used as a high-quality reward signal to fine-tune your image editing models using Reinforcement Learning (RL), leading to significantly improved performance.
7272

73-
### 1. Data and Model Download
74-
Download RL training data from [EditScore-RL-Data](https://huggingface.co/datasets/EditScore/EditScore-RL-Data), then put the `rl.jsonl` into `data/` and change its path in `data_configs/train/train.yml`
75-
To convert relative image paths to your absolute paths:
73+
We employ the **FlowGRPO** algorithm, combining its strengths with EditScore's accurate, real-time feedback to create a powerful end-to-end fine-tuning pipeline. This process effectively guides the model toward generating better edits.
74+
75+
### 1. Prepare Training Data
76+
First, set up the dataset for RL fine-tuning.
77+
1. Download the Data
78+
Download the official RL training data from [EditScore-RL-Data](https://huggingface.co/datasets/EditScore/EditScore-RL-Data).
79+
2. Create Meta File
80+
The uploaded dataset uses relative image paths. Run the following script to convert them to absolute paths based on your local environment:
81+
```bash
82+
python scripts/data/process_jsonl.py --input /path/to/EditScore-RL-Data/rl.jsonl --output /path/to/EditScore-RL-Data/rl_abs.jsonl --base-path /path/to/EditScore-RL-Data
83+
```
84+
3. Configure the Data Path
85+
Specify the path to your processed `.jsonl` file in the data configuration located at `data_configs/train/example/edit/all.yml`.
86+
For example:
87+
```yaml
88+
ratio_type: inside_ratio
89+
90+
data:
91+
-
92+
path: '/path/to/EditScore-RL-Data/rl_abs.jsonl' # <-- Ensure this path is correct
93+
type: 'edit'
94+
ratio: !!float 1
95+
```
96+
97+
### 2. Prepare the Base Model (OmniGen2)
7698
```bash
77-
cd examples/OmniGen2-RL/data
78-
python process_jsonl.py --input path/to/input.jsonl --output path/to/output.jsonl --base-path /your/absolute/base/path
99+
python scripts/misc/extract_bin_from_pipe.py
79100
```
80-
Download the base model OmniGen2 form [OmniGen2](https://huggingface.co/OmniGen2/OmniGen2),then change the model file format to pytorch_model.bin and modify `model.pretrained_model_path` in `options/omnigen2_edit_rl.yml`
81101

82-
### 2. Start Reward Server
102+
### 3. Launch the Reward Server
103+
RL training requires a live reward signal. Before starting the training process, you must launch the **EditScore Reward Server**. This server will provide real-time scores for the generated images during training.
83104

84-
Before beginning training, you need to start the EditScore reward server to provide real-time reward signal evaluation for RL training.
105+
Our reward server is built with two components: a **proxy** and one or more **reward servers**. The proxy receives requests from the training node, distributes them to the individual reward servers for computation, and then collects the results to send back. This architecture allows for easy scaling across multiple machines.
85106

86-
### 3. Start Training
107+
We provide a convenient script to launch the entire server stack across multiple machines, assuming you have `ssh` access to all reward server nodes.
108+
109+
```bash
110+
# Launch EditScore-7B Reward Server
111+
bash reward_server/start_multi_machines.sh --model_name=editscore_7B --config_path=reward_server/server_configs/editscore_7B.yml
112+
113+
# Launch EditScore-7B (Avg@4) Reward Server
114+
bash reward_server/start_multi_machines.sh --model_name=editscore_7B_pass4 --config_path=reward_server/server_configs/editscore_7B_pass4.yml
115+
116+
# Launch EditScore-72B Reward Server
117+
bash reward_server/start_multi_machines.sh --model_name=editscore_72B --config_path=reward_server/server_configs/editscore_72B.yml
118+
```
119+
120+
> **⚠️ Important Notes**
121+
>
122+
> * Before running the script, you **must** specify the IP addresses of your reward server machines in the corresponding `.yml` configuration file.
123+
> * If you cannot use `ssh` to control the nodes, please refer to the logic in `reward_server/start_multi_machines.sh` to manually start the proxy and server processes on each machine.
124+
> * You can monitor the status of the proxy and servers by checking the log files in the `reward_server/logs/` directory.
125+
126+
### 3.5 (Optional) Reward Server Sanity Check
127+
To ensure the reward server is configured correctly and running as expected, we provide a sanity check script.
128+
```bash
129+
python reward_server/scripts/utils/reward_server_sanity_check.py --config_path=reward_server/server_configs/editscore_7B.yml
130+
```
131+
Once these steps are complete, your environment is ready to begin the reinforcement learning fine-tuning process.
132+
133+
### 4. Start Training
87134

88135
**Configure Training Parameters**
89136

90137
Edit the `options/omnigen2_edit_rl.yml` configuration file, focusing on these key parameters:
91-
- `train.global_batch_size`: Global batch size (num_machines * num_unique_prompts_per_sampling * num_images_per_prompt)
92-
- `train.rl.num_images_per_prompt`: Rollout number of one prompt
138+
- `train.global_batch_size`: Global batch size across all GPUs (num_unique_prompts_per_sampling * num_images_per_prompt)
139+
- `train.batch_size`: Batch size per GPU (batch_size_per_forward * gradient_accumulation_steps * num_update_steps_per_sampling)
140+
- `train.rl.num_images_per_prompt`: The number of rollouts per prompt
93141
- `train.rl.num_unique_prompts_per_sampling`: Number of global unique prompts
94142

95-
96143
**Launch Distributed Training**
97144
```bash
98145
# Single machine training (8*H100 GPUs)

examples/OmniGen2-RL/data_configs/train/train.yml renamed to examples/OmniGen2-RL/data_configs/train/example/edit/all.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,6 @@ ratio_type: inside_ratio
22

33
data:
44
-
5-
path: 'data/rl.jsonl'
5+
path: '/path/to/EditScore-RL-Data/rl_abs.jsonl'
66
type: 'edit'
77
ratio: !!float 1
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
data:
2+
-
3+
path: 'data_configs/train/example/edit/all.yml'
4+
type: 'edit'
5+
ratio: !!float 1

examples/OmniGen2-RL/omnigen2/schedulers/scheduling_flow_match_euler_maruyama_discrete.py

Lines changed: 1 addition & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,6 @@ def index_for_timestep(self, timestep, schedule_timesteps=None):
136136

137137
return indices[pos].item()
138138

139-
# def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
140-
# return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
141-
142139
def set_timesteps(
143140
self,
144141
num_inference_steps: int = None,
@@ -196,21 +193,7 @@ def get_sigma_t(self, t, t_next=None):
196193
if t_next is None:
197194
t_next = t
198195
def _get_sigma_t(t, t_next):
199-
if self.config.sigma_schedule == "v1":
200-
return 0.7 * math.sqrt((1 - t) / max(t, 1e-4))
201-
elif self.config.sigma_schedule == "v2":
202-
if t <= 0.2:
203-
return (1 - t) ** 2
204-
else:
205-
return (1 - t) ** 4
206-
elif self.config.sigma_schedule == "v3":
207-
return 0.7 * ((1 - t) / (t_next)) ** 0.5
208-
elif self.config.sigma_schedule == "v4":
209-
return torch.tensor(0.3, dtype=torch.float32, device=t.device)
210-
elif self.config.sigma_schedule == "zero":
211-
return torch.tensor(0, dtype=torch.float32, device=t.device)
212-
else:
213-
raise ValueError(f"Invalid sigma scheduler: {self.config.sigma_schedule}")
196+
return self.config.sigma_coef * ((1 - t) / (t_next)) ** 0.5
214197
if t.ndim > 0:
215198
return torch.stack([_get_sigma_t(_t, _t_next) for _t, _t_next in zip(t, t_next)])
216199
else:
@@ -227,34 +210,6 @@ def step(
227210
mixed_precision: bool = False,
228211
return_dict: bool = True,
229212
) -> Union[FlowMatchEulerMaruyamaDiscreteSchedulerOutput, Tuple]:
230-
"""
231-
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
232-
process from the learned model outputs (most often the predicted noise).
233-
234-
Args:
235-
model_output (`torch.FloatTensor`):
236-
The direct output from learned diffusion model.
237-
timestep (`float`):
238-
The current discrete timestep in the diffusion chain.
239-
sample (`torch.FloatTensor`):
240-
A current instance of a sample created by the diffusion process.
241-
s_churn (`float`):
242-
s_tmin (`float`):
243-
s_tmax (`float`):
244-
s_noise (`float`, defaults to 1.0):
245-
Scaling factor for noise added to the sample.
246-
generator (`torch.Generator`, *optional*):
247-
A random number generator.
248-
return_dict (`bool`):
249-
Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
250-
tuple.
251-
252-
Returns:
253-
[`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
254-
If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
255-
returned, otherwise a tuple is returned where the first element is the sample tensor.
256-
"""
257-
258213
if (
259214
isinstance(timestep, int)
260215
or isinstance(timestep, torch.IntTensor)

0 commit comments

Comments
 (0)