By comparing these results to the baseline performance of the original model, you will see the benefits of applying EditScore as a reranker.
## Application 2: Reinforcement Fine-Tuning
Beyond evaluation, **EditScore** can be used as a high-quality reward signal to fine-tune your image editing models using Reinforcement Learning (RL), leading to significantly improved performance.

We employ the **FlowGRPO** algorithm, combining its strengths with EditScore's accurate, real-time feedback to create a powerful end-to-end fine-tuning pipeline. This process effectively guides the model toward generating better edits.
### 1. Prepare Training Data
First, set up the dataset for RL fine-tuning.
1. **Download the Data**

   Download the official RL training data from [EditScore-RL-Data](https://huggingface.co/datasets/EditScore/EditScore-RL-Data).

2. **Create the Meta File**

   The uploaded dataset uses relative image paths. Run the following script to convert them to absolute paths based on your local environment:
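The repository ships its own conversion script; purely as an illustrative sketch, a minimal version might look like the following (the image-path field names `input_images` and `output_image` are assumptions, not the dataset's confirmed schema — adjust them to match the actual meta file):

```python
import json
from pathlib import Path


def make_absolute(meta_in: str, meta_out: str, image_root: str,
                  path_keys=("input_images", "output_image")) -> int:
    """Rewrite relative image paths in a JSONL meta file as absolute paths.

    path_keys are assumed field names; change them to the dataset's schema.
    Returns the number of records written.
    """
    root = Path(image_root).resolve()
    count = 0
    with open(meta_in) as fin, open(meta_out, "w") as fout:
        for line in fin:
            record = json.loads(line)
            for key in path_keys:
                value = record.get(key)
                if isinstance(value, str):
                    record[key] = str(root / value)
                elif isinstance(value, list):  # some schemas store a list of images
                    record[key] = [str(root / v) for v in value]
            fout.write(json.dumps(record) + "\n")
            count += 1
    return count
```

For example, `make_absolute("data/rl.jsonl", "data/rl_abs.jsonl", "/path/to/images")` would write a new meta file with every path anchored at `/path/to/images`.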
### 2. Download the Base Model

Download the base model from [OmniGen2](https://huggingface.co/OmniGen2/OmniGen2), convert its weight files to the `pytorch_model.bin` format, and update `model.pretrained_model_path` in `options/omnigen2_edit_rl.yml` to point to your local copy.
### 3. Launch the Reward Server
RL training requires a live reward signal. Before starting the training process, you must launch the **EditScore Reward Server**. This server will provide real-time scores for the generated images during training.

Our reward server is built with two components: a **proxy** and one or more **reward servers**. The proxy receives requests from the training node, distributes them to the individual reward servers for computation, and then collects the results to send back. This architecture allows for easy scaling across multiple machines.

We provide a convenient script to launch the entire server stack across multiple machines, assuming you have `ssh` access to all reward server nodes.
> * Before running the script, you **must** specify the IP addresses of your reward server machines in the corresponding `.yml` configuration file.
> * If you cannot use `ssh` to control the nodes, please refer to the logic in `reward_server/start_multi_machines.sh` to manually start the proxy and server processes on each machine.
> * You can monitor the status of the proxy and servers by checking the log files in the `reward_server/logs/` directory.
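The real proxy lives in `reward_server/`; purely as an illustration of the fan-out pattern described above (not the actual implementation), the proxy's dispatch logic might be sketched as:

```python
import itertools
from concurrent.futures import ThreadPoolExecutor


class RewardProxy:
    """Round-robins scoring requests over a pool of reward-server workers.

    Illustrative only: real workers would be HTTP calls to the reward
    servers listed in the .yml config, not in-process callables.
    """

    def __init__(self, score_fns):
        self.score_fns = score_fns                      # one callable per reward server
        self.rr = itertools.cycle(range(len(score_fns)))
        self.pool = ThreadPoolExecutor(max_workers=len(score_fns))

    def score_batch(self, requests):
        # Dispatch each request to the next server in round-robin order,
        # then gather the results back in the original request order.
        futures = [self.pool.submit(self.score_fns[next(self.rr)], r)
                   for r in requests]
        return [f.result() for f in futures]
```

Because results are collected per-future, adding more reward servers only changes the pool size — which is what makes the multi-machine scaling described above straightforward.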
### 3.5 (Optional) Reward Server Sanity Check
To ensure the reward server is configured correctly and running as expected, we provide a sanity check script.
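The sanity-check script itself ships with the repository; as a rough standalone sketch (the endpoint URL, request payload, and the `score` response field are all assumptions, not the server's confirmed API), such a check boils down to posting one request to the proxy and validating the reply:

```python
import json
from urllib import request


def check_reward_server(proxy_url: str, payload: dict) -> float:
    """POST one scoring request and verify a numeric score comes back."""
    req = request.Request(
        proxy_url,
        data=json.dumps(payload).encode("utf-8"),
        headers={"Content-Type": "application/json"},
    )
    with request.urlopen(req, timeout=30) as resp:
        body = json.loads(resp.read())
    return float(body["score"])  # "score" is an assumed response field
```

If the call raises or the response is not a valid number, the server stack (proxy IPs, per-node server processes) is worth re-checking via the logs in `reward_server/logs/` before launching a full training run.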