Skip to content

Commit 1471256

Browse files
committed
update
1 parent 783d425 commit 1471256

2 files changed

Lines changed: 96 additions & 1 deletion

File tree

examples/OmniGen2-RL/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,11 @@ Use EditScore to provide a high-quality reward signal to train models for signif
7272

7373
### 1. Data and Model Download
7474
Download RL training data from [EditScore-RL-Data](https://huggingface.co/datasets/EditScore/EditScore-RL-Data), then put the `rl.jsonl` into `data/` and change its path in `data_configs/train/train.yml`
75-
75+
To convert relative image paths to your absolute paths:
76+
```bash
77+
cd examples/OmniGen2-RL/data
78+
python process_jsonl.py --input path/to/input.jsonl --output path/to/output.jsonl --base-path /your/absolute/base/path
79+
```
7680
Download the base model OmniGen2 from [OmniGen2](https://huggingface.co/OmniGen2/OmniGen2), then change the model file format to pytorch_model.bin and modify `model.pretrained_model_path` in `options/omnigen2_edit_rl.yml`
7781

7882
### 2. Start Reward Server
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import json
2+
import os
3+
import argparse
4+
5+
def convert_jsonl_paths(input_file, output_file, base_path, max_records=100000):
    """
    Convert JSONL file image paths from relative to absolute paths.

    Each non-blank line of ``input_file`` is parsed as JSON. When the record
    is an object whose ``input_images`` field is a list, every string entry
    that starts with ``images/`` is prefixed with ``base_path``; all other
    entries are kept unchanged. Every successfully parsed record is written
    to ``output_file``, one JSON document per line. Lines that are not valid
    JSON are skipped with a message.

    Args:
        input_file: Path to input JSONL file
        output_file: Path to output JSONL file (overwritten if it exists)
        base_path: Base directory path for converting relative paths
        max_records: Maximum number of records to write, default 100000

    Returns:
        The number of records written to ``output_file``.
    """
    image_prefix = "images/"
    count = 0

    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line in infile:
            if count >= max_records:
                break

            # Blank lines (e.g. a trailing newline) are not records; ignore
            # them quietly instead of reporting them as invalid JSON.
            line = line.strip()
            if not line:
                continue

            # Keep the try body minimal: only parsing can raise here.
            try:
                data = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON line: {e}")
                continue

            # Only JSON objects can carry an "input_images" field; other
            # valid JSON values (numbers, strings, lists) pass through as-is.
            if isinstance(data, dict) and isinstance(data.get("input_images"), list):
                # Prepend the base path rather than using str.replace, which
                # would also rewrite any later "images/" inside the path.
                data["input_images"] = [
                    f"{base_path}/{path}"
                    if isinstance(path, str) and path.startswith(image_prefix)
                    else path
                    for path in data["input_images"]
                ]

            # Write converted data
            outfile.write(json.dumps(data, ensure_ascii=False) + '\n')
            count += 1

            # Print progress every 1000 records
            if count % 1000 == 0:
                print(f"Processed {count} records")

    print(f"Conversion completed! Total processed records: {count}")
    print(f"Output file: {output_file}")
    return count
57+
58+
def main():
    """
    Command-line entry point: parse arguments, validate them and run the
    JSONL path conversion.

    Exits with status 1 (message on stderr) when the input file does not
    exist or when the output path points at the input file. A missing base
    path only produces a warning, since the conversion itself does not read
    from it.
    """
    # Local import: only this entry point needs sys, for stderr and exit codes.
    import sys

    parser = argparse.ArgumentParser(description="Convert JSONL file image paths from relative to absolute")
    parser.add_argument("--input", "-i", required=True, help="Input JSONL file path")
    parser.add_argument("--output", "-o", required=True, help="Output JSONL file path")
    parser.add_argument("--base-path", "-b", required=True, help="Base directory path for converting relative paths")
    parser.add_argument("--max-records", "-m", type=int, default=100000, help="Maximum number of records to process (default: 100000)")

    args = parser.parse_args()

    # Validate input file exists; fail with a non-zero status so shell
    # pipelines and scripts can detect the error.
    if not os.path.exists(args.input):
        print(f"Error: Input file '{args.input}' does not exist", file=sys.stderr)
        sys.exit(1)

    # The output is opened in write mode before the input is read, so writing
    # onto the input file would truncate it and lose the data.
    if os.path.realpath(args.input) == os.path.realpath(args.output):
        print("Error: Output file must be different from input file", file=sys.stderr)
        sys.exit(1)

    # Create output directory if it doesn't exist
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(output_dir, exist_ok=True)
        print(f"Created output directory: {output_dir}")

    # Validate base path exists
    if not os.path.exists(args.base_path):
        print(f"Warning: Base path '{args.base_path}' does not exist")

    print(f"Input file: {args.input}")
    print(f"Output file: {args.output}")
    print(f"Base path: {args.base_path}")
    print(f"Max records: {args.max_records}")
    print("-" * 50)

    convert_jsonl_paths(args.input, args.output, args.base_path, args.max_records)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)