1010 "_class_name" : " dwm.fs.czip.CombinedZipFileSystem" ,
1111 "fs" : {
1212 "_class_name" : " dwm.fs.dirfs.DirFileSystem" ,
13- "path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan"
13+ "path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan"
1414 },
1515 "paths" : [
1616 " data/opendv/opendv-youtube-10hz-720_0.zip" ,
2323 "_class_name" : " torch.distributed.device_mesh.init_device_mesh" ,
2424 "device_type" : " cuda" ,
2525 "mesh_shape" : [
26- 4 ,
26+ 2 ,
2727 8
2828 ]
2929 }
3636 "_class_name" : " dwm.pipelines.ctsd.CrossviewTemporalSD" ,
3737 "common_config" : {
3838 "frame_prediction_style" : " ctsd" ,
39- "reference_frame_count" : 3 ,
4039 "cat_condition" : true ,
4140 "cond_with_action" : false ,
4241 "condition_on_all_frames" : true ,
101100 "memory_efficient_batch" : 12
102101 },
103102 "training_config" : {
103+ "freezing_pattern" : " ^(transformer_blocks|time_text_embed|context_embedder|pos_embed|norm_out|proj_out)$" ,
104104 "text_prompt_condition_ratio" : 0.8 ,
105- "3dbox_condition_ratio" : 0 ,
106- "hdmap_condition_ratio" : 0 ,
107105 "explicit_view_modeling_ratio" : 0.8 ,
108106 "reference_frame_count" : 3 ,
109107 "generation_task_ratio" : 0.2 ,
110108 "image_generation_ratio" : 0.5 ,
111109 "all_reference_visible_ratio" : 0.8 ,
112110 "reference_visible_rate" : 0.5 ,
113111 "disable_reference_frame_loss" : true ,
114- "enable_grad_scaler" : true ,
115- "freezing_pattern" : " ^(transformer_blocks|time_text_embed|context_embedder|pos_embed|norm_out|proj_out)"
112+ "enable_grad_scaler" : true
116113 },
117114 "inference_config" : {
118115 "guidance_scale" : 3 ,
121118 448 ,
122119 252
123120 ],
124- "sequence_length_per_iteration" : 6 ,
121+ "generate_frames_for_reference" : false ,
122+ "sequence_length_per_iteration" : 19 ,
125123 "reference_frame_count" : 3 ,
126124 "autoregression_data_exception_for_take_sequence" : [
127125 " crossview_mask"
167165 "mixer_type" : " AlphaBlender" ,
168166 "merge_factor" : 2
169167 },
170- "pretrained_model_name_or_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/models/stable-diffusion-3-medium-diffusers" ,
171- "model_checkpoint_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/models/stable-diffusion-3-medium-diffusers/transformer/diffusion_pytorch_model.safetensors" ,
168+ "pretrained_model_name_or_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/models/stable-diffusion-3-medium-diffusers" ,
169+ "model_checkpoint_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/models/stable-diffusion-3-medium-diffusers/transformer/diffusion_pytorch_model.safetensors" ,
172170 "model_load_state_args" : {
173171 "strict" : false
174172 },
179177 },
180178 "fvd" : {
181179 "_class_name" : " dwm.metrics.fvd.FrechetVideoDistance" ,
182- "inception_3d_checkpoint_path" : " /mnt/storage /user/wuzehuan/Downloads/models/inception_3d /i3d_pretrained_400.pt" ,
180+ "inception_3d_checkpoint_path" : " /mnt/afs /user/wuzehuan/Documents/DWM/externals/TATS/tats/fvd /i3d_pretrained_400.pt" ,
183181 "sequence_count" : 16
184182 }
185183 }
192190 "_class_name" : " dwm.common.get_state" ,
193191 "key" : " opendv_fs"
194192 },
195- "meta_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/nijingcheng/datasets/OpenDV-YouTube.json" ,
196- "sequence_length" : 6 ,
193+ "meta_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/nijingcheng/datasets/OpenDV-YouTube.json" ,
194+ "sequence_length" : 19 ,
197195 "fps_stride_tuples" : [
198196 [
199197 10 ,
207205 ],
208206 "enable_fake_camera_transforms" : true ,
209207 "image_description_settings" : {
210- "path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_caption.json" ,
211- "candidates_times_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_candidates_times.json" ,
208+ "path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_caption.json" ,
209+ "candidates_times_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_candidates_times.json" ,
212210 "seed" : 5 ,
213211 "reorder_keys" : true ,
214212 "drop_rates" : {
280278 "_class_name" : " dwm.common.get_state" ,
281279 "key" : " opendv_fs"
282280 },
283- "meta_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/nijingcheng/datasets/OpenDV-YouTube.json" ,
284- "sequence_length" : 18 ,
281+ "meta_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/nijingcheng/datasets/OpenDV-YouTube.json" ,
282+ "sequence_length" : 19 ,
285283 "fps_stride_tuples" : [
286284 [
287285 10 ,
288- 180
286+ 60
289287 ]
290288 ],
291289 "split" : " Val" ,
292290 "mini_batch" : 6 ,
293291 "enable_fake_camera_transforms" : true ,
294292 "image_description_settings" : {
295- "path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_caption.json" ,
296- "candidates_times_path" : " /cache/aoss-v2.st -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_candidates_times.json"
293+ "path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_caption.json" ,
294+ "candidates_times_path" : " /cache/aoss.cn -sh-01.sensecoreapi-oss.cn/users/wuzehuan/workspaces/worldmodels/data/opendv_candidates_times.json"
297295 },
298296 "stub_key_data_dict" : {
299297 "crossview_mask" : [
351349 ]
352350 },
353351 "training_dataloader" : {
354- "batch_size" : 1 ,
352+ "batch_size" : 2 ,
355353 "num_workers" : 3 ,
356354 "prefetch_factor" : 3 ,
357355 "collate_fn" : {
389387 "persistent_workers" : true
390388 },
391389 "informations" : {
392- "fid" : -1 ,
393- "fvd" : -1 ,
390+ "fid" : 9.10 ,
391+ "fvd" : 132.89 ,
394392 "total_batch_sizes" : 32 ,
395- "steps" : 60000
393+ "steps" : 30000
396394 }
397395}
0 commit comments