Skip to content

Commit 1544f0e

Browse files
Copilot and anxiangsir committed
Translate Chinese comments in remaining dataloader and tools files
Co-authored-by: anxiangsir <31175974+anxiangsir@users.noreply.github.com>
1 parent e6626b1 commit 1544f0e

3 files changed

Lines changed: 116 additions & 116 deletions

File tree

dataloader/ap_dataloader_dali_ip_mv.py

Lines changed: 27 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -122,8 +122,8 @@ def __init__(self, mode, source_params):
122122

123123
def get_frame_id_list(self, video_path, sequence_length,
124124
mv_unit_div: float = 4.0, # quarter-pel -> pixel
125-
mv_pct: float = 95.0, # MV 归一化分位数(传给 _mv_energy_norm
126-
res_pct: float = 95.0, # 残差归一化分位数(传给 _residual_energy_norm
125+
mv_pct: float = 95.0, # MV normalization percentile (passed to _mv_energy_norm)
126+
res_pct: float = 95.0, # Residual normalization percentile (passed to _residual_energy_norm)
127127
fuse_mode: str = "weighted",
128128
w_mv: float = 1.0,
129129
w_res: float = 1.0,):
@@ -133,7 +133,7 @@ def get_frame_id_list(self, video_path, sequence_length,
133133

134134
if self.mode in ["train", "val"]:
135135

136-
# 按照每一个seq进行分group
136+
# Group by each sequence
137137
# average_duration = duration // sequence_length
138138

139139
# if average_duration > 0:
@@ -153,24 +153,24 @@ def get_frame_id_list(self, video_path, sequence_length,
153153
elif hasattr(decord_vr, "get_keyframes"):
154154
key_idx = decord_vr.get_keyframes()
155155
if key_idx is not None:
156-
# key_idx 可能是 NDArray;转成 Python list 的整型帧号集合 只保留一帧为I帧
156+
# key_idx may be NDArray; convert to Python list of integer frame numbers, keep only one frame as I-frame
157157
I_list = np.asarray(key_idx)
158158
I_list = I_list.tolist()[0] if I_list.ndim > 1 else I_list.tolist()
159159
I_list = [int(i) for i in I_list if int(i) in frame_id_list]
160160
if len(I_list) >= self.tokeq_target_frames:
161-
# 如果 I 帧过多,优先保留前面的
161+
# If there are too many I-frames, prioritize keeping the earlier ones
162162
I_list = I_list[:self.tokeq_target_frames]
163163
P_list = []
164164
else:
165165
P_list = [i for i in range(len(frame_id_list)) if i not in I_list]
166166
except Exception:
167-
# 保底处理:忽略异常,后续用默认策略
168-
print("没有读取成功")
167+
# Fallback: ignore exception, use default strategy later
168+
print("Failed to read")
169169
# gop = max(1,int(self.gop_size))
170170
# I_list = [i for i, fid in enumerate(frame_id_list)if(int(fid)% gop)== 0]
171-
# 第一帧为I帧
171+
# First frame is I-frame
172172
I_list = [0]
173-
# 其余为 P 帧
173+
# Rest are P-frames
174174
P_list = [i for i in range(len(frame_id_list))if i not in I_list]
175175
# Map absolute frame id -> position in the sampled sequence
176176
frame_ids = frame_id_list
@@ -179,11 +179,11 @@ def get_frame_id_list(self, video_path, sequence_length,
179179
frame_ids = frame_id_list
180180
pos_map = {fid: i for i, fid in enumerate(frame_ids)}
181181

182-
# 读取视频帧
182+
# Read video frames
183183
decord_vr.seek(0)
184184
video_data = decord_vr.get_batch(frame_id_list).asnumpy()
185185

186-
# 转成 numpy array
186+
# Convert to numpy array
187187
I_list = np.array(I_list, dtype=np.int64)
188188
P_list = np.array(P_list, dtype=np.int64)
189189
I_pos_set = set(I_list.tolist())
@@ -226,47 +226,47 @@ def get_frame_id_list(self, video_path, sequence_length,
226226
residual,
227227
) = frame_tuple
228228

229-
# I 帧:直接置 0(与你 residual 逻辑一致)
229+
# I-frame: directly set to 0 (consistent with your residual logic)
230230
if pos in I_pos_set:
231231
if H0 is None:
232-
# 用残差Y来确定输出尺寸/类型
232+
# Use residual Y to determine output size/type
233233
y0 = residual if residual.ndim == 2 else cv2.cvtColor(residual, cv2.COLOR_BGR2YUV)[:, :, 0]
234234
y0 = np.asarray(y0)
235235
H0, W0, dtype0 = int(y0.shape[0]), int(y0.shape[1]), y0.dtype
236236
residuals_y[pos] = np.zeros((H0, W0), dtype=dtype0 or np.uint8)
237237

238238
else:
239-
# 1) MV (L0) 并上采样到 H×W
239+
# 1) Get MV (L0) and upsample to H×W
240240
mvx_hw = rdr._upsample_mv_to_hw(mv_x_L0.astype(np.float32))
241241
mvy_hw = rdr._upsample_mv_to_hw(mv_y_L0.astype(np.float32))
242242

243-
# 2) 取残差 Y
243+
# 2) Get residual Y
244244
Y_res = residual if residual.ndim == 2 else cv2.cvtColor(residual, cv2.COLOR_BGR2YUV)[:, :, 0]
245245

246-
# 初始化输出尺寸/类型(只在第一次命中时做)
246+
# Initialize output size/type (only done on first hit)
247247
if H0 is None:
248248
H0, W0, dtype0 = int(Y_res.shape[0]), int(Y_res.shape[1]), Y_res.dtype
249249

250-
# 若当前帧的尺寸与 H0×W0 不一致,做一次 resize 对齐(极少见,兜底)
250+
# If current frame size does not match H0×W0, do a resize alignment (rare, fallback)
251251
if (Y_res.shape[0] != H0) or (Y_res.shape[1] != W0):
252252
Y_res = cv2.resize(Y_res, (W0, H0), interpolation=cv2.INTER_AREA)
253253
if (mvx_hw.shape[0] != H0) or (mvx_hw.shape[1] != W0):
254254
mvx_hw = cv2.resize(mvx_hw, (W0, H0), interpolation=cv2.INTER_NEAREST)
255255
mvy_hw = cv2.resize(mvy_hw, (W0, H0), interpolation=cv2.INTER_NEAREST)
256256

257-
# 3) 归一化到 [0,1]
258-
# 下面这些超参请确保在外层有定义;如果没有,你也可以给个默认值:
257+
# 3) Normalize to [0,1]
258+
# Make sure these hyperparameters are defined in outer scope; if not, you can give default values:
259259
# mv_unit_div, mv_pct, res_pct, fuse_mode, w_mv, w_res
260260
mv_norm, _ = _mv_energy_norm(mvx_hw, mvy_hw, H0, W0, mv_unit_div=mv_unit_div, pct=mv_pct)
261261
res_norm, _ = _residual_energy_norm(Y_res, pct=res_pct)
262262

263-
# 4) 融合(weighted/sum/max/geomean 均可,默认 weighted
263+
# 4) Fusion (weighted/sum/max/geomean all work, default weighted)
264264
fused = _fuse_energy(mv_norm, res_norm, mode=fuse_mode, w_mv=w_mv, w_res=w_res)
265265

266-
# 写回你原来的容器(保持最小改动,用 uint8 存)
266+
# Write back to your original container (minimal change, store as uint8)
267267
residuals_y[pos] = (np.clip(fused, 0.0, 1.0) * 255.0).astype(dtype0 or np.uint8)
268268

269-
# 结束条件
269+
# End condition
270270
if all(x is not None for x in residuals_y):
271271
break
272272

@@ -289,9 +289,9 @@ def get_frame_id_list(self, video_path, sequence_length,
289289
combined_data = np.concatenate([video_data, residuals_y], axis=-1)
290290

291291
if H0 != video_data.shape[1] or W0 != video_data.shape[2]:
292-
print("[warn] residual尺寸与视频不一致: res=(%d,%d) video=(%d,%d)" % (H0, W0, video_data.shape[1], video_data.shape[2]))
292+
print("[warn] residual size does not match video: res=(%d,%d) video=(%d,%d)" % (H0, W0, video_data.shape[1], video_data.shape[2]))
293293
finally:
294-
# 恢复环境变量
294+
# Restore environment variables
295295
if _prev_y_only is None:
296296
os.environ.pop("UMT_HEVC_Y_ONLY", None)
297297
else:
@@ -348,13 +348,13 @@ def dali_pipeline(mode, source_params):
348348
combined_data,
349349
device="gpu",
350350
crop=[input_size, input_size],
351-
crop_pos_x=0.5, # 中心裁剪
351+
crop_pos_x=0.5, # Center crop
352352
crop_pos_y=0.5,
353353
dtype=types.UINT8,
354354
output_layout="FHWC"
355355
)
356356

357-
video_channels = source_params.get('video_channels', 3) # 例如 RGB=3
357+
video_channels = source_params.get('video_channels', 3) # e.g. RGB=3
358358
videos = fn.slice(combined_data, start=[0], shape=[video_channels], axes=[3])
359359

360360
res_zero_masks = fn.slice(combined_data, start=[video_channels], shape=[1], axes=[3])
@@ -395,7 +395,7 @@ def dali_pipeline(mode, source_params):
395395
combined_data = fn.resize(combined_data, device="gpu", resize_shorter=input_size, interp_type=types.INTERP_CUBIC)
396396
combined_data = fn.crop_mirror_normalize(combined_data, device="gpu", crop=[input_size, input_size], crop_pos_x=0.5, crop_pos_y=0.5, dtype=types.UINT8, output_layout="FHWC")
397397

398-
video_channels = source_params.get('video_channels', 3) # 例如 RGB=3
398+
video_channels = source_params.get('video_channels', 3) # e.g. RGB=3
399399
videos = fn.slice(combined_data, start=[0], shape=[video_channels], axes=[3])
400400

401401
res_zero_masks = fn.slice(combined_data, start=[video_channels], shape=[1], axes=[3])

0 commit comments

Comments
 (0)