Skip to content

Commit 8110b2e

Browse files
leo-q8 and claude committed
fix: restore training data augmentation behavior for det model
1. Restore set_epoch_as_seed in SimpleDataSet to re-enable the adaptive shrink_ratio (0.4→0.6) curriculum learning in MakeBorderMap and MakeShrinkMap across training epochs.
2. Remove the default p=1.0 for the Affine augmentation, restoring the albumentations default of p=0.5 (50% rotation probability).

The two regressions reverted here (the missing set_epoch_as_seed call and the forced p=1.0) had caused V5 dataset precision to drop significantly (e.g. blur 0.904→0.709, printing_ch 0.926→0.708) while V4 remained unaffected.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cbf6837 commit 8110b2e

2 files changed

Lines changed: 13 additions & 2 deletions

File tree

ppocr/data/imaug/iaa_augment.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,6 @@ def map_arguments(self, augmenter_type, augmenter_args):
158158
augmenter_args["rotate"] = rotate
159159
# Set fit_output=True to expand canvas when rotating to prevent content loss
160160
# Use BORDER_CONSTANT with value 0 (black padding) for clean borders
161-
if "p" not in augmenter_args:
162-
augmenter_args["p"] = 1.0
163161
if "fit_output" not in augmenter_args:
164162
augmenter_args["fit_output"] = True
165163
if "border_mode" not in augmenter_args:

ppocr/data/simple_dataset.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(self, config, mode, logger, seed=None):
5151
self.data_idx_order_list = list(range(len(self.data_lines)))
5252
if self.mode == "train" and self.do_shuffle:
5353
self.shuffle_data_random()
54+
self.set_epoch_as_seed(seed, dataset_config)
5455
self.ops = create_operators(dataset_config["transforms"], global_config)
5556
self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx", 2)
5657
self.need_reset = True in [x < 1 for x in ratio_list]
@@ -73,6 +74,18 @@ def shuffle_data_random(self):
7374
random.shuffle(self.data_lines)
7475
return
7576

77+
def set_epoch_as_seed(self, seed, dataset_config):
78+
if self.mode == "train":
79+
try:
80+
dataset_config["transforms"][5]["MakeBorderMap"][
81+
"epoch"
82+
] = (seed if seed is not None else 0)
83+
dataset_config["transforms"][6]["MakeShrinkMap"][
84+
"epoch"
85+
] = (seed if seed is not None else 0)
86+
except Exception:
87+
return
88+
7689
def _try_parse_filename_list(self, file_name):
7790
# multiple images -> one gt label
7891
if len(file_name) > 0 and file_name[0] == "[":

0 commit comments

Comments
 (0)