Skip to content

Commit c4703e1

Browse files
Chamberlain0w0 authored and kilinchange committed
fix: rename dp_* to ddp_*, remove unnecessary comments
1 parent 1e37842 commit c4703e1

3 files changed

Lines changed: 10 additions & 12 deletions

File tree

infini_train/include/nn/parallel/ddp/distributed_optimizer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ class DistributedOptimizer final : public infini_train::Optimizer {
2121
public:
2222
DistributedOptimizer(OptimizerCreator base_optimizer_creator,
2323
const std::vector<std::shared_ptr<Tensor>> &full_params,
24-
const std::vector<std::shared_ptr<Module>> &model_chunks, size_t dp_world_size,
25-
size_t dp_rank);
24+
const std::vector<std::shared_ptr<Module>> &model_chunks, size_t ddp_world_size,
25+
size_t ddp_rank);
2626

2727
void Step() override;
2828

@@ -43,8 +43,8 @@ class DistributedOptimizer final : public infini_train::Optimizer {
4343
std::vector<std::shared_ptr<ParamAndGradBucketGroup>> bucket_groups_;
4444

4545
// DP info
46-
size_t dp_world_size_;
47-
size_t dp_rank_;
46+
size_t ddp_world_size_;
47+
size_t ddp_rank_;
4848

4949
// shard params
5050
std::vector<std::shared_ptr<Tensor>> shard_params_;

infini_train/src/nn/parallel/ddp/distributed_optimizer.cc

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ namespace infini_train::nn::parallel {
99
DistributedOptimizer::DistributedOptimizer(OptimizerCreator creator,
1010
const std::vector<std::shared_ptr<Tensor>> &full_params,
1111
const std::vector<std::shared_ptr<Module>> &model_chunks,
12-
size_t dp_world_size, size_t dp_rank)
13-
: Optimizer(full_params), dp_world_size_(dp_world_size), dp_rank_(dp_rank) {
12+
size_t ddp_world_size, size_t ddp_rank)
13+
: Optimizer(full_params), ddp_world_size_(ddp_world_size), ddp_rank_(ddp_rank) {
1414

15-
CHECK(dp_world_size_ > 1) << "DistributedOptimizer: dp_world_size must be greater than 1.";
15+
CHECK(ddp_world_size_ > 1) << "DistributedOptimizer: ddp_world_size must be greater than 1.";
1616

1717
for (size_t i = 0; i < model_chunks.size(); ++i) {
1818
auto ddp_chunk = std::dynamic_pointer_cast<DistributedDataParallel>(model_chunks[i]);
@@ -43,9 +43,9 @@ void DistributedOptimizer::BuildShardParamsAndBindGrads() {
4343
CHECK(bucket_param) << "DistributedOptimizer requires param buffer.";
4444
CHECK(bucket_grad) << "DistributedOptimizer requires grad buffer.";
4545

46-
CHECK_EQ(bucket_param->NumElements() % dp_world_size_, 0);
47-
const size_t bucket_shard_numel = bucket_param->NumElements() / dp_world_size_;
48-
const size_t bucket_shard_start = dp_rank_ * bucket_shard_numel;
46+
CHECK_EQ(bucket_param->NumElements() % ddp_world_size_, 0);
47+
const size_t bucket_shard_numel = bucket_param->NumElements() / ddp_world_size_;
48+
const size_t bucket_shard_start = ddp_rank_ * bucket_shard_numel;
4949
const size_t bucket_shard_end = bucket_shard_start + bucket_shard_numel;
5050

5151
// Iterate param in bucket, build each param(or param_shard) seperately

infini_train/src/nn/parallel/ddp/param_and_grad_buffer.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,8 +545,6 @@ PartitionBuckets(const std::vector<std::shared_ptr<ParamAndGradBuffer>> &buffers
545545
auto ddp_world_size = buffers.front()->ddp_world_size();
546546

547547
for (const auto &buffer : buffers) {
548-
// TODO(zbl): override == for ddp config
549-
// CHECK(buffer->ddp_config() == ddp_config) << "PartitionBuckets: buffers have different ddp_config.";
550548
CHECK(buffer->ddp_pg() == ddp_pg) << "PartitionBuckets: buffers have different ddp_pg.";
551549
CHECK(buffer->ddp_world_size() == ddp_world_size)
552550
<< "PartitionBuckets: buffers have different ddp_world_size.";

0 commit comments

Comments
 (0)