|
15 | 15 |
|
16 | 16 | """Unit tests for TransferQueue samplers.""" |
17 | 17 |
|
| 18 | +import sys |
| 19 | +from pathlib import Path |
18 | 20 | from typing import Any |
19 | 21 |
|
20 | 22 | import pytest |
21 | 23 |
|
22 | | -from transfer_queue.sampler import BaseSampler |
23 | | -from transfer_queue.sampler.grpo_group_n_sampler import GRPOGroupNSampler |
24 | | -from transfer_queue.sampler.sequential_sampler import SequentialSampler |
| 24 | +# Setup path |
| 25 | +parent_dir = Path(__file__).resolve().parent.parent |
| 26 | +sys.path.append(str(parent_dir)) |
| 27 | + |
| 28 | +from transfer_queue.sampler import BaseSampler # noqa: E402 |
| 29 | +from transfer_queue.sampler.grpo_group_n_sampler import GRPOGroupNSampler # noqa: E402 |
| 30 | +from transfer_queue.sampler.rank_aware_sampler import RankAwareSampler # noqa: E402 |
| 31 | +from transfer_queue.sampler.sequential_sampler import SequentialSampler # noqa: E402 |
25 | 32 |
|
26 | 33 |
|
27 | 34 | class TestBaseSampler: |
@@ -427,6 +434,156 @@ def test_grpo_sampler_insufficient_groups(self): |
427 | 434 | assert consumed == [] |
428 | 435 |
|
429 | 436 |
|
| 437 | +class TestRankAwareSampler: |
| 438 | + """Test cases for RankAwareSampler.""" |
| 439 | + |
| 440 | + def test_rank_aware_sampler_initialization(self): |
| 441 | + """Test RankAwareSampler initialization.""" |
| 442 | + sampler = RankAwareSampler() |
| 443 | + assert isinstance(sampler, BaseSampler) |
| 444 | + assert hasattr(sampler, "_states") |
| 445 | + assert sampler._states == {} |
| 446 | + |
| 447 | + def test_rank_aware_sampler_first_rank_sampling(self): |
| 448 | + """Test that first rank in DP group performs actual sampling.""" |
| 449 | + sampler = RankAwareSampler() |
| 450 | + ready_indexes = [0, 1, 2, 3, 4, 5] |
| 451 | + batch_size = 3 |
| 452 | + |
| 453 | + # When world_size == dp_world_size, fetches_per_batch = 1 |
| 454 | + # First rank samples and immediately marks consumed (no other ranks to wait for) |
| 455 | + sampled, consumed = sampler.sample(ready_indexes, batch_size, dp_group=0, dp_world_size=2, world_size=2) |
| 456 | + |
| 457 | + assert sampled == [0, 1, 2] |
| 458 | + # consumed is returned |
| 459 | + assert consumed == [0, 1, 2] |
| 460 | + assert len(sampled) == batch_size |
| 461 | + # State should be cleaned up |
| 462 | + assert sampler._states == {} |
| 463 | + |
| 464 | + def test_rank_aware_sampler_second_rank_gets_cached(self): |
| 465 | + """Test that second rank in DP group gets cached indices.""" |
| 466 | + sampler = RankAwareSampler() |
| 467 | + ready_indexes = [0, 1, 2, 3, 4, 5] |
| 468 | + batch_size = 3 |
| 469 | + dp_world_size = 2 |
| 470 | + world_size = 4 # Use world_size=4 so fetches_per_batch=2 |
| 471 | + |
| 472 | + # Rank 0 (dp_group=0) samples first |
| 473 | + sampled1, consumed1 = sampler.sample( |
| 474 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 475 | + ) |
| 476 | + |
| 477 | + # Rank 1 (dp_group=0) should get same cached indices |
| 478 | + sampled2, consumed2 = sampler.sample( |
| 479 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 480 | + ) |
| 481 | + |
| 482 | + assert sampled1 == sampled2 == [0, 1, 2] |
| 483 | + # First rank also returns the sampled indices as consumed, even though not all ranks have fetched yet |
| 484 | + assert consumed1 == [0, 1, 2] |
| 485 | + # Last rank returns consumed when all ranks have fetched |
| 486 | + assert consumed2 == [0, 1, 2] |
| 487 | + # State should be cleaned up |
| 488 | + assert sampler._states == {} |
| 489 | + |
| 490 | + def test_rank_aware_sampler_multiple_dp_groups(self): |
| 491 | + """Test that multiple DP groups work independently.""" |
| 492 | + sampler = RankAwareSampler() |
| 493 | + ready_indexes = [0, 1, 2, 3, 4, 5, 6, 7] |
| 494 | + batch_size = 2 |
| 495 | + dp_world_size = 4 |
| 496 | + world_size = 8 |
| 497 | + |
| 498 | + # DP group 0: rank 0 samples first |
| 499 | + sampled0_g0, consumed0_g0 = sampler.sample( |
| 500 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 501 | + ) |
| 502 | + # mimic the consumption status update managed in TransferQueueController |
| 503 | + ready_indexes = [i for i in ready_indexes if i not in consumed0_g0] |
| 504 | + |
| 505 | + # DP group 1: rank 0 samples first |
| 506 | + sampled0_g1, consumed0_g1 = sampler.sample( |
| 507 | + ready_indexes, batch_size, dp_group=1, dp_world_size=dp_world_size, world_size=world_size |
| 508 | + ) |
| 509 | + ready_indexes = [i for i in ready_indexes if i not in consumed0_g1] |
| 510 | + |
| 511 | + # Both should have sampled their first batch |
| 512 | + assert sampled0_g0 == [0, 1] |
| 513 | + assert sampled0_g1 == [2, 3] |
| 514 | + assert consumed0_g0 == [0, 1] |
| 515 | + assert consumed0_g1 == [2, 3] |
| 516 | + |
| 517 | + # DP group 0: rank 1 fetches cached, and all the data should be labeled as consumed |
| 518 | + sampled1_g0, consumed1_g0 = sampler.sample( |
| 519 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 520 | + ) |
| 521 | + ready_indexes = [i for i in ready_indexes if i not in consumed1_g0] |
| 522 | + assert sampled1_g0 == [0, 1] |
| 523 | + assert consumed1_g0 == [0, 1] |
| 524 | + |
| 525 | + # DP group 1: rank 1 fetches cached, and all the data should be labeled as consumed |
| 526 | + sampled1_g1, consumed1_g1 = sampler.sample( |
| 527 | + ready_indexes, batch_size, dp_group=1, dp_world_size=dp_world_size, world_size=world_size |
| 528 | + ) |
| 529 | + ready_indexes = [i for i in ready_indexes if i not in consumed1_g1] |
| 530 | + assert sampled1_g1 == [2, 3] |
| 531 | + assert consumed1_g1 == [2, 3] |
| 532 | + |
| 533 | + # DP group 0: rank 0 fetches again, this should return new data |
| 534 | + sampled2_g0, consumed2_g0 = sampler.sample( |
| 535 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 536 | + ) |
| 537 | + ready_indexes = [i for i in ready_indexes if i not in consumed2_g0] |
| 538 | + assert sampled2_g0 == [4, 5] |
| 539 | + assert consumed2_g0 == [4, 5] |
| 540 | + |
| 541 | + # DP group 0: rank 1 fetches cached |
| 542 | + sampled3_g0, consumed3_g0 = sampler.sample( |
| 543 | + ready_indexes, batch_size, dp_group=0, dp_world_size=dp_world_size, world_size=world_size |
| 544 | + ) |
| 545 | + assert sampled3_g0 == [4, 5] |
| 546 | + assert consumed3_g0 == [4, 5] |
| 547 | + |
| 548 | + # Both groups should be cleaned up |
| 549 | + assert sampler._states == {} |
| 550 | + |
| 551 | + def test_rank_aware_sampler_empty_ready_indexes(self): |
| 552 | + """Test behavior with empty ready indexes.""" |
| 553 | + sampler = RankAwareSampler() |
| 554 | + ready_indexes = [] |
| 555 | + batch_size = 3 |
| 556 | + |
| 557 | + sampled, consumed = sampler.sample(ready_indexes, batch_size, dp_group=0, dp_world_size=2, world_size=2) |
| 558 | + |
| 559 | + assert sampled == [] |
| 560 | + assert consumed == [] |
| 561 | + |
| 562 | + def test_rank_aware_sampler_batch_size_larger_than_ready(self): |
| 563 | + """Test behavior when batch_size > len(ready_indexes).""" |
| 564 | + sampler = RankAwareSampler() |
| 565 | + ready_indexes = [0, 1] |
| 566 | + batch_size = 5 |
| 567 | + |
| 568 | + # When world_size == dp_world_size, fetches_per_batch=1, consumed returned immediately |
| 569 | + sampled, consumed = sampler.sample(ready_indexes, batch_size, dp_group=0, dp_world_size=2, world_size=2) |
| 570 | + |
| 571 | + assert sampled == [0, 1] |
| 572 | + assert consumed == [0, 1] |
| 573 | + assert len(sampled) == len(ready_indexes) |
| 574 | + |
| 575 | + def test_rank_aware_sampler_zero_batch_size(self): |
| 576 | + """Test behavior with zero batch size.""" |
| 577 | + sampler = RankAwareSampler() |
| 578 | + ready_indexes = [0, 1, 2, 3] |
| 579 | + batch_size = 0 |
| 580 | + |
| 581 | + sampled, consumed = sampler.sample(ready_indexes, batch_size, dp_group=0, dp_world_size=2, world_size=2) |
| 582 | + |
| 583 | + assert sampled == [] |
| 584 | + assert consumed == [] |
| 585 | + |
| 586 | + |
430 | 587 | class TestSamplerIntegration: |
431 | 588 | """Integration tests for samplers.""" |
432 | 589 |
|
|
0 commit comments