@@ -71,7 +71,12 @@ impl Toploc {
7171 pub fn matches_file_name ( & self , file_name : & str ) -> bool {
7272 let normalized_name = self . normalize_path ( file_name) ;
7373 match & self . config . file_prefix_filter {
74- Some ( prefix) => normalized_name. starts_with ( prefix) ,
74+ Some ( prefix) => {
75+ normalized_name == * prefix || {
76+ normalized_name. starts_with ( prefix)
77+ && normalized_name[ prefix. len ( ) ..] . starts_with ( '/' )
78+ }
79+ }
7580 None => true ,
7681 }
7782 }
@@ -588,20 +593,101 @@ mod tests {
588593 assert_eq ! ( group_result. failing_indices, vec![ 1 , 3 , 5 ] ) ;
589594 Ok ( ( ) )
590595 }
591-
592596 #[ tokio:: test]
593597 async fn test_file_prefix_filter_matching ( ) {
594- let config = ToplocConfig {
595- server_url : "http://test" . to_string ( ) ,
596- auth_token : None ,
597- file_prefix_filter : Some ( "Qwen3" . to_string ( ) ) ,
598- } ;
599- let toploc = Toploc :: new ( config, None ) ;
598+ let configs = vec ! [
599+ ToplocConfig {
600+ server_url: "http://test" . to_string( ) ,
601+ auth_token: None ,
602+ file_prefix_filter: Some ( "Qwen/Qwen3-235B-A22B" . to_string( ) ) ,
603+ } ,
604+ ToplocConfig {
605+ server_url: "http://test" . to_string( ) ,
606+ auth_token: None ,
607+ file_prefix_filter: Some ( "Qwen/Qwen3-32B" . to_string( ) ) ,
608+ } ,
609+ ToplocConfig {
610+ server_url: "http://test" . to_string( ) ,
611+ auth_token: None ,
612+ file_prefix_filter: Some ( "Qwen/Qwen3-30B-A3B" . to_string( ) ) ,
613+ } ,
614+ ToplocConfig {
615+ server_url: "http://test" . to_string( ) ,
616+ auth_token: None ,
617+ file_prefix_filter: Some ( "Qwen/Qwen3-14B" . to_string( ) ) ,
618+ } ,
619+ ToplocConfig {
620+ server_url: "http://test" . to_string( ) ,
621+ auth_token: None ,
622+ file_prefix_filter: Some ( "deepseek-ai/DeepSeek-R1-0528" . to_string( ) ) ,
623+ } ,
624+ ToplocConfig {
625+ server_url: "http://test" . to_string( ) ,
626+ auth_token: None ,
627+ file_prefix_filter: Some ( "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B" . to_string( ) ) ,
628+ } ,
629+ ] ;
630+
631+ let test_cases = vec ! [
632+ // Test Qwen 235B model
633+ ( "Qwen/Qwen3-235B-A22B/data.parquet" , Some ( 0 ) ) ,
634+ ( "Qwen/Qwen3-235B-A22B" , Some ( 0 ) ) ,
635+ ( "Qwen/Qwen3-235B-A22B-extra/data.parquet" , None ) ,
636+ ( "qwen/qwen3-235b-a22b/data.parquet" , None ) , // Case sensitive
637+ // Test Qwen 32B model
638+ ( "Qwen/Qwen3-32B/data.parquet" , Some ( 1 ) ) ,
639+ ( "Qwen/Qwen3-32B" , Some ( 1 ) ) ,
640+ ( "Qwen/Qwen3-32B-extra/data.parquet" , None ) ,
641+ // Test Qwen 30B model
642+ ( "Qwen/Qwen3-30B-A3B/data.parquet" , Some ( 2 ) ) ,
643+ ( "Qwen/Qwen3-30B-A3B" , Some ( 2 ) ) ,
644+ ( "Qwen/Qwen3-30B-A3B-extra/data.parquet" , None ) ,
645+ // Test Qwen 14B model
646+ ( "Qwen/Qwen3-14B/data.parquet" , Some ( 3 ) ) ,
647+ ( "Qwen/Qwen3-14B" , Some ( 3 ) ) ,
648+ ( "Qwen/Qwen3-14B-extra/data.parquet" , None ) ,
649+ // Test DeepSeek base model
650+ ( "deepseek-ai/DeepSeek-R1-0528/data.parquet" , Some ( 4 ) ) ,
651+ ( "deepseek-ai/DeepSeek-R1-0528" , Some ( 4 ) ) ,
652+ (
653+ "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B/data.parquet" ,
654+ Some ( 5 ) ,
655+ ) ,
656+ ( "deepseek-ai/deepseek-r1-0528/data.parquet" , None ) , // Case sensitive
657+ ] ;
658+
659+ for ( test_file, expected_match) in test_cases {
660+ let mut matched = false ;
661+ let mut matched_idx = None ;
662+
663+ for ( idx, config) in configs. iter ( ) . enumerate ( ) {
664+ let toploc = Toploc :: new ( config. clone ( ) , None ) ;
665+ if toploc. matches_file_name ( test_file) {
666+ matched = true ;
667+ matched_idx = Some ( idx) ;
668+ break ;
669+ }
670+ }
600671
601- assert ! ( toploc. matches_file_name( "Qwen3-model-data.parquet" ) ) ;
602- assert ! ( toploc. matches_file_name( "Qwen3" ) ) ;
603- assert ! ( !toploc. matches_file_name( "GPT4-model-data.parquet" ) ) ;
604- assert ! ( !toploc. matches_file_name( "qwen3-lowercase.parquet" ) ) ; // Case sensitive
672+ match expected_match {
673+ Some ( expected_idx) => {
674+ assert ! (
675+ matched,
676+ "Expected file {} to match config {}" ,
677+ test_file, expected_idx
678+ ) ;
679+ assert_eq ! (
680+ matched_idx,
681+ Some ( expected_idx) ,
682+ "File {} matched config {} but expected {}" ,
683+ test_file,
684+ matched_idx. unwrap( ) ,
685+ expected_idx
686+ ) ;
687+ }
688+ None => assert ! ( !matched, "File {} should not match any config" , test_file) ,
689+ }
690+ }
605691 }
606692
607693 #[ tokio:: test]
0 commit comments