@@ -46,8 +46,10 @@ typedef struct sas7bdat_ctx_s {
4646 uint32_t row_length ;
4747 uint32_t page_row_count ;
4848 uint32_t parsed_row_count ;
49+ uint32_t parsed_deleted_row_count ;
4950 uint32_t column_count ;
5051 uint32_t row_limit ;
52+ uint32_t deleted_row_limit ;
5153 uint32_t row_offset ;
5254
5355 uint64_t header_size ;
@@ -232,7 +234,7 @@ static readstat_error_t sas7bdat_parse_column_size_subheader(const char *subhead
232234static readstat_error_t sas7bdat_parse_row_size_subheader (const char * subheader , size_t len , sas7bdat_ctx_t * ctx ) {
233235 readstat_error_t retval = READSTAT_OK ;
234236 uint64_t total_row_count ;
235- uint64_t row_length , page_row_count ;
237+ uint64_t row_length , deleted_row_limit , page_row_count ;
236238
237239 if (len < (ctx -> u64 ? 250 : 190 )) {
238240 retval = READSTAT_ERROR_PARSE ;
@@ -242,13 +244,21 @@ static readstat_error_t sas7bdat_parse_row_size_subheader(const char *subheader,
242244 if (ctx -> u64 ) {
243245 row_length = sas_read8 (& subheader [40 ], ctx -> bswap );
244246 total_row_count = sas_read8 (& subheader [48 ], ctx -> bswap );
247+ deleted_row_limit = sas_read8 (& subheader [56 ], ctx -> bswap );
245248 page_row_count = sas_read8 (& subheader [120 ], ctx -> bswap );
246249 } else {
247250 row_length = sas_read4 (& subheader [20 ], ctx -> bswap );
248251 total_row_count = sas_read4 (& subheader [24 ], ctx -> bswap );
252+ deleted_row_limit = sas_read4 (& subheader [28 ], ctx -> bswap );
249253 page_row_count = sas_read4 (& subheader [60 ], ctx -> bswap );
250254 }
251255
256+ if (deleted_row_limit > total_row_count ) {
257+ retval = READSTAT_ERROR_PARSE ;
258+ goto cleanup ;
259+ }
260+ ctx -> deleted_row_limit = deleted_row_limit ;
261+
252262 sas_text_ref_t file_label_ref = sas7bdat_parse_text_ref (& subheader [len - 130 ], ctx );
253263 if (file_label_ref .length ) {
254264 if ((retval = sas7bdat_copy_text_ref (ctx -> file_label , sizeof (ctx -> file_label ),
@@ -390,6 +400,19 @@ static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subhe
390400 return retval ;
391401}
392402
403+ static readstat_error_t sas7bdat_register_deleted_row (sas7bdat_ctx_t * ctx ) {
404+ if (ctx -> parsed_deleted_row_count >= ctx -> deleted_row_limit ) {
405+ return READSTAT_ERROR_PARSE ;
406+ }
407+ ctx -> parsed_row_count ++ ;
408+ ctx -> parsed_deleted_row_count ++ ;
409+ return READSTAT_OK ;
410+ }
411+
412+ static uint32_t sas7bdat_get_current_row_id (sas7bdat_ctx_t * ctx ) {
413+ return ctx -> parsed_row_count - ctx -> parsed_deleted_row_count ;
414+ }
415+
393416static readstat_error_t sas7bdat_handle_data_value (readstat_variable_t * variable ,
394417 col_info_t * col_info , const char * col_data , sas7bdat_ctx_t * ctx ) {
395418 readstat_error_t retval = READSTAT_OK ;
@@ -406,7 +429,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
406429 if (ctx -> handle .error ) {
407430 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
408431 "ReadStat: Error converting string (row=%u, col=%u) to specified encoding: %.*s" ,
409- ctx -> parsed_row_count + 1 , col_info -> index + 1 , col_info -> width , col_data );
432+ sas7bdat_get_current_row_id ( ctx ) + 1 , col_info -> index + 1 , col_info -> width , col_data );
410433 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
411434 }
412435 goto cleanup ;
@@ -438,7 +461,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
438461 value .v .double_value = dval ;
439462 }
440463 }
441- cb_retval = ctx -> handle .value (ctx -> parsed_row_count , variable , value , ctx -> user_ctx );
464+ cb_retval = ctx -> handle .value (sas7bdat_get_current_row_id ( ctx ) , variable , value , ctx -> user_ctx );
442465
443466 if (cb_retval != READSTAT_HANDLER_OK )
444467 retval = READSTAT_ERROR_USER_ABORT ;
@@ -487,7 +510,14 @@ static readstat_error_t sas7bdat_parse_single_row(const char *data, sas7bdat_ctx
487510 return retval ;
488511}
489512
490- static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , sas7bdat_ctx_t * ctx ) {
/*
 * Test a single bit of a byte-packed bitmap.
 *
 * Bits are numbered most-significant-first within each byte: index 0 is
 * bit 0x80 of bitmap[0], index 7 is bit 0x01 of bitmap[0], index 8 is
 * bit 0x80 of bitmap[1], and so on.
 *
 * Returns the masked byte (non-zero when the bit is set, 0 otherwise),
 * not a normalized 0/1 value.
 */
static uint8_t sas7bdat_read_bitmap(const uint8_t *bitmap, int index) {
    const int byte_pos = index / 8;
    const int bit_shift = 7 - index % 8;

    return bitmap[byte_pos] & (uint8_t)(1 << bit_shift);
}
519+
520+ static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , const uint8_t * deleted_bitmap , sas7bdat_ctx_t * ctx ) {
491521 readstat_error_t retval = READSTAT_OK ;
492522 int i ;
493523 size_t row_offset = 0 ;
@@ -496,8 +526,13 @@ static readstat_error_t sas7bdat_parse_rows(const char *data, size_t len, sas7bd
496526 retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH ;
497527 goto cleanup ;
498528 }
499- if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK )
529+ if (deleted_bitmap != NULL && sas7bdat_read_bitmap (deleted_bitmap , i )) {
530+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
531+ goto cleanup ;
532+ }
533+ } else if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK ) {
500534 goto cleanup ;
535+ }
501536
502537 row_offset += ctx -> row_length ;
503538 }
@@ -608,7 +643,7 @@ static readstat_error_t sas7bdat_parse_subheader_rle(const char *subheader, size
608643 if (ctx -> handle .error ) {
609644 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
610645 "ReadStat: Row #%d decompressed to %ld bytes (expected %d bytes)" ,
611- ctx -> parsed_row_count , (long )(bytes_decompressed ), ctx -> row_length );
646+ sas7bdat_get_current_row_id ( ctx ) , (long )(bytes_decompressed ), ctx -> row_length );
612647 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
613648 }
614649 goto cleanup ;
@@ -735,7 +770,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres
735770 readstat_error_t retval = READSTAT_OK ;
736771 if (ctx -> handle .metadata ) {
737772 readstat_metadata_t metadata = {
738- .row_count = ctx -> row_limit ,
773+ .row_count = ctx -> row_limit - ctx -> deleted_row_limit ,
739774 .var_count = ctx -> column_count ,
740775 .table_name = ctx -> table_name ,
741776 .file_label = ctx -> file_label ,
@@ -895,7 +930,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
895930 goto cleanup ;
896931 }
897932 }
898- } else if (shp_info .compression == SAS_COMPRESSION_ROW ) {
933+ } else if (shp_info .compression == SAS_COMPRESSION_ROW || shp_info . compression == SAS_COMPRESSION_ROW_DELETED ) {
899934 /* void */
900935 } else {
901936 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
@@ -911,6 +946,26 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
911946 return retval ;
912947}
913948
949+ static readstat_error_t sas7bdat_parse_deleted_row_bitmap (const char * page , const char * data ,
950+ size_t page_size , const uint8_t * * deleted_row_bitmap , sas7bdat_ctx_t * ctx ) {
951+ uint64_t page_unused_bytes ;
952+ if (ctx -> u64 ) {
953+ page_unused_bytes = sas_read8 (& page [24 ], ctx -> bswap );
954+ }
955+ else {
956+ page_unused_bytes = sas_read4 (& page [12 ], ctx -> bswap );
957+ }
958+ uint32_t row_count = ctx -> page_row_count < ctx -> row_limit ? ctx -> page_row_count : ctx -> row_limit ;
959+ uint64_t deleted_row_bitmap_offset = row_count * ctx -> row_length + page_unused_bytes ;
960+ uint32_t required_bytes = row_count / 8 + (row_count % 8 == 0 ? 0 : 1 );
961+
962+ if ((data - page ) + deleted_row_bitmap_offset + required_bytes > page_size ) {
963+ return READSTAT_ERROR_PARSE ;
964+ }
965+ * deleted_row_bitmap = (const uint8_t * )data + deleted_row_bitmap_offset ;
966+ return READSTAT_OK ;
967+ }
968+
914969static readstat_error_t sas7bdat_parse_page_pass2 (const char * page , size_t page_size , sas7bdat_ctx_t * ctx ) {
915970 uint16_t page_type ;
916971
@@ -975,6 +1030,10 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
9751030 if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
9761031 goto cleanup ;
9771032 }
1033+ } else if (shp_info .compression == SAS_COMPRESSION_ROW_DELETED ) {
1034+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
1035+ goto cleanup ;
1036+ }
9781037 } else {
9791038 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
9801039 goto cleanup ;
@@ -1004,7 +1063,14 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10041063 goto cleanup ;
10051064 }
10061065 if (ctx -> handle .value ) {
1007- retval = sas7bdat_parse_rows (data , page + page_size - data , ctx );
1066+ const uint8_t * deleted_row_bitmap = NULL ;
1067+ if (page_type & SAS_PAGE_TYPE_DELETED ) {
1068+ if ((retval = sas7bdat_parse_deleted_row_bitmap (page , data , page_size ,
1069+ & deleted_row_bitmap , ctx )) != READSTAT_OK ) {
1070+ goto cleanup ;
1071+ }
1072+ }
1073+ retval = sas7bdat_parse_rows (data , page + page_size - data , deleted_row_bitmap , ctx );
10081074 }
10091075 }
10101076cleanup :
@@ -1276,11 +1342,22 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *
12761342 goto cleanup ;
12771343 }
12781344
1345+ if (ctx -> handle .value && ctx -> parsed_deleted_row_count != ctx -> deleted_row_limit ) {
1346+ retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
1347+ if (ctx -> handle .error ) {
1348+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d deleted rows in file, found %d" ,
1349+ ctx -> deleted_row_limit , ctx -> parsed_deleted_row_count );
1350+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
1351+ }
1352+ goto cleanup ;
1353+ }
1354+
12791355 if (ctx -> handle .value && ctx -> parsed_row_count != ctx -> row_limit ) {
12801356 retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
12811357 if (ctx -> handle .error ) {
12821358 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d rows in file, found %d" ,
1283- ctx -> row_limit , ctx -> parsed_row_count );
1359+ ctx -> row_limit - ctx -> deleted_row_limit ,
1360+ ctx -> parsed_row_count - ctx -> parsed_deleted_row_count );
12841361 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
12851362 }
12861363 goto cleanup ;
0 commit comments