@@ -46,8 +46,10 @@ typedef struct sas7bdat_ctx_s {
4646 uint32_t row_length ;
4747 uint32_t page_row_count ;
4848 uint32_t parsed_row_count ;
49+ uint32_t parsed_deleted_row_count ;
4950 uint32_t column_count ;
5051 uint32_t row_limit ;
52+ uint32_t deleted_row_limit ;
5153 uint32_t row_offset ;
5254
5355 uint64_t header_size ;
@@ -232,7 +234,7 @@ static readstat_error_t sas7bdat_parse_column_size_subheader(const char *subhead
232234static readstat_error_t sas7bdat_parse_row_size_subheader (const char * subheader , size_t len , sas7bdat_ctx_t * ctx ) {
233235 readstat_error_t retval = READSTAT_OK ;
234236 uint64_t total_row_count ;
235- uint64_t row_length , page_row_count ;
237+ uint64_t row_length , deleted_row_limit , page_row_count ;
236238
237239 if (len < (ctx -> u64 ? 250 : 190 )) {
238240 retval = READSTAT_ERROR_PARSE ;
@@ -242,13 +244,21 @@ static readstat_error_t sas7bdat_parse_row_size_subheader(const char *subheader,
242244 if (ctx -> u64 ) {
243245 row_length = sas_read8 (& subheader [40 ], ctx -> bswap );
244246 total_row_count = sas_read8 (& subheader [48 ], ctx -> bswap );
247+ deleted_row_limit = sas_read8 (& subheader [56 ], ctx -> bswap );
245248 page_row_count = sas_read8 (& subheader [120 ], ctx -> bswap );
246249 } else {
247250 row_length = sas_read4 (& subheader [20 ], ctx -> bswap );
248251 total_row_count = sas_read4 (& subheader [24 ], ctx -> bswap );
252+ deleted_row_limit = sas_read4 (& subheader [28 ], ctx -> bswap );
249253 page_row_count = sas_read4 (& subheader [60 ], ctx -> bswap );
250254 }
251255
256+ if (deleted_row_limit > total_row_count ) {
257+ retval = READSTAT_ERROR_PARSE ;
258+ goto cleanup ;
259+ }
260+ ctx -> deleted_row_limit = deleted_row_limit ;
261+
252262 sas_text_ref_t file_label_ref = sas7bdat_parse_text_ref (& subheader [len - 130 ], ctx );
253263 if (file_label_ref .length ) {
254264 if ((retval = sas7bdat_copy_text_ref (ctx -> file_label , sizeof (ctx -> file_label ),
@@ -393,6 +403,19 @@ static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subhe
393403 return retval ;
394404}
395405
406+ static readstat_error_t sas7bdat_register_deleted_row (sas7bdat_ctx_t * ctx ) {
407+ if (ctx -> parsed_deleted_row_count >= ctx -> deleted_row_limit ) {
408+ return READSTAT_ERROR_PARSE ;
409+ }
410+ ctx -> parsed_row_count ++ ;
411+ ctx -> parsed_deleted_row_count ++ ;
412+ return READSTAT_OK ;
413+ }
414+
415+ static uint32_t sas7bdat_get_current_row_id (sas7bdat_ctx_t * ctx ) {
416+ return ctx -> parsed_row_count - ctx -> parsed_deleted_row_count ;
417+ }
418+
396419static readstat_error_t sas7bdat_handle_data_value (readstat_variable_t * variable ,
397420 col_info_t * col_info , const char * col_data , sas7bdat_ctx_t * ctx ) {
398421 readstat_error_t retval = READSTAT_OK ;
@@ -409,7 +432,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
409432 if (ctx -> handle .error ) {
410433 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
411434 "ReadStat: Error converting string (row=%u, col=%u) to specified encoding: %.*s" ,
412- ctx -> parsed_row_count + 1 , col_info -> index + 1 , col_info -> width , col_data );
435+ sas7bdat_get_current_row_id ( ctx ) + 1 , col_info -> index + 1 , col_info -> width , col_data );
413436 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
414437 }
415438 goto cleanup ;
@@ -441,7 +464,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
441464 value .v .double_value = dval ;
442465 }
443466 }
444- cb_retval = ctx -> handle .value (ctx -> parsed_row_count , variable , value , ctx -> user_ctx );
467+ cb_retval = ctx -> handle .value (sas7bdat_get_current_row_id ( ctx ) , variable , value , ctx -> user_ctx );
445468
446469 if (cb_retval != READSTAT_HANDLER_OK )
447470 retval = READSTAT_ERROR_USER_ABORT ;
@@ -490,7 +513,14 @@ static readstat_error_t sas7bdat_parse_single_row(const char *data, sas7bdat_ctx
490513 return retval ;
491514}
492515
493- static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , sas7bdat_ctx_t * ctx ) {
/*
 * Test a single bit of a packed bitmap.
 *
 * Bits are numbered most-significant-first within each byte: index 0 is the
 * top bit (0x80) of bitmap[0], index 7 is its lowest bit, index 8 is the top
 * bit of bitmap[1], and so on.
 *
 * Returns the masked byte, so the result is non-zero (the mask value itself,
 * not 1) when the bit is set and 0 when it is clear. The caller must ensure
 * that bitmap holds at least index / 8 + 1 bytes.
 */
static uint8_t sas7bdat_read_bitmap(const uint8_t *bitmap, int index) {
    const int byte_pos = index / 8;
    const int bit_shift = 7 - index % 8;

    return bitmap[byte_pos] & (uint8_t)(1 << bit_shift);
}
522+
523+ static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , const uint8_t * deleted_bitmap , sas7bdat_ctx_t * ctx ) {
494524 readstat_error_t retval = READSTAT_OK ;
495525 int i ;
496526 size_t row_offset = 0 ;
@@ -499,8 +529,13 @@ static readstat_error_t sas7bdat_parse_rows(const char *data, size_t len, sas7bd
499529 retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH ;
500530 goto cleanup ;
501531 }
502- if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK )
532+ if (deleted_bitmap != NULL && sas7bdat_read_bitmap (deleted_bitmap , i )) {
533+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
534+ goto cleanup ;
535+ }
536+ } else if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK ) {
503537 goto cleanup ;
538+ }
504539
505540 row_offset += ctx -> row_length ;
506541 }
@@ -611,7 +646,7 @@ static readstat_error_t sas7bdat_parse_subheader_rle(const char *subheader, size
611646 if (ctx -> handle .error ) {
612647 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
613648 "ReadStat: Row #%d decompressed to %ld bytes (expected %d bytes)" ,
614- ctx -> parsed_row_count , (long )(bytes_decompressed ), ctx -> row_length );
649+ sas7bdat_get_current_row_id ( ctx ) , (long )(bytes_decompressed ), ctx -> row_length );
615650 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
616651 }
617652 goto cleanup ;
@@ -739,7 +774,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres
739774 readstat_error_t retval = READSTAT_OK ;
740775 if (ctx -> handle .metadata ) {
741776 readstat_metadata_t metadata = {
742- .row_count = ctx -> row_limit ,
777+ .row_count = ctx -> row_limit - ctx -> deleted_row_limit ,
743778 .var_count = ctx -> column_count ,
744779 .table_name = ctx -> table_name ,
745780 .file_label = ctx -> file_label ,
@@ -931,7 +966,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
931966 goto cleanup ;
932967 }
933968 }
934- } else if (shp_info .compression == SAS_COMPRESSION_ROW ) {
969+ } else if (shp_info .compression == SAS_COMPRESSION_ROW || shp_info . compression == SAS_COMPRESSION_ROW_DELETED ) {
935970 /* void */
936971 } else {
937972 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
@@ -947,6 +982,26 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
947982 return retval ;
948983}
949984
985+ static readstat_error_t sas7bdat_parse_deleted_row_bitmap (const char * page , const char * data ,
986+ size_t page_size , const uint8_t * * deleted_row_bitmap , sas7bdat_ctx_t * ctx ) {
987+ uint64_t page_unused_bytes ;
988+ if (ctx -> u64 ) {
989+ page_unused_bytes = sas_read8 (& page [24 ], ctx -> bswap );
990+ }
991+ else {
992+ page_unused_bytes = sas_read4 (& page [12 ], ctx -> bswap );
993+ }
994+ uint32_t row_count = ctx -> page_row_count < ctx -> row_limit ? ctx -> page_row_count : ctx -> row_limit ;
995+ uint64_t deleted_row_bitmap_offset = row_count * ctx -> row_length + page_unused_bytes ;
996+ uint32_t required_bytes = row_count / 8 + (row_count % 8 == 0 ? 0 : 1 );
997+
998+ if ((data - page ) + deleted_row_bitmap_offset + required_bytes > page_size ) {
999+ return READSTAT_ERROR_PARSE ;
1000+ }
1001+ * deleted_row_bitmap = (const uint8_t * )data + deleted_row_bitmap_offset ;
1002+ return READSTAT_OK ;
1003+ }
1004+
9501005static readstat_error_t sas7bdat_parse_page_pass2 (const char * page , size_t page_size , sas7bdat_ctx_t * ctx ) {
9511006 uint16_t page_type ;
9521007
@@ -1007,6 +1062,10 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10071062 if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
10081063 goto cleanup ;
10091064 }
1065+ } else if (shp_info .compression == SAS_COMPRESSION_ROW_DELETED ) {
1066+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
1067+ goto cleanup ;
1068+ }
10101069 } else {
10111070 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
10121071 goto cleanup ;
@@ -1036,7 +1095,14 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10361095 goto cleanup ;
10371096 }
10381097 if (ctx -> handle .value ) {
1039- retval = sas7bdat_parse_rows (data , page + page_size - data , ctx );
1098+ const uint8_t * deleted_row_bitmap = NULL ;
1099+ if (page_type & SAS_PAGE_TYPE_DELETED ) {
1100+ if ((retval = sas7bdat_parse_deleted_row_bitmap (page , data , page_size ,
1101+ & deleted_row_bitmap , ctx )) != READSTAT_OK ) {
1102+ goto cleanup ;
1103+ }
1104+ }
1105+ retval = sas7bdat_parse_rows (data , page + page_size - data , deleted_row_bitmap , ctx );
10401106 }
10411107 }
10421108cleanup :
@@ -1308,11 +1374,22 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *
13081374 goto cleanup ;
13091375 }
13101376
1377+ if (ctx -> handle .value && ctx -> parsed_deleted_row_count != ctx -> deleted_row_limit ) {
1378+ retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
1379+ if (ctx -> handle .error ) {
1380+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d deleted rows in file, found %d" ,
1381+ ctx -> deleted_row_limit , ctx -> parsed_deleted_row_count );
1382+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
1383+ }
1384+ goto cleanup ;
1385+ }
1386+
13111387 if (ctx -> handle .value && ctx -> parsed_row_count != ctx -> row_limit ) {
13121388 retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
13131389 if (ctx -> handle .error ) {
13141390 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d rows in file, found %d" ,
1315- ctx -> row_limit , ctx -> parsed_row_count );
1391+ ctx -> row_limit - ctx -> deleted_row_limit ,
1392+ ctx -> parsed_row_count - ctx -> parsed_deleted_row_count );
13161393 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
13171394 }
13181395 goto cleanup ;
0 commit comments