55#include <string.h>
66#include <math.h>
77#include <inttypes.h>
8+ #include <limits.h>
89#include "readstat_sas.h"
910#include "readstat_sas_rle.h"
1011#include "../readstat_iconv.h"
@@ -46,8 +47,10 @@ typedef struct sas7bdat_ctx_s {
4647 uint32_t row_length ;
4748 uint32_t page_row_count ;
4849 uint32_t parsed_row_count ;
50+ uint32_t parsed_deleted_row_count ;
4951 uint32_t column_count ;
5052 uint32_t row_limit ;
53+ uint32_t deleted_row_limit ;
5154 uint32_t row_offset ;
5255
5356 uint64_t header_size ;
@@ -232,7 +235,7 @@ static readstat_error_t sas7bdat_parse_column_size_subheader(const char *subhead
232235static readstat_error_t sas7bdat_parse_row_size_subheader (const char * subheader , size_t len , sas7bdat_ctx_t * ctx ) {
233236 readstat_error_t retval = READSTAT_OK ;
234237 uint64_t total_row_count ;
235- uint64_t row_length , page_row_count ;
238+ uint64_t row_length , deleted_row_limit , page_row_count ;
236239
237240 if (len < (ctx -> u64 ? 250 : 190 )) {
238241 retval = READSTAT_ERROR_PARSE ;
@@ -242,13 +245,21 @@ static readstat_error_t sas7bdat_parse_row_size_subheader(const char *subheader,
242245 if (ctx -> u64 ) {
243246 row_length = sas_read8 (& subheader [40 ], ctx -> bswap );
244247 total_row_count = sas_read8 (& subheader [48 ], ctx -> bswap );
248+ deleted_row_limit = sas_read8 (& subheader [56 ], ctx -> bswap );
245249 page_row_count = sas_read8 (& subheader [120 ], ctx -> bswap );
246250 } else {
247251 row_length = sas_read4 (& subheader [20 ], ctx -> bswap );
248252 total_row_count = sas_read4 (& subheader [24 ], ctx -> bswap );
253+ deleted_row_limit = sas_read4 (& subheader [28 ], ctx -> bswap );
249254 page_row_count = sas_read4 (& subheader [60 ], ctx -> bswap );
250255 }
251256
257+ if (deleted_row_limit > total_row_count ) {
258+ retval = READSTAT_ERROR_PARSE ;
259+ goto cleanup ;
260+ }
261+ ctx -> deleted_row_limit = deleted_row_limit ;
262+
252263 sas_text_ref_t file_label_ref = sas7bdat_parse_text_ref (& subheader [len - 130 ], ctx );
253264 if (file_label_ref .length ) {
254265 if ((retval = sas7bdat_copy_text_ref (ctx -> file_label , sizeof (ctx -> file_label ),
@@ -393,6 +404,22 @@ static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subhe
393404 return retval ;
394405}
395406
407+ static readstat_error_t sas7bdat_register_deleted_row (sas7bdat_ctx_t * ctx ) {
408+ if (ctx -> parsed_row_count == ctx -> row_limit ) {
409+ return READSTAT_OK ;
410+ }
411+ if (ctx -> parsed_deleted_row_count >= ctx -> deleted_row_limit ) {
412+ return READSTAT_ERROR_PARSE ;
413+ }
414+ ctx -> parsed_row_count ++ ;
415+ ctx -> parsed_deleted_row_count ++ ;
416+ return READSTAT_OK ;
417+ }
418+
419+ static uint32_t sas7bdat_get_current_row_id (sas7bdat_ctx_t * ctx ) {
420+ return ctx -> parsed_row_count - ctx -> parsed_deleted_row_count ;
421+ }
422+
396423static readstat_error_t sas7bdat_handle_data_value (readstat_variable_t * variable ,
397424 col_info_t * col_info , const char * col_data , sas7bdat_ctx_t * ctx ) {
398425 readstat_error_t retval = READSTAT_OK ;
@@ -409,7 +436,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
409436 if (ctx -> handle .error ) {
410437 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
411438 "ReadStat: Error converting string (row=%u, col=%u) to specified encoding: %.*s" ,
412- ctx -> parsed_row_count + 1 , col_info -> index + 1 , col_info -> width , col_data );
439+ sas7bdat_get_current_row_id ( ctx ) + 1 , col_info -> index + 1 , col_info -> width , col_data );
413440 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
414441 }
415442 goto cleanup ;
@@ -441,7 +468,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
441468 value .v .double_value = dval ;
442469 }
443470 }
444- cb_retval = ctx -> handle .value (ctx -> parsed_row_count , variable , value , ctx -> user_ctx );
471+ cb_retval = ctx -> handle .value (sas7bdat_get_current_row_id ( ctx ) , variable , value , ctx -> user_ctx );
445472
446473 if (cb_retval != READSTAT_HANDLER_OK )
447474 retval = READSTAT_ERROR_USER_ABORT ;
@@ -490,7 +517,15 @@ static readstat_error_t sas7bdat_parse_single_row(const char *data, sas7bdat_ctx
490517 return retval ;
491518}
492519
493- static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , sas7bdat_ctx_t * ctx ) {
/*
 * Test a single bit of a bitmap whose bits are numbered from the most
 * significant bit of each byte (bit 0 is the MSB of bitmap[0]).
 *
 * Returns zero when the bit is clear; when the bit is set, returns the
 * byte with only that bit kept (a non-zero value, not necessarily 1).
 */
static unsigned char sas7bdat_read_bitmap(const unsigned char *bitmap, int index) {
    int byte_pos = index / CHAR_BIT;
    int shift = CHAR_BIT - 1 - index % CHAR_BIT;
    unsigned char bit = 1 << shift;

    return bitmap[byte_pos] & bit;
}
526+
527+ static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len ,
528+ const unsigned char * deleted_bitmap , sas7bdat_ctx_t * ctx ) {
494529 readstat_error_t retval = READSTAT_OK ;
495530 int i ;
496531 size_t row_offset = 0 ;
@@ -499,8 +534,13 @@ static readstat_error_t sas7bdat_parse_rows(const char *data, size_t len, sas7bd
499534 retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH ;
500535 goto cleanup ;
501536 }
502- if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK )
537+ if (deleted_bitmap != NULL && sas7bdat_read_bitmap (deleted_bitmap , i )) {
538+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
539+ goto cleanup ;
540+ }
541+ } else if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK ) {
503542 goto cleanup ;
543+ }
504544
505545 row_offset += ctx -> row_length ;
506546 }
@@ -611,7 +651,7 @@ static readstat_error_t sas7bdat_parse_subheader_rle(const char *subheader, size
611651 if (ctx -> handle .error ) {
612652 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
613653 "ReadStat: Row #%d decompressed to %ld bytes (expected %d bytes)" ,
614- ctx -> parsed_row_count , (long )(bytes_decompressed ), ctx -> row_length );
654+ sas7bdat_get_current_row_id ( ctx ) , (long )(bytes_decompressed ), ctx -> row_length );
615655 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
616656 }
617657 goto cleanup ;
@@ -739,7 +779,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres
739779 readstat_error_t retval = READSTAT_OK ;
740780 if (ctx -> handle .metadata ) {
741781 readstat_metadata_t metadata = {
742- .row_count = ctx -> row_limit ,
782+ .row_count = ctx -> row_limit - ctx -> deleted_row_limit ,
743783 .var_count = ctx -> column_count ,
744784 .table_name = ctx -> table_name ,
745785 .file_label = ctx -> file_label ,
@@ -931,7 +971,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
931971 goto cleanup ;
932972 }
933973 }
934- } else if (shp_info .compression == SAS_COMPRESSION_ROW ) {
974+ } else if (shp_info .compression == SAS_COMPRESSION_ROW || shp_info . compression == SAS_COMPRESSION_DELETED_ROW ) {
935975 /* void */
936976 } else {
937977 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
@@ -947,6 +987,26 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
947987 return retval ;
948988}
949989
990+ static readstat_error_t sas7bdat_parse_deleted_row_bitmap (const char * page , const char * data ,
991+ size_t page_size , const unsigned char * * deleted_row_bitmap , sas7bdat_ctx_t * ctx ) {
992+ uint64_t page_unused_bytes ;
993+ if (ctx -> u64 ) {
994+ page_unused_bytes = sas_read8 (& page [24 ], ctx -> bswap );
995+ }
996+ else {
997+ page_unused_bytes = sas_read4 (& page [12 ], ctx -> bswap );
998+ }
999+ uint32_t row_count = ctx -> page_row_count < ctx -> row_limit ? ctx -> page_row_count : ctx -> row_limit ;
1000+ uint64_t deleted_row_bitmap_offset = row_count * ctx -> row_length + page_unused_bytes ;
1001+ uint32_t required_bytes = row_count / CHAR_BIT + (row_count % CHAR_BIT == 0 ? 0 : 1 );
1002+
1003+ if ((data - page ) + deleted_row_bitmap_offset + required_bytes > page_size ) {
1004+ return READSTAT_ERROR_PARSE ;
1005+ }
1006+ * deleted_row_bitmap = (const unsigned char * )data + deleted_row_bitmap_offset ;
1007+ return READSTAT_OK ;
1008+ }
1009+
9501010static readstat_error_t sas7bdat_parse_page_pass2 (const char * page , size_t page_size , sas7bdat_ctx_t * ctx ) {
9511011 uint16_t page_type ;
9521012
@@ -1007,6 +1067,10 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10071067 if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
10081068 goto cleanup ;
10091069 }
1070+ } else if (shp_info .compression == SAS_COMPRESSION_DELETED_ROW ) {
1071+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
1072+ goto cleanup ;
1073+ }
10101074 } else {
10111075 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
10121076 goto cleanup ;
@@ -1036,7 +1100,14 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10361100 goto cleanup ;
10371101 }
10381102 if (ctx -> handle .value ) {
1039- retval = sas7bdat_parse_rows (data , page + page_size - data , ctx );
1103+ const unsigned char * deleted_row_bitmap = NULL ;
1104+ if (page_type & SAS_PAGE_TYPE_DELETED_ROWS ) {
1105+ if ((retval = sas7bdat_parse_deleted_row_bitmap (page , data , page_size ,
1106+ & deleted_row_bitmap , ctx )) != READSTAT_OK ) {
1107+ goto cleanup ;
1108+ }
1109+ }
1110+ retval = sas7bdat_parse_rows (data , page + page_size - data , deleted_row_bitmap , ctx );
10401111 }
10411112 }
10421113cleanup :
@@ -1308,11 +1379,22 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *
13081379 goto cleanup ;
13091380 }
13101381
1382+ if (ctx -> handle .value && ctx -> parsed_deleted_row_count != ctx -> deleted_row_limit ) {
1383+ retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
1384+ if (ctx -> handle .error ) {
1385+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d deleted rows in file, found %d" ,
1386+ ctx -> deleted_row_limit , ctx -> parsed_deleted_row_count );
1387+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
1388+ }
1389+ goto cleanup ;
1390+ }
1391+
13111392 if (ctx -> handle .value && ctx -> parsed_row_count != ctx -> row_limit ) {
13121393 retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
13131394 if (ctx -> handle .error ) {
13141395 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d rows in file, found %d" ,
1315- ctx -> row_limit , ctx -> parsed_row_count );
1396+ ctx -> row_limit - ctx -> deleted_row_limit ,
1397+ ctx -> parsed_row_count - ctx -> parsed_deleted_row_count );
13161398 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
13171399 }
13181400 goto cleanup ;
0 commit comments