55#include <string.h>
66#include <math.h>
77#include <inttypes.h>
8+ #include <limits.h>
89#include "readstat_sas.h"
910#include "readstat_sas_rle.h"
1011#include "../readstat_iconv.h"
@@ -46,8 +47,10 @@ typedef struct sas7bdat_ctx_s {
4647 uint32_t row_length ;
4748 uint32_t page_row_count ;
4849 uint32_t parsed_row_count ;
50+ uint32_t parsed_deleted_row_count ;
4951 uint32_t column_count ;
5052 uint32_t row_limit ;
53+ uint32_t deleted_row_limit ;
5154 uint32_t row_offset ;
5255
5356 uint64_t header_size ;
@@ -232,7 +235,7 @@ static readstat_error_t sas7bdat_parse_column_size_subheader(const char *subhead
232235static readstat_error_t sas7bdat_parse_row_size_subheader (const char * subheader , size_t len , sas7bdat_ctx_t * ctx ) {
233236 readstat_error_t retval = READSTAT_OK ;
234237 uint64_t total_row_count ;
235- uint64_t row_length , page_row_count ;
238+ uint64_t row_length , deleted_row_limit , page_row_count ;
236239
237240 if (len < (ctx -> u64 ? 250 : 190 )) {
238241 retval = READSTAT_ERROR_PARSE ;
@@ -242,13 +245,21 @@ static readstat_error_t sas7bdat_parse_row_size_subheader(const char *subheader,
242245 if (ctx -> u64 ) {
243246 row_length = sas_read8 (& subheader [40 ], ctx -> bswap );
244247 total_row_count = sas_read8 (& subheader [48 ], ctx -> bswap );
248+ deleted_row_limit = sas_read8 (& subheader [56 ], ctx -> bswap );
245249 page_row_count = sas_read8 (& subheader [120 ], ctx -> bswap );
246250 } else {
247251 row_length = sas_read4 (& subheader [20 ], ctx -> bswap );
248252 total_row_count = sas_read4 (& subheader [24 ], ctx -> bswap );
253+ deleted_row_limit = sas_read4 (& subheader [28 ], ctx -> bswap );
249254 page_row_count = sas_read4 (& subheader [60 ], ctx -> bswap );
250255 }
251256
257+ if (deleted_row_limit > total_row_count ) {
258+ retval = READSTAT_ERROR_PARSE ;
259+ goto cleanup ;
260+ }
261+ ctx -> deleted_row_limit = deleted_row_limit ;
262+
252263 sas_text_ref_t file_label_ref = sas7bdat_parse_text_ref (& subheader [len - 130 ], ctx );
253264 if (file_label_ref .length ) {
254265 if ((retval = sas7bdat_copy_text_ref (ctx -> file_label , sizeof (ctx -> file_label ),
@@ -393,6 +404,22 @@ static readstat_error_t sas7bdat_parse_column_format_subheader(const char *subhe
393404 return retval ;
394405}
395406
407+ static readstat_error_t sas7bdat_register_deleted_row (sas7bdat_ctx_t * ctx ) {
408+ if (ctx -> parsed_row_count == ctx -> row_limit ) {
409+ return READSTAT_OK ;
410+ }
411+ if (ctx -> parsed_deleted_row_count >= ctx -> deleted_row_limit ) {
412+ return READSTAT_ERROR_PARSE ;
413+ }
414+ ctx -> parsed_row_count ++ ;
415+ ctx -> parsed_deleted_row_count ++ ;
416+ return READSTAT_OK ;
417+ }
418+
419+ static uint32_t sas7bdat_get_current_row_id (sas7bdat_ctx_t * ctx ) {
420+ return ctx -> parsed_row_count - ctx -> parsed_deleted_row_count ;
421+ }
422+
396423static readstat_error_t sas7bdat_handle_data_value (readstat_variable_t * variable ,
397424 col_info_t * col_info , const char * col_data , sas7bdat_ctx_t * ctx ) {
398425 readstat_error_t retval = READSTAT_OK ;
@@ -409,7 +436,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
409436 if (ctx -> handle .error ) {
410437 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
411438 "ReadStat: Error converting string (row=%u, col=%u) to specified encoding: %.*s" ,
412- ctx -> parsed_row_count + 1 , col_info -> index + 1 , col_info -> width , col_data );
439+ sas7bdat_get_current_row_id ( ctx ) + 1 , col_info -> index + 1 , col_info -> width , col_data );
413440 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
414441 }
415442 goto cleanup ;
@@ -441,7 +468,7 @@ static readstat_error_t sas7bdat_handle_data_value(readstat_variable_t *variable
441468 value .v .double_value = dval ;
442469 }
443470 }
444- cb_retval = ctx -> handle .value (ctx -> parsed_row_count , variable , value , ctx -> user_ctx );
471+ cb_retval = ctx -> handle .value (sas7bdat_get_current_row_id ( ctx ) , variable , value , ctx -> user_ctx );
445472
446473 if (cb_retval != READSTAT_HANDLER_OK )
447474 retval = READSTAT_ERROR_USER_ABORT ;
@@ -490,7 +517,15 @@ static readstat_error_t sas7bdat_parse_single_row(const char *data, sas7bdat_ctx
490517 return retval ;
491518}
492519
493- static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len , sas7bdat_ctx_t * ctx ) {
/*
 * Test a single bit of a bitmap whose bits are numbered from the most
 * significant bit of each byte (index 0 is the top bit of bitmap[0]).
 *
 * Returns zero when the bit is clear. When the bit is set, the return value
 * is the isolated bit itself (for example 0x80 for index 0), not 1.
 */
static unsigned char sas7bdat_read_bitmap(const unsigned char *bitmap, int index) {
    int byte_pos = index / CHAR_BIT;
    int shift = (CHAR_BIT - 1) - (index % CHAR_BIT);
    unsigned char bit = 1 << shift;

    return bitmap[byte_pos] & bit;
}
526+
527+ static readstat_error_t sas7bdat_parse_rows (const char * data , size_t len ,
528+ const unsigned char * deleted_bitmap , sas7bdat_ctx_t * ctx ) {
494529 readstat_error_t retval = READSTAT_OK ;
495530 int i ;
496531 size_t row_offset = 0 ;
@@ -499,8 +534,13 @@ static readstat_error_t sas7bdat_parse_rows(const char *data, size_t len, sas7bd
499534 retval = READSTAT_ERROR_ROW_WIDTH_MISMATCH ;
500535 goto cleanup ;
501536 }
502- if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK )
537+ if (deleted_bitmap != NULL && sas7bdat_read_bitmap (deleted_bitmap , i )) {
538+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
539+ goto cleanup ;
540+ }
541+ } else if ((retval = sas7bdat_parse_single_row (& data [row_offset ], ctx )) != READSTAT_OK ) {
503542 goto cleanup ;
543+ }
504544
505545 row_offset += ctx -> row_length ;
506546 }
@@ -611,7 +651,7 @@ static readstat_error_t sas7bdat_parse_subheader_rle(const char *subheader, size
611651 if (ctx -> handle .error ) {
612652 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ),
613653 "ReadStat: Row #%d decompressed to %ld bytes (expected %d bytes)" ,
614- ctx -> parsed_row_count , (long )(bytes_decompressed ), ctx -> row_length );
654+ sas7bdat_get_current_row_id ( ctx ) , (long )(bytes_decompressed ), ctx -> row_length );
615655 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
616656 }
617657 goto cleanup ;
@@ -739,7 +779,7 @@ static readstat_error_t sas7bdat_submit_columns(sas7bdat_ctx_t *ctx, int compres
739779 readstat_error_t retval = READSTAT_OK ;
740780 if (ctx -> handle .metadata ) {
741781 readstat_metadata_t metadata = {
742- .row_count = ctx -> row_limit ,
782+ .row_count = ctx -> row_limit - ctx -> deleted_row_limit ,
743783 .var_count = ctx -> column_count ,
744784 .table_name = ctx -> table_name ,
745785 .file_label = ctx -> file_label ,
@@ -931,7 +971,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
931971 goto cleanup ;
932972 }
933973 }
934- } else if (shp_info .compression == SAS_COMPRESSION_ROW ) {
974+ } else if (shp_info .compression == SAS_COMPRESSION_ROW || shp_info . compression == SAS_COMPRESSION_DELETED_ROW ) {
935975 /* void */
936976 } else {
937977 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
@@ -947,6 +987,25 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
947987 return retval ;
948988}
949989
990+ static readstat_error_t sas7bdat_parse_deleted_row_bitmap (const char * page , const char * data ,
991+ size_t page_size , const unsigned char * * deleted_row_bitmap , sas7bdat_ctx_t * ctx ) {
992+ uint64_t page_unused_bytes ;
993+ if (ctx -> u64 ) {
994+ page_unused_bytes = sas_read8 (& page [24 ], ctx -> bswap );
995+ } else {
996+ page_unused_bytes = sas_read4 (& page [12 ], ctx -> bswap );
997+ }
998+ uint32_t row_count = ctx -> page_row_count < ctx -> row_limit ? ctx -> page_row_count : ctx -> row_limit ;
999+ uint64_t deleted_row_bitmap_offset = row_count * ctx -> row_length + page_unused_bytes ;
1000+ uint32_t required_bytes = row_count / CHAR_BIT + (row_count % CHAR_BIT == 0 ? 0 : 1 );
1001+
1002+ if ((data - page ) + deleted_row_bitmap_offset + required_bytes > page_size ) {
1003+ return READSTAT_ERROR_PARSE ;
1004+ }
1005+ * deleted_row_bitmap = (const unsigned char * )data + deleted_row_bitmap_offset ;
1006+ return READSTAT_OK ;
1007+ }
1008+
9501009static readstat_error_t sas7bdat_parse_page_pass2 (const char * page , size_t page_size , sas7bdat_ctx_t * ctx ) {
9511010 uint16_t page_type ;
9521011
@@ -1007,6 +1066,10 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10071066 if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
10081067 goto cleanup ;
10091068 }
1069+ } else if (shp_info .compression == SAS_COMPRESSION_DELETED_ROW ) {
1070+ if ((retval = sas7bdat_register_deleted_row (ctx )) != READSTAT_OK ) {
1071+ goto cleanup ;
1072+ }
10101073 } else {
10111074 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
10121075 goto cleanup ;
@@ -1036,7 +1099,14 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10361099 goto cleanup ;
10371100 }
10381101 if (ctx -> handle .value ) {
1039- retval = sas7bdat_parse_rows (data , page + page_size - data , ctx );
1102+ const unsigned char * deleted_row_bitmap = NULL ;
1103+ if (page_type & SAS_PAGE_TYPE_DELETED_ROWS ) {
1104+ if ((retval = sas7bdat_parse_deleted_row_bitmap (page , data , page_size ,
1105+ & deleted_row_bitmap , ctx )) != READSTAT_OK ) {
1106+ goto cleanup ;
1107+ }
1108+ }
1109+ retval = sas7bdat_parse_rows (data , page + page_size - data , deleted_row_bitmap , ctx );
10401110 }
10411111 }
10421112cleanup :
@@ -1308,11 +1378,22 @@ readstat_error_t readstat_parse_sas7bdat(readstat_parser_t *parser, const char *
13081378 goto cleanup ;
13091379 }
13101380
1381+ if (ctx -> handle .value && ctx -> parsed_deleted_row_count != ctx -> deleted_row_limit ) {
1382+ retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
1383+ if (ctx -> handle .error ) {
1384+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d deleted rows in file, found %d" ,
1385+ ctx -> deleted_row_limit , ctx -> parsed_deleted_row_count );
1386+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
1387+ }
1388+ goto cleanup ;
1389+ }
1390+
13111391 if (ctx -> handle .value && ctx -> parsed_row_count != ctx -> row_limit ) {
13121392 retval = READSTAT_ERROR_ROW_COUNT_MISMATCH ;
13131393 if (ctx -> handle .error ) {
13141394 snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Expected %d rows in file, found %d" ,
1315- ctx -> row_limit , ctx -> parsed_row_count );
1395+ ctx -> row_limit - ctx -> deleted_row_limit ,
1396+ ctx -> parsed_row_count - ctx -> parsed_deleted_row_count );
13161397 ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
13171398 }
13181399 goto cleanup ;
0 commit comments