@@ -42,6 +42,7 @@ typedef struct sas7bdat_ctx_s {
4242 readstat_io_t * io ;
4343 int bswap ;
4444 int did_submit_columns ;
45+ int requires_seek ;
4546
4647 uint32_t row_length ;
4748 uint32_t page_row_count ;
@@ -919,7 +920,7 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
919920 if ((retval = sas7bdat_parse_subheader_pointer (shp , page + page_size - shp , & shp_info , ctx )) != READSTAT_OK ) {
920921 goto cleanup ;
921922 }
922- if (shp_info .len > 0 && shp_info .compression != SAS_COMPRESSION_TRUNC ) {
923+ if (shp_info .len > 0 && shp_info .compression != SAS_COMPRESSION_TRUNC && shp_info . compression != SAS_COMPRESSION_MOVED ) {
923924 if ((retval = sas7bdat_validate_subheader_pointer (& shp_info , page_size , subheader_count , ctx )) != READSTAT_OK ) {
924925 goto cleanup ;
925926 }
@@ -931,7 +932,8 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
931932 goto cleanup ;
932933 }
933934 }
934- } else if (shp_info .compression == SAS_COMPRESSION_ROW ) {
935+ } else if (shp_info .compression == SAS_COMPRESSION_ROW || shp_info .compression == SAS_COMPRESSION_MOVED_ROW ||
936+ shp_info .compression == SAS_COMPRESSION_MYSTERY ) {
935937 /* void */
936938 } else {
937939 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
@@ -947,6 +949,83 @@ static readstat_error_t sas7bdat_parse_page_pass1(const char *page, size_t page_
947949 return retval ;
948950}
949951
952+ static readstat_error_t sas7bdat_parse_moved_row (uint64_t page_index , uint64_t subheader_index , sas7bdat_ctx_t * ctx ) {
953+ readstat_error_t retval = READSTAT_OK ;
954+ readstat_io_t * io = ctx -> io ;
955+
956+ const uint64_t page_size = ctx -> page_size ;
957+ char * page = NULL ;
958+
959+ if (page_index >= ctx -> page_count ) {
960+ retval = READSTAT_ERROR_PARSE ;
961+ goto cleanup ;
962+ }
963+
964+ ctx -> requires_seek = 1 ;
965+ if (io -> seek (ctx -> header_size + page_index * page_size , READSTAT_SEEK_SET , io -> io_ctx ) == -1 ) {
966+ retval = READSTAT_ERROR_SEEK ;
967+ if (ctx -> handle .error ) {
968+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Failed to seek to position %" PRId64
969+ " (= %" PRId64 " + %" PRId64 "*%" PRId64 ")" ,
970+ ctx -> header_size + page_index * page_size , ctx -> header_size , page_index , page_size );
971+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
972+ }
973+ goto cleanup ;
974+ }
975+ if ((page = readstat_malloc (page_size )) == NULL ) {
976+ retval = READSTAT_ERROR_MALLOC ;
977+ goto cleanup ;
978+ }
979+ if (io -> read (page , page_size , io -> io_ctx ) < page_size ) {
980+ retval = READSTAT_ERROR_READ ;
981+ goto cleanup ;
982+ }
983+
984+ uint16_t page_type = sas_read2 (& page [ctx -> page_header_size - 8 ], ctx -> bswap );
985+ if ((page_type & SAS_PAGE_TYPE_MASK ) == SAS_PAGE_TYPE_DATA || page_type & SAS_PAGE_TYPE_COMP ) {
986+ retval = READSTAT_ERROR_READ ;
987+ goto cleanup ;
988+ }
989+ uint16_t subheader_count = sas_read2 (& page [ctx -> page_header_size - 4 ], ctx -> bswap );
990+ if (subheader_index >= subheader_count ) {
991+ retval = READSTAT_ERROR_READ ;
992+ goto cleanup ;
993+ }
994+ uint64_t shp_offset = ctx -> page_header_size + subheader_index * ctx -> subheader_pointer_size ;
995+ if (shp_offset + ctx -> subheader_pointer_size >= page_size ) {
996+ retval = READSTAT_ERROR_READ ;
997+ goto cleanup ;
998+ }
999+
1000+ const char * shp = & page [shp_offset ];
1001+ subheader_pointer_t shp_info = { 0 };
1002+ if ((retval = sas7bdat_parse_subheader_pointer (shp , page + page_size - shp , & shp_info , ctx )) != READSTAT_OK ) {
1003+ goto cleanup ;
1004+ }
1005+ if ((retval = sas7bdat_validate_subheader_pointer (& shp_info , page_size , subheader_count , ctx )) != READSTAT_OK ) {
1006+ goto cleanup ;
1007+ }
1008+ if (shp_info .compression != SAS_COMPRESSION_MOVED_ROW ) {
1009+ retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
1010+ goto cleanup ;
1011+ }
1012+
1013+ if ((retval = sas7bdat_submit_columns_if_needed (ctx , 1 )) != READSTAT_OK ) {
1014+ goto cleanup ;
1015+ }
1016+ if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
1017+ goto cleanup ;
1018+ }
1019+
1020+ cleanup :
1021+
1022+ if (page ) {
1023+ free (page );
1024+ }
1025+
1026+ return retval ;
1027+ }
1028+
9501029static readstat_error_t sas7bdat_parse_page_pass2 (const char * page , size_t page_size , sas7bdat_ctx_t * ctx ) {
9511030 uint16_t page_type ;
9521031
@@ -976,7 +1055,13 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
9761055 if ((retval = sas7bdat_parse_subheader_pointer (shp , page + page_size - shp , & shp_info , ctx )) != READSTAT_OK ) {
9771056 goto cleanup ;
9781057 }
979- if (shp_info .len > 0 && shp_info .compression != SAS_COMPRESSION_TRUNC ) {
1058+ if (shp_info .len > 0 && shp_info .compression == SAS_COMPRESSION_MOVED ) {
1059+ uint64_t page_index = shp_info .offset - 1 ;
1060+ uint64_t subheader_index = shp_info .len - 1 ;
1061+ if ((retval = sas7bdat_parse_moved_row (page_index , subheader_index , ctx )) != READSTAT_OK ) {
1062+ goto cleanup ;
1063+ }
1064+ } else if (shp_info .len > 0 && shp_info .compression != SAS_COMPRESSION_TRUNC ) {
9801065 if ((retval = sas7bdat_validate_subheader_pointer (& shp_info , page_size , subheader_count , ctx )) != READSTAT_OK ) {
9811066 goto cleanup ;
9821067 }
@@ -1007,6 +1092,8 @@ static readstat_error_t sas7bdat_parse_page_pass2(const char *page, size_t page_
10071092 if ((retval = sas7bdat_parse_subheader_compressed (page + shp_info .offset , shp_info .len , ctx )) != READSTAT_OK ) {
10081093 goto cleanup ;
10091094 }
1095+ } else if (shp_info .compression == SAS_COMPRESSION_MOVED_ROW || shp_info .compression == SAS_COMPRESSION_MYSTERY ) {
1096+ /* void */
10101097 } else {
10111098 retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION ;
10121099 goto cleanup ;
@@ -1180,6 +1267,19 @@ static readstat_error_t sas7bdat_parse_all_pages_pass2(sas7bdat_ctx_t *ctx) {
11801267 if ((retval = sas7bdat_update_progress (ctx )) != READSTAT_OK ) {
11811268 goto cleanup ;
11821269 }
1270+ if (ctx -> requires_seek ) {
1271+ if (io -> seek (ctx -> header_size + i * ctx -> page_size , READSTAT_SEEK_SET , io -> io_ctx ) == -1 ) {
1272+ retval = READSTAT_ERROR_SEEK ;
1273+ if (ctx -> handle .error ) {
1274+ snprintf (ctx -> error_buf , sizeof (ctx -> error_buf ), "ReadStat: Failed to seek to position %" PRId64
1275+ " (= %" PRId64 " + %" PRId64 "*%" PRId64 ")" ,
1276+ ctx -> header_size + i * ctx -> page_size , ctx -> header_size , i , ctx -> page_size );
1277+ ctx -> handle .error (ctx -> error_buf , ctx -> user_ctx );
1278+ }
1279+ goto cleanup ;
1280+ }
1281+ ctx -> requires_seek = 0 ;
1282+ }
11831283 if (io -> read (ctx -> page , ctx -> page_size , io -> io_ctx ) < ctx -> page_size ) {
11841284 retval = READSTAT_ERROR_READ ;
11851285 goto cleanup ;
0 commit comments