@@ -2,22 +2,23 @@ module ReadStat
22
33using ReadStat_jll
44
5- # #############################################################################
6- # #
7- # # Import
8- # #
9- # #############################################################################
5+ # #############################################################################
6+ # #
7+ # # Import
8+ # #
9+ # #############################################################################
1010
1111using DataValues: DataValueVector
12+ import DataValues
1213using Dates
1314
1415export ReadStatDataFrame, read_dta, read_sav, read_por, read_sas7bdat
1516
16- # #############################################################################
17- # #
18- # # Julia types that mirror C types
19- # #
20- # #############################################################################
17+ # #############################################################################
18+ # #
19+ # # Julia types that mirror C types
20+ # #
21+ # #############################################################################
2122
2223const READSTAT_TYPE_STRING = Cint (0 )
2324const READSTAT_TYPE_CHAR = Cint (1 )
@@ -33,11 +34,11 @@ const READSTAT_ERROR_MALLOC = Cint(3)
3334const READSTAT_ERROR_USER_ABORT = Cint (4 )
3435const READSTAT_ERROR_PARSE = Cint (5 )
3536
36- # #############################################################################
37- # #
38- # # Pure Julia types
39- # #
40- # #############################################################################
37+ # #############################################################################
38+ # #
39+ # # Pure Julia types
40+ # #
41+ # #############################################################################
4142
4243struct ReadStatValue
4344 union:: Int64
@@ -68,19 +69,21 @@ mutable struct ReadStatDataFrame
6869 filelabel:: String
6970 timestamp:: DateTime
7071 format:: Clong
72+ types_as_int:: Vector{Cint}
73+ hasmissings:: Vector{Bool}
7174
7275 ReadStatDataFrame () =
7376 new (Any[], Symbol[], DataType[], String[], String[], Csize_t[], Cint[], Cint[],
74- String[], Dict {String,Dict{Any,String}} (), 0 , 0 , " " , Dates. unix2datetime (0 ), 0 )
77+ String[], Dict {String,Dict{Any,String}} (), 0 , 0 , " " , Dates. unix2datetime (0 ), 0 , Cint[], Bool[] )
7578end
7679
7780include (" C_interface.jl" )
7881
79- # #############################################################################
80- # #
81- # # Julia functions
82- # #
83- # #############################################################################
82+ # #############################################################################
83+ # #
84+ # # Julia functions
85+ # #
86+ # #############################################################################
8487
8588function handle_info! (obs_count:: Cint , var_count:: Cint , ds_ptr:: Ptr{ReadStatDataFrame} )
8689 ds = unsafe_pointer_to_objref (ds_ptr)
@@ -139,15 +142,19 @@ get_alignment(variable::Ptr{Nothing}) = readstat_variable_get_measure(variable)
139142function handle_variable! (var_index:: Cint , variable:: Ptr{Nothing} ,
140143 val_label:: Cstring , ds_ptr:: Ptr{ReadStatDataFrame} )
141144 col = var_index + 1
142- ds = unsafe_pointer_to_objref (ds_ptr)
145+ ds = unsafe_pointer_to_objref (ds_ptr):: ReadStatDataFrame
146+
147+ missing_count = readstat_variable_get_missing_ranges_count (variable)
143148
144149 push! (ds. val_label_keys, (val_label == C_NULL ? " " : unsafe_string (val_label)))
145150 push! (ds. headers, get_name (variable))
146151 push! (ds. labels, get_label (variable))
147152 push! (ds. formats, get_format (variable))
148153 jtype = get_type (variable)
149154 push! (ds. types, jtype)
150- push! (ds. data, DataValueVector {jtype} (ds. rows))
155+ push! (ds. types_as_int, readstat_variable_get_type (variable))
156+ push! (ds. hasmissings, missing_count > 0 )
157+ push! (ds. data, DataValueVector {jtype} (Vector {jtype} (undef, ds. rows), fill (false , ds. rows)))
151158 push! (ds. storagewidths, get_storagewidth (variable))
152159 push! (ds. measures, get_measure (variable))
153160 push! (ds. alignments, get_alignment (variable))
@@ -173,40 +180,103 @@ as_native(val::Value) = convert(get_type(val), val)
173180
# Write a single cell of `col`: flag it as NA when `is_missing` is true,
# otherwise decode `value` into the column through the matching `readfield!`
# method. Kept as a separate function so every typed branch of
# `handle_value!` shares one implementation.
@inline function _store_value!(col, is_missing, row, value::ReadStatValue)
    if is_missing
        DataValues.unsafe_setindex_isna!(col, true, row)
    else
        readfield!(col, row, value)
    end
    return nothing
end

"""
    handle_value!(obs_index, variable, value, ds_ptr)

Store one cell into the `ReadStatDataFrame` referenced by `ds_ptr`.

`obs_index` is the zero-based observation index; the zero-based column index
is obtained from `readstat_variable_get_index(variable)`. Both are shifted by
one before indexing the Julia columns. The cell is flagged as NA when
`readstat_value_is_missing` reports it missing, otherwise it is decoded with
`readfield!`.

Always returns `Cint(0)`.

NOTE(review): presumably registered as the per-value callback of the ReadStat
parser in `C_interface.jl` -- confirm against that file.
"""
function handle_value!(obs_index::Cint, variable::Ptr{Nothing},
                       value::ReadStatValue, ds_ptr::Ptr{ReadStatDataFrame})
    ds = unsafe_pointer_to_objref(ds_ptr)::ReadStatDataFrame
    col_index = readstat_variable_get_index(variable) + 1
    row = obs_index + 1
    data = ds.data
    @inbounds type_as_int = ds.types_as_int[col_index]
    @inbounds has_ranges = ds.hasmissings[col_index]

    # Variables recorded without missing ranges are checked against C_NULL
    # instead of the variable pointer, exactly as before.
    is_missing = if has_ranges
        readstat_value_is_missing(value, variable)
    else
        readstat_value_is_missing(value, C_NULL)
    end

    # `ds.data` is untyped storage; asserting the concrete column type in each
    # branch lets the call into `_store_value!` be resolved statically.
    if type_as_int == READSTAT_TYPE_DOUBLE
        _store_value!(data[col_index]::DataValueVector{Float64}, is_missing, row, value)
    elseif type_as_int == READSTAT_TYPE_INT32
        _store_value!(data[col_index]::DataValueVector{Int32}, is_missing, row, value)
    elseif type_as_int == READSTAT_TYPE_STRING
        _store_value!(data[col_index]::DataValueVector{String}, is_missing, row, value)
    elseif type_as_int == READSTAT_TYPE_CHAR
        _store_value!(data[col_index]::DataValueVector{Int8}, is_missing, row, value)
    elseif type_as_int == READSTAT_TYPE_INT16
        _store_value!(data[col_index]::DataValueVector{Int16}, is_missing, row, value)
    elseif type_as_int == READSTAT_TYPE_FLOAT
        _store_value!(data[col_index]::DataValueVector{Float32}, is_missing, row, value)
    else
        # Unknown type code: fall back to dynamic dispatch on the stored column.
        _store_value!(data[col_index], is_missing, row, value)
    end

    return Cint(0)
end
184254
"""
    readfield!(dest::DataValueVector{String}, row, val::ReadStatValue)

Decode `val` as a C string via `readstat_string_value` and store a Julia copy
of it at `dest[row]`.

If the library returns a NULL pointer there is no string to copy, so the cell
is flagged as NA. Without this the cell would keep whatever the column was
allocated with, and `handle_variable!` allocates string columns from an
`undef` vector with an all-`false` NA mask, i.e. a "present" cell that was
never assigned.
"""
function readfield!(dest::DataValueVector{String}, row, val::ReadStatValue)
    ptr = ccall((:readstat_string_value, libreadstat), Cstring, (ReadStatValue,), val)
    if ptr ≠ C_NULL
        @inbounds DataValues.unsafe_setindex_value!(dest, unsafe_string(ptr), row)
    else
        # Nothing to copy: mark the cell missing instead of leaving it unassigned.
        @inbounds DataValues.unsafe_setindex_isna!(dest, true, row)
    end
end
191261
# Decode `val` as an 8-bit integer (`readstat_int8_value`) and write it to
# row `row` of `dest`.
function readfield!(dest::DataValueVector{Int8}, row, val::ReadStatValue)
    decoded = ccall((:readstat_int8_value, libreadstat), Int8, (ReadStatValue,), val)
    return @inbounds DataValues.unsafe_setindex_value!(dest, decoded, row)
end
195265
# Decode `val` as a 16-bit integer (`readstat_int16_value`) and write it to
# row `row` of `dest`.
function readfield!(dest::DataValueVector{Int16}, row, val::ReadStatValue)
    decoded = ccall((:readstat_int16_value, libreadstat), Int16, (ReadStatValue,), val)
    return @inbounds DataValues.unsafe_setindex_value!(dest, decoded, row)
end
199269
# Decode `val` as a 32-bit integer (`readstat_int32_value`) and write it to
# row `row` of `dest`.
function readfield!(dest::DataValueVector{Int32}, row, val::ReadStatValue)
    decoded = ccall((:readstat_int32_value, libreadstat), Int32, (ReadStatValue,), val)
    return @inbounds DataValues.unsafe_setindex_value!(dest, decoded, row)
end
203273
# Decode `val` as a double-precision float (`readstat_double_value`) and write
# it to row `row` of `dest`.
function readfield!(dest::DataValueVector{Float64}, row, val::ReadStatValue)
    decoded = ccall((:readstat_double_value, libreadstat), Float64, (ReadStatValue,), val)
    return @inbounds DataValues.unsafe_setindex_value!(dest, decoded, row)
end
207277
# Decode `val` as a single-precision float (`readstat_float_value`) and write
# it to row `row` of `dest`.
function readfield!(dest::DataValueVector{Float32}, row, val::ReadStatValue)
    decoded = ccall((:readstat_float_value, libreadstat), Float32, (ReadStatValue,), val)
    return @inbounds DataValues.unsafe_setindex_value!(dest, decoded, row)
end
211281
212282function handle_value_label! (val_labels:: Cstring , value:: Value , label:: Cstring , ds_ptr:: Ptr{ReadStatDataFrame} )
0 commit comments