Skip to content

Commit bf6dfae

Browse files
committed
Format files using DocumentFormat
1 parent 84f29ec commit bf6dfae

1 file changed

Lines changed: 110 additions & 40 deletions

File tree

src/ReadStat.jl

Lines changed: 110 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,23 @@ module ReadStat
22

33
using ReadStat_jll
44

5-
# #############################################################################
6-
# #
7-
# # Import
8-
# #
9-
# #############################################################################
5+
##############################################################################
6+
##
7+
## Import
8+
##
9+
##############################################################################
1010

1111
using DataValues: DataValueVector
12+
import DataValues
1213
using Dates
1314

1415
export ReadStatDataFrame, read_dta, read_sav, read_por, read_sas7bdat
1516

16-
# #############################################################################
17-
# #
18-
# # Julia types that mirror C types
19-
# #
20-
# #############################################################################
17+
##############################################################################
18+
##
19+
## Julia types that mirror C types
20+
##
21+
##############################################################################
2122

2223
const READSTAT_TYPE_STRING = Cint(0)
2324
const READSTAT_TYPE_CHAR = Cint(1)
@@ -33,11 +34,11 @@ const READSTAT_ERROR_MALLOC = Cint(3)
3334
const READSTAT_ERROR_USER_ABORT = Cint(4)
3435
const READSTAT_ERROR_PARSE = Cint(5)
3536

36-
# #############################################################################
37-
# #
38-
# # Pure Julia types
39-
# #
40-
# #############################################################################
37+
##############################################################################
38+
##
39+
## Pure Julia types
40+
##
41+
##############################################################################
4142

4243
struct ReadStatValue
4344
union::Int64
@@ -68,19 +69,21 @@ mutable struct ReadStatDataFrame
6869
filelabel::String
6970
timestamp::DateTime
7071
format::Clong
72+
types_as_int::Vector{Cint}
73+
hasmissings::Vector{Bool}
7174

7275
ReadStatDataFrame() =
7376
new(Any[], Symbol[], DataType[], String[], String[], Csize_t[], Cint[], Cint[],
74-
String[], Dict{String,Dict{Any,String}}(), 0, 0, "", Dates.unix2datetime(0), 0)
77+
String[], Dict{String,Dict{Any,String}}(), 0, 0, "", Dates.unix2datetime(0), 0, Cint[], Bool[])
7578
end
7679

7780
include("C_interface.jl")
7881

79-
# #############################################################################
80-
# #
81-
# # Julia functions
82-
# #
83-
# #############################################################################
82+
##############################################################################
83+
##
84+
## Julia functions
85+
##
86+
##############################################################################
8487

8588
function handle_info!(obs_count::Cint, var_count::Cint, ds_ptr::Ptr{ReadStatDataFrame})
8689
ds = unsafe_pointer_to_objref(ds_ptr)
@@ -139,15 +142,19 @@ get_alignment(variable::Ptr{Nothing}) = readstat_variable_get_measure(variable)
139142
function handle_variable!(var_index::Cint, variable::Ptr{Nothing},
140143
val_label::Cstring, ds_ptr::Ptr{ReadStatDataFrame})
141144
col = var_index + 1
142-
ds = unsafe_pointer_to_objref(ds_ptr)
145+
ds = unsafe_pointer_to_objref(ds_ptr)::ReadStatDataFrame
146+
147+
missing_count = readstat_variable_get_missing_ranges_count(variable)
143148

144149
push!(ds.val_label_keys, (val_label == C_NULL ? "" : unsafe_string(val_label)))
145150
push!(ds.headers, get_name(variable))
146151
push!(ds.labels, get_label(variable))
147152
push!(ds.formats, get_format(variable))
148153
jtype = get_type(variable)
149154
push!(ds.types, jtype)
150-
push!(ds.data, DataValueVector{jtype}(ds.rows))
155+
push!(ds.types_as_int, readstat_variable_get_type(variable))
156+
push!(ds.hasmissings, missing_count > 0)
157+
push!(ds.data, DataValueVector{jtype}(Vector{jtype}(undef, ds.rows), fill(false, ds.rows)))
151158
push!(ds.storagewidths, get_storagewidth(variable))
152159
push!(ds.measures, get_measure(variable))
153160
push!(ds.alignments, get_alignment(variable))
@@ -173,40 +180,103 @@ as_native(val::Value) = convert(get_type(val), val)
173180

174181
function handle_value!(obs_index::Cint, variable::Ptr{Nothing},
175182
value::ReadStatValue, ds_ptr::Ptr{ReadStatDataFrame})
176-
ds = unsafe_pointer_to_objref(ds_ptr)
177-
var_index = readstat_variable_get_index(variable)
178-
if !readstat_value_is_missing(value, variable)
179-
readfield!(ds.data[var_index + 1], obs_index + 1, value)
183+
ds = unsafe_pointer_to_objref(ds_ptr)::ReadStatDataFrame
184+
var_index = readstat_variable_get_index(variable) + 1
185+
data = ds.data
186+
@inbounds type_as_int = ds.types_as_int[var_index]
187+
188+
ismissing = if @inbounds(ds.hasmissings[var_index])
189+
readstat_value_is_missing(value, variable)
190+
else
191+
readstat_value_is_missing(value, C_NULL)
192+
end
193+
194+
if type_as_int == READSTAT_TYPE_DOUBLE
195+
col_float64 = data[var_index]::DataValueVector{Float64}
196+
197+
if ismissing
198+
DataValues.unsafe_setindex_isna!(col_float64, true, obs_index + 1)
199+
else
200+
readfield!(col_float64, obs_index + 1, value)
201+
end
202+
elseif type_as_int == READSTAT_TYPE_INT32
203+
col_int32 = data[var_index]::DataValueVector{Int32}
204+
205+
if ismissing
206+
DataValues.unsafe_setindex_isna!(col_int32, true, obs_index + 1)
207+
else
208+
readfield!(col_int32, obs_index + 1, value)
209+
end
210+
elseif type_as_int == READSTAT_TYPE_STRING
211+
col_string = data[var_index]::DataValueVector{String}
212+
213+
if ismissing
214+
DataValues.unsafe_setindex_isna!(col_string, true, obs_index + 1)
215+
else
216+
readfield!(col_string, obs_index + 1, value)
217+
end
218+
elseif type_as_int == READSTAT_TYPE_CHAR
219+
col_int8 = data[var_index]::DataValueVector{Int8}
220+
221+
if ismissing
222+
DataValues.unsafe_setindex_isna!(col_int8, true, obs_index + 1)
223+
else
224+
readfield!(col_int8, obs_index + 1, value)
225+
end
226+
elseif type_as_int == READSTAT_TYPE_INT16
227+
col_int16 = data[var_index]::DataValueVector{Int16}
228+
229+
if ismissing
230+
DataValues.unsafe_setindex_isna!(col_int16, true, obs_index + 1)
231+
else
232+
readfield!(col_int16, obs_index + 1, value)
233+
end
234+
elseif type_as_int == READSTAT_TYPE_FLOAT
235+
col_float32 = data[var_index]::DataValueVector{Float32}
236+
237+
if ismissing
238+
DataValues.unsafe_setindex_isna!(col_float32, true, obs_index + 1)
239+
else
240+
readfield!(col_float32, obs_index + 1, value)
241+
end
242+
else
243+
col_untyped = data[var_index]
244+
245+
if ismissing
246+
DataValues.unsafe_setindex_isna!(col_untyped, true, obs_index + 1)
247+
else
248+
readfield!(col_untyped, obs_index + 1, value)
249+
end
180250
end
181251

182252
return Cint(0)
183253
end
184254

185-
function readfield!(dest::DataValueVector{String}, row, val::Value)
186-
ptr = ccall((:readstat_string_value, libreadstat), Cstring, (Value,), val)
255+
function readfield!(dest::DataValueVector{String}, row, val::ReadStatValue)
256+
ptr = ccall((:readstat_string_value, libreadstat), Cstring, (ReadStatValue,), val)
187257
if ptr != C_NULL
188-
@inbounds dest[row] = unsafe_string(ptr)
258+
@inbounds DataValues.unsafe_setindex_value!(dest, unsafe_string(ptr), row)
189259
end
190260
end
191261

192-
function readfield!(dest::DataValueVector{Int8}, row, val::Value)
193-
@inbounds dest[row] = ccall((:readstat_int8_value, libreadstat), Int8, (Value,), val)
262+
function readfield!(dest::DataValueVector{Int8}, row, val::ReadStatValue)
263+
@inbounds DataValues.unsafe_setindex_value!(dest, ccall((:readstat_int8_value, libreadstat), Int8, (ReadStatValue,), val), row)
194264
end
195265

196-
function readfield!(dest::DataValueVector{Int16}, row, val::Value)
197-
@inbounds dest[row] = ccall((:readstat_int16_value, libreadstat), Int16, (Value,), val)
266+
function readfield!(dest::DataValueVector{Int16}, row, val::ReadStatValue)
267+
@inbounds DataValues.unsafe_setindex_value!(dest, ccall((:readstat_int16_value, libreadstat), Int16, (ReadStatValue,), val), row)
198268
end
199269

200-
function readfield!(dest::DataValueVector{Int32}, row, val::Value)
201-
@inbounds dest[row] = ccall((:readstat_int32_value, libreadstat), Int32, (Value,), val)
270+
function readfield!(dest::DataValueVector{Int32}, row, val::ReadStatValue)
271+
@inbounds DataValues.unsafe_setindex_value!(dest, ccall((:readstat_int32_value, libreadstat), Int32, (ReadStatValue,), val), row)
202272
end
203273

204-
function readfield!(dest::DataValueVector{Float64}, row, val::Value)
205-
@inbounds dest[row] = ccall((:readstat_double_value, libreadstat), Float64, (Value,), val)
274+
function readfield!(dest::DataValueVector{Float64}, row, val::ReadStatValue)
275+
@inbounds DataValues.unsafe_setindex_value!(dest, ccall((:readstat_double_value, libreadstat), Float64, (ReadStatValue,), val), row)
206276
end
207277

208-
function readfield!(dest::DataValueVector{Float32}, row, val::Value)
209-
@inbounds dest[row] = ccall((:readstat_float_value, libreadstat), Float32, (Value,), val)
278+
function readfield!(dest::DataValueVector{Float32}, row, val::ReadStatValue)
279+
@inbounds DataValues.unsafe_setindex_value!(dest, ccall((:readstat_float_value, libreadstat), Float32, (ReadStatValue,), val), row)
210280
end
211281

212282
function handle_value_label!(val_labels::Cstring, value::Value, label::Cstring, ds_ptr::Ptr{ReadStatDataFrame})

0 commit comments

Comments
 (0)