22# no automatic type inference is done, but types are allowed to be passed
33# for as many columns as desired; `CSV.detect(row, i)` can also be used to
44# use the same inference logic used in `CSV.File` for determining a cell's typed value
5- struct Rows{transpose, O, O2, IO, T, V}
5+ struct Rows{transpose, O, O2, IO, T, V, F }
66 name:: String
77 names:: Vector{Symbol} # only includes "select"ed columns
88 finaltypes:: Vector{Type} # only includes "select"ed columns
@@ -23,6 +23,8 @@ struct Rows{transpose, O, O2, IO, T, V}
2323 reusebuffer:: Bool
2424 columns:: Vector{AbstractVector} # for parsing, allocated once and used for each iteration
2525 values:: Vector{V} # once values are parsed, put in values; allocated on each iteration if reusebuffer=false
26+ filter:: F
27+ codes:: Vector{Int16}
2628 lookup:: Dict{Symbol, Int}
2729end
2830
@@ -111,6 +113,7 @@ function Rows(source;
111113 ignoreemptylines:: Bool = false ,
112114 select= nothing ,
113115 drop= nothing ,
116+ filter= nothing ,
114117 # parsing options
115118 missingstrings= String[],
116119 missingstring= " " ,
@@ -142,13 +145,14 @@ function Rows(source;
142145 h = Header (source, header, normalizenames, datarow, skipto, footerskip, limit, transpose, comment, use_mmap, ignoreemptylines, false , select, drop, missingstrings, missingstring, delim, ignorerepeated, quotechar, openquotechar, closequotechar, escapechar, dateformat, dateformats, decimal, truestrings, falsestrings, type, types, typemap, categorical, pool, lazystrings, strict, silencewarnings, debug, parsingdebug, true )
143146 columns = allocate (1 , h. cols, h. types, h. flags)
144147 values = all (x-> x == Union{String, Missing}, h. types) && lazystrings ? Vector {PosLen} (undef, h. cols) : Vector {Any} (undef, h. cols)
148+ codes = filter === nothing ? EMPTY_CODES : zeros (Int16, h. cols)
145149 finaltypes = copy (h. types)
146150 columnmap = [i for i = 1 : h. cols]
147151 deleteat! (h. names, h. todrop)
148152 deleteat! (finaltypes, h. todrop)
149153 deleteat! (columnmap, h. todrop)
150154 lookup = Dict (nm=> i for (i, nm) in enumerate (h. names))
151- return Rows {transpose, typeof(h.options), typeof(h.coloptions), typeof(h.buf), typeof(h.customtypes), eltype(values)} (
155+ return Rows {transpose, typeof(h.options), typeof(h.coloptions), typeof(h.buf), typeof(h.customtypes), eltype(values), typeof(filter) } (
152156 h. name,
153157 h. names,
154158 finaltypes,
@@ -169,6 +173,8 @@ function Rows(source;
169173 reusebuffer,
170174 columns,
171175 values,
176+ filter,
177+ codes,
172178 lookup,
173179 )
174180end
214220@inline function Base. iterate (r:: Rows{transpose, O, O2, IO, T, V} , (pos, len, row)= (r. datapos, r. len, 1 )) where {transpose, O, O2, IO, T, V}
215221 (pos > len || row > r. limit) && return nothing
216222 pos > len && return nothing
217- pos = parserow (1 , Val (transpose), r. cols, EMPTY_TYPEMAP, r. columns, r. datapos, r. buf, pos, len, r. positions, 0.0 , EMPTY_REFS, 1 , r. datarow + row - 2 , r. types, r. flags, false , r. options, r. coloptions, T)
223+ pos = parserow (1 , Val (transpose), r. cols, EMPTY_TYPEMAP, r. columns, r. datapos, r. buf, pos, len, r. positions, 0.0 , EMPTY_REFS, 1 , r. datarow + row - 2 , r. types, r. flags, r . filter, r . names, r . codes, false , r. options, r. coloptions, T)
218224 cols = r. cols
219225 values = r. reusebuffer ? r. values : Vector {V} (undef, cols)
220226 columns = r. columns
0 commit comments