Skip to content

Commit c97af6f

Browse files
authored
Add more arg types in Map transform (#305)
* Add more arg types in Map transform * Fix show test * Improve docstring * Improve docstring
1 parent ccd0a29 commit c97af6f

3 files changed

Lines changed: 65 additions & 24 deletions

File tree

src/transforms/map.jl

Lines changed: 33 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,25 @@
22
# Licensed under the MIT License. See LICENSE in the project root.
33
# ------------------------------------------------------------------
44

5-
const TargetName = Union{Symbol,AbstractString}
6-
const PairWithTarget = Pair{<:Any,<:Pair{<:Function,<:TargetName}}
7-
const PairWithoutTarget = Pair{<:Any,<:Function}
8-
const MapPair = Union{PairWithTarget,PairWithoutTarget}
5+
# supported argument types
6+
const Callable = Union{Function,Type}
7+
const Target = Union{Symbol,AbstractString}
8+
const ColsCallableTarget = Pair{<:Any,<:Pair{<:Callable,<:Target}}
9+
const ColsCallable = Pair{<:Any,<:Callable}
10+
const CallableTarget = Pair{<:Callable,<:Target}
11+
const MapArg = Union{ColsCallableTarget,ColsCallable,CallableTarget,Callable}
912

1013
"""
1114
Map(cols₁ => fun₁ => target₁, cols₂ => fun₂, ..., colsₙ => funₙ => targetₙ)
1215
1316
Applies the `funᵢ` function to the columns selected by `colsᵢ` using
1417
the `map` function and saves the result in a new column named `targetᵢ`.
18+
Types are also allowed in place of functions to construct objects with
19+
arguments from the columns.
1520
1621
The column selection can be a single column identifier (index or name),
17-
a collection of identifiers or a regular expression (regex).
22+
a collection of identifiers or a regular expression (regex). It can also
23+
be omitted to apply the function to all columns.
1824
1925
Passing a target column name is optional and when omitted a new name
2026
is generated by joining the function name with the selected column names.
@@ -28,11 +34,14 @@ Map([2, 3] => ((b, c) -> 2b + c))
2834
Map([:a, :c] => ((a, c) -> 2a * 3c) => :col1)
2935
Map(["c", "a"] => ((c, a) -> 3c / a) => :col1, "c" => tan)
3036
Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
37+
Map(sin => "seno")
38+
Map(cos)
3139
```
3240
3341
## Notes
3442
35-
* Anonymous functions must be passed with parentheses as in the examples above;
43+
* Anonymous functions must be passed with parentheses as in the examples above
44+
3645
* Some function names are treated in a special way, they are:
3746
* Anonymous functions: `#1` -> `f1`;
3847
* Composed functions: `outer ∘ inner` -> `outer_inner`;
@@ -41,20 +50,20 @@ Map(r"[abc]" => ((a, b, c) -> a^2 - 2b + c) => "col1")
4150
"""
4251
struct Map <: StatelessFeatureTransform
4352
selectors::Vector{ColumnSelector}
44-
funs::Vector{Function}
53+
funs::Vector{Callable}
4554
targets::Vector{Union{Nothing,Symbol}}
4655
end
4756

48-
Map() = throw(ArgumentError("cannot create Map transform without arguments"))
49-
50-
function Map(pairs::MapPair...)
51-
tuples = map(_extract, pairs)
52-
selectors = [t[1] for t in tuples]
53-
funs = [t[2] for t in tuples]
54-
targets = [t[3] for t in tuples]
55-
Map(selectors, funs, targets)
57+
function Map(args::MapArg...)
58+
tups = map(_extract, args)
59+
sels = [t[1] for t in tups]
60+
funs = [t[2] for t in tups]
61+
tars = [t[3] for t in tups]
62+
Map(sels, funs, tars)
5663
end
5764

65+
Map() = throw(ArgumentError("cannot create Map transform without arguments"))
66+
5867
function applyfeat(transform::Map, feat, prep)
5968
cols = Tables.columns(feat)
6069
names = Tables.columnnames(cols)
@@ -76,13 +85,10 @@ function applyfeat(transform::Map, feat, prep)
7685
newfeat, nothing
7786
end
7887

79-
_extract(p::PairWithTarget) = selector(first(p)), first(last(p)), Symbol(last(last(p)))
80-
_extract(p::PairWithoutTarget) = selector(first(p)), last(p), nothing
81-
82-
_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f)
83-
_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f)
84-
_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner)
85-
_funname(fun) = string(fun)
88+
_extract(arg::ColsCallableTarget) = selector(first(arg)), first(last(arg)), Symbol(last(last(arg)))
89+
_extract(arg::ColsCallable) = selector(first(arg)), last(arg), nothing
90+
_extract(arg::CallableTarget) = AllSelector(), first(arg), Symbol(last(arg))
91+
_extract(arg::Callable) = AllSelector(), arg, nothing
8692

8793
function _makename(snames, fun)
8894
funname = _funname(fun)
@@ -91,3 +97,8 @@ function _makename(snames, fun)
9197
end
9298
Symbol(funname, :_, join(snames, "_"))
9399
end
100+
101+
_funname(fun::Base.Fix1) = "fix1_" * _funname(fun.f)
102+
_funname(fun::Base.Fix2) = "fix2_" * _funname(fun.f)
103+
_funname(fun::ComposedFunction) = _funname(fun.outer) * "_" * _funname(fun.inner)
104+
_funname(fun) = string(fun)

test/shows.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,14 +279,14 @@
279279
# compact mode
280280
iostr = sprint(show, T)
281281
@test iostr ==
282-
"Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Function[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])"
282+
"Map(selectors: ColumnSelector[:a, [:a, :b]], funs: Union{Function, Type}[sin, $(nameof(fun))], targets: Union{Nothing, Symbol}[nothing, :c])"
283283

284284
# full mode
285285
iostr = sprint(show, MIME("text/plain"), T)
286286
@test iostr == """
287287
Map transform
288288
├─ selectors: ColumnSelectors.ColumnSelector[:a, [:a, :b]]
289-
├─ funs: Function[sin, $(typeof(fun))()]
289+
├─ funs: Union{Function, Type}[sin, $(typeof(fun))()]
290290
└─ targets: Union{Nothing, Symbol}[nothing, :c]"""
291291
end
292292

test/transforms/map.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,36 @@
9696
@test Tables.schema(n).names == (:fix2_hypot_a,)
9797
@test n.fix2_hypot_a == f.(t.a)
9898

99+
# function and target
100+
f = (a, b, c, d) -> a + b + c + d
101+
T = Map(f => "target")
102+
n, c = apply(T, t)
103+
@test Tables.schema(n).names == (:target,)
104+
@test n.target == f.(t.a, t.b, t.c, t.d)
105+
106+
# function alone
107+
f = (a, b, c, d) -> a + b + c + d
108+
fname = replace(string(f), "#" => "f")
109+
colname = Symbol(fname, :_a, :_b, :_c, :_d)
110+
T = Map(f)
111+
n, c = apply(T, t)
112+
@test Tables.schema(n).names == (colname,)
113+
@test Tables.getcolumn(n, colname) == f.(t.a, t.b, t.c, t.d)
114+
115+
# type and target
116+
struct Foo a; b; c; d end
117+
T = Map(Foo => "target")
118+
n, c = apply(T, t)
119+
@test Tables.schema(n).names == (:target,)
120+
@test n.target == Foo.(t.a, t.b, t.c, t.d)
121+
122+
# type alone
123+
struct Bar a; b; c; d end
124+
T = Map(Bar)
125+
n, c = apply(T, t)
126+
@test Tables.schema(n).names == (:Bar_a_b_c_d,)
127+
@test n.Bar_a_b_c_d == Bar.(t.a, t.b, t.c, t.d)
128+
99129
# error: cannot create Map transform without arguments
100130
@test_throws ArgumentError Map()
101131
end

0 commit comments

Comments
 (0)