Skip to content

Commit d4816e0

Browse files
committed
update code
1 parent e09ca7a commit d4816e0

5 files changed

Lines changed: 150 additions & 117 deletions

File tree

.github/workflows/ci.yml

Lines changed: 23 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,9 @@
11
name: CI
22
on:
33
pull_request:
4-
branches:
5-
- main
64
push:
7-
branches:
8-
- main
9-
tags: '*'
5+
branches: [main]
6+
tags: ['*']
107
jobs:
118
test:
129
name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
@@ -15,31 +12,36 @@ jobs:
1512
fail-fast: false
1613
matrix:
1714
version:
18-
- '1' # Replace this with the minimum Julia version that your package supports. E.g. if your package requires Julia 1.5 or higher, change this to '1.5'.
19-
- '1' # Leave this line unchanged. '1' will automatically expand to the latest stable 1.x release of Julia.
15+
- '1.4'
16+
- '1' # automatically expands to the latest stable 1.x release of Julia
2017
os:
2118
- ubuntu-latest
2219
arch:
2320
- x64
21+
include:
22+
- os: windows-latest
23+
version: '1'
24+
arch: x86
25+
- os: macos-latest
26+
version: '1'
27+
arch: aarch64
28+
- os: ubuntu-latest
29+
version: 'nightly'
30+
arch: x64
31+
allow_failure: true
2432
steps:
25-
- uses: actions/checkout@v2
26-
- uses: julia-actions/setup-julia@v1
33+
- uses: actions/checkout@v5
34+
- uses: julia-actions/setup-julia@v2
2735
with:
2836
version: ${{ matrix.version }}
2937
arch: ${{ matrix.arch }}
30-
- uses: actions/cache@v1
31-
env:
32-
cache-name: cache-artifacts
33-
with:
34-
path: ~/.julia/artifacts
35-
key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }}
36-
restore-keys: |
37-
${{ runner.os }}-test-${{ env.cache-name }}-
38-
${{ runner.os }}-test-
39-
${{ runner.os }}-
38+
- uses: julia-actions/cache@v2
4039
- uses: julia-actions/julia-buildpkg@v1
4140
- uses: julia-actions/julia-runtest@v1
41+
env:
42+
JULIA_NUM_THREADS: 4,1
4243
- uses: julia-actions/julia-processcoverage@v1
43-
- uses: codecov/codecov-action@v1
44+
- uses: codecov/codecov-action@v5
4445
with:
45-
file: lcov.info
46+
files: lcov.info
47+
token: ${{ secrets.CODECOV_TOKEN }}

Project.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
name = "GroupedArrays"
22
uuid = "6407cd72-fade-4a84-8a1e-56e431fc1533"
33
authors = ["matthieugomez <gomez.matthieu@gmail.com>"]
4-
version = "0.3.3"
4+
version = "0.3.4"
55

66
[deps]
77
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
@@ -10,7 +10,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
1010
[compat]
1111
DataAPI = "1"
1212
Missings = "1"
13-
julia = "1"
13+
julia = "1.4"
1414

1515
[extras]
1616
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"

src/GroupedArrays.jl

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -88,26 +88,29 @@ function GroupedArray(args...; coalesce = false, sort = true)
8888
s = size(first(args))
8989
all(size(x) == s for x in args) || throw(DimensionMismatch("cannot match array sizes"))
9090
groups = Vector{Int}(undef, prod(s))
91-
ngroups, rhashes, gslots, sorted = row_group_slots(vec.(args), Val(false), groups, !coalesce, sort)
91+
ngroups, rhashes, gslots, sorted = row_group_slots!(vec.(args), Val(false), groups, !coalesce, sort, true)
9292
# sort groups if row_group_slots! hasn't already done that
9393
if sort === true && !sorted
9494
idx = find_index(GroupedVector{Int}(groups, ngroups))
9595
group_invperm = invperm(sortperm(collect(zip(map(x -> view(x, idx), args)...))))
96-
@inbounds for (i, gix) in enumerate(groups)
97-
groups[i] = gix > 0 ? group_invperm[gix] : 0
98-
end
96+
@inbounds for i in eachindex(groups)
97+
gix = groups[i]
98+
groups[i] = gix == 0 ? 0 : group_invperm[gix]
99+
end
99100
end
100101
T = !coalesce && any(eltype(x) >: Missing for x in args) ? Union{Int, Missing} : Int
101102
GroupedArray{T, length(s)}(reshape(groups, s), ngroups)
102103
end
103104

104105
# Find index of representative row for each group
106+
# now in fillfirst!
105107
function find_index(g::GroupedArray)
106108
groups, ngroups = g.groups, g.ngroups
107109
idx = Vector{Int}(undef, ngroups)
108110
filled = fill(false, ngroups)
109111
nfilled = 0
110-
@inbounds for (i, gix) in enumerate(groups)
112+
@inbounds for i in 1:length(groups)
113+
gix = groups[i]
111114
if gix > 0 && !filled[gix]
112115
filled[gix] = true
113116
idx[gix] = i

src/spawn.jl

Lines changed: 19 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -1,22 +1,6 @@
11
# This code is taken from DataFrames.jl/src/other/utils.jl
22

3-
if VERSION >= v"1.3"
4-
using Base.Threads: @spawn
5-
else
6-
# This is the definition of @async in Base
7-
macro spawn(expr)
8-
thunk = esc(:(()->($expr)))
9-
var = esc(Base.sync_varname)
10-
quote
11-
local task = Task($thunk)
12-
if $(Expr(:isdefined, var))
13-
push!($var, task)
14-
end
15-
schedule(task)
16-
end
17-
end
18-
end
19-
3+
using Base.Threads: @spawn
204

215
# Compute chunks of indices, each with at least `basesize` entries
226
# This method ensures balanced sizes by avoiding a small last chunk
@@ -36,51 +20,34 @@ function split_to_chunks(len::Integer, np::Integer)
3620
return (Int(1 + ((i - 1) * len′) ÷ np):Int((i * len′) ÷ np) for i in 1:np)
3721
end
3822

39-
if VERSION >= v"1.4"
40-
function _spawn_for_chunks_helper(iter, lbody, basesize)
41-
lidx = iter.args[1]
42-
range = iter.args[2]
43-
quote
44-
let x = $(esc(range)), basesize = $(esc(basesize))
45-
@assert firstindex(x) == 1
23+
function _spawn_for_chunks_helper(iter, lbody, basesize)
24+
lidx = iter.args[1]
25+
range = iter.args[2]
26+
quote
27+
let x = $(esc(range)), basesize = $(esc(basesize))
28+
@assert firstindex(x) == 1
4629

47-
nt = Threads.nthreads()
48-
len = length(x)
49-
if nt > 1 && len > basesize
50-
tasks = [Threads.@spawn begin
51-
for i in p
52-
local $(esc(lidx)) = @inbounds x[i]
53-
$(esc(lbody))
54-
end
30+
nt = Threads.nthreads()
31+
len = length(x)
32+
if nt > 1 && len > basesize
33+
tasks = [@spawn begin
34+
for i in p
35+
local $(esc(lidx)) = @inbounds x[i]
36+
$(esc(lbody))
5537
end
56-
for p in split_indices(len, basesize)]
57-
foreach(wait, tasks)
58-
else
59-
for i in eachindex(x)
60-
local $(esc(lidx)) = @inbounds x[i]
61-
$(esc(lbody))
62-
end
63-
end
64-
end
65-
nothing
66-
end
67-
end
68-
else
69-
function _spawn_for_chunks_helper(iter, lbody, basesize)
70-
lidx = iter.args[1]
71-
range = iter.args[2]
72-
quote
73-
let x = $(esc(range))
38+
end
39+
for p in split_indices(len, basesize)]
40+
foreach(wait, tasks)
41+
else
7442
for i in eachindex(x)
7543
local $(esc(lidx)) = @inbounds x[i]
7644
$(esc(lbody))
7745
end
7846
end
79-
nothing
8047
end
48+
nothing
8149
end
8250
end
83-
8451
"""
8552
@spawn_for_chunks basesize for i in range ... end
8653
Parallelize a `for` loop by spawning separate tasks

0 commit comments

Comments
 (0)