Skip to content

Commit 4ffa1e5

Browse files
authored
fix link-all.py with high CPU count and few files (#785)
Code changes by https://github.com/aggiehorns. When there are more CPUs than files to link, the `files_per_job` variable became `0`. The begin and end indexes were then wrong, and multiple jobs tried to link the same file. Fixes: #664
1 parent f36cc9e commit 4ffa1e5

1 file changed

Lines changed: 10 additions & 13 deletions

File tree

scripts/link-all.py

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python3
22

33
import argparse
4+
import math
45
import os
56
import sys
67
import multiprocessing
@@ -19,29 +20,25 @@
1920
src_list.append(line.strip())
2021

2122
list_len = len(src_list)
22-
proc_num = multiprocessing.cpu_count()
23-
files_per_job = list_len // proc_num
23+
proc_num = min(multiprocessing.cpu_count(), 32)
24+
files_per_job = max(math.ceil(list_len / proc_num), 1)
2425

25-
def job(job_index):
26+
def job(chunk):
2627
try:
27-
begin_ind = files_per_job * job_index
28-
end_ind = files_per_job * (job_index + 1)
29-
last_job = (job_index == proc_num - 1)
30-
if last_job:
31-
end_ind = list_len
32-
for i in range(begin_ind, end_ind):
33-
filename = src_list[i]
28+
for filename in chunk:
3429
link_from = os.path.join(cmd_args.cellar, filename)
3530
link_to = os.path.join(cmd_args.dest, filename)
36-
os.link(link_from, link_to)
31+
if not os.path.exists(link_to):
32+
os.link(link_from, link_to)
3733
return 0
3834
except Exception as exc:
3935
print('Exception caught: {}'.format(exc))
4036
return 1
4137

4238
def run_link():
43-
pool = multiprocessing.Pool(processes=proc_num)
44-
result = pool.map(job, range(proc_num))
39+
chunks = [src_list[i:i + files_per_job] for i in range(0, list_len, files_per_job)]
40+
pool = multiprocessing.Pool(processes=len(chunks))
41+
result = pool.map(job, chunks)
4542
pool.close()
4643
pool.join()
4744

0 commit comments

Comments
 (0)