Skip to content

Commit 19f0fe6

Browse files
authored
Reduce node names in resolve dependency errors. (#47)
1 parent e69f9ad commit 19f0fe6

8 files changed

Lines changed: 90 additions & 25 deletions

File tree

docs/changes.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ all releases are available on `Anaconda.org <https://anaconda.org/pytask/pytask>
1515
input for ``@pytask.mark.depends_on`` and ``@pytask.mark.produces`` are preserved as a
1616
dictionary inside the function.
1717
- :gh:`43` releases v0.0.10
18+
- :gh:`47` reduce node names in error messages while resolving dependencies.
1819

1920

2021
0.0.9 - 2020-10-28

environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies:
2828
- pydot
2929
- pytest-cov
3030
- tox-conda
31+
- virtualenv=20.0.33
3132

3233
# Documentation
3334
- nbsphinx

src/_pytask/collect.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from _pytask.mark import has_marker
1414
from _pytask.nodes import FilePathNode
1515
from _pytask.nodes import PythonFunctionTask
16-
from _pytask.nodes import shorten_node_name
16+
from _pytask.nodes import reduce_node_name
1717
from _pytask.report import CollectionReport
1818
from _pytask.report import format_collect_footer
1919

@@ -234,8 +234,8 @@ def pytask_collect_log(session, reports, tasks):
234234
if report.node is None:
235235
header = " Error "
236236
else:
237-
shortened_name = shorten_node_name(report.node, session.config["paths"])
238-
header = f" Could not collect {shortened_name} "
237+
short_name = reduce_node_name(report.node, session.config["paths"])
238+
header = f" Could not collect {short_name} "
239239

240240
click.echo(f"{{:_^{tm_width}}}".format(header))
241241

src/_pytask/execute.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from _pytask.exceptions import NodeNotFoundError
1414
from _pytask.mark import Mark
1515
from _pytask.nodes import FilePathNode
16-
from _pytask.nodes import shorten_node_name
16+
from _pytask.nodes import reduce_node_name
1717
from _pytask.report import ExecutionReport
1818
from _pytask.report import format_execute_footer
1919

@@ -168,7 +168,7 @@ def pytask_execute_log_end(session, reports):
168168
for report in reports:
169169
if not report.success:
170170

171-
task_name = shorten_node_name(report.task, session.config["paths"])
171+
task_name = reduce_node_name(report.task, session.config["paths"])
172172
message = f" Task {task_name} failed "
173173
if len(message) > tm_width:
174174
click.echo("_" * tm_width)

src/_pytask/nodes.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,11 @@ def _relative_to(path: Path, source: Path, include_source: bool = True):
355355
def _find_closest_ancestor(path: Path, potential_ancestors: List[Path]):
356356
"""Find the closest ancestor of a path.
357357
358+
In case only a single path to a task file is passed, we take the parent folder of
359+
this file. The check :meth:`pathlib.Path.is_file` only succeeds when the file
360+
exists. This must be true as otherwise an error is raised by :obj:`click` right at
361+
the beginning.
362+
358363
Examples
359364
--------
360365
>>> from pathlib import Path
@@ -371,6 +376,11 @@ def _find_closest_ancestor(path: Path, potential_ancestors: List[Path]):
371376
if ancestor == path:
372377
closest_ancestor = path
373378
break
379+
380+
# Paths can also point to files in which case we want to take the parent folder.
381+
if ancestor.is_file():
382+
ancestor = ancestor.parent
383+
374384
if ancestor in path.parents:
375385
if closest_ancestor is None or (
376386
len(path.relative_to(ancestor).parts)
@@ -381,13 +391,13 @@ def _find_closest_ancestor(path: Path, potential_ancestors: List[Path]):
381391
return closest_ancestor
382392

383393

384-
def shorten_node_name(node, paths: List[Path]):
385-
"""Shorten the node name.
394+
def reduce_node_name(node, paths: List[Path]):
395+
"""Reduce the node name.
386396
387397
The whole name of the node - which includes the drive letter - can be very long
388398
when using nested folder structures in bigger projects.
389399
390-
Thus, the part of the name which contains the path is replace by the relative
400+
Thus, the part of the name which contains the path is replaced by the relative
391401
path from one path in ``session.config["paths"]`` to the node.
392402
393403
"""

src/_pytask/resolve_dependencies.py

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from _pytask.exceptions import NodeNotFoundError
1313
from _pytask.exceptions import ResolvingDependenciesError
1414
from _pytask.mark import Mark
15+
from _pytask.nodes import reduce_node_name
1516
from _pytask.report import ResolvingDependenciesReport
1617
from _pytask.traceback import remove_traceback_from_exc_info
1718
from pony import orm
@@ -86,11 +87,11 @@ def pytask_resolve_dependencies_select_execution_dag(dag):
8687

8788

8889
@hookimpl
89-
def pytask_resolve_dependencies_validate_dag(dag):
90+
def pytask_resolve_dependencies_validate_dag(session, dag):
9091
"""Validate the DAG."""
9192
_check_if_dag_has_cycles(dag)
92-
_check_if_root_nodes_are_available(dag)
93-
_check_if_tasks_have_the_same_products(dag)
93+
_check_if_root_nodes_are_available(dag, session)
94+
_check_if_tasks_have_the_same_products(dag, session)
9495

9596

9697
def _have_task_or_neighbors_changed(task_name, dag):
@@ -132,7 +133,8 @@ def _check_if_dag_has_cycles(dag):
132133
)
133134

134135

135-
def _check_if_root_nodes_are_available(dag):
136+
def _check_if_root_nodes_are_available(dag, session):
137+
paths = session.config["paths"]
136138
missing_root_nodes = {}
137139

138140
for node in dag.nodes:
@@ -142,7 +144,12 @@ def _check_if_root_nodes_are_available(dag):
142144
try:
143145
dag.nodes[node]["node"].state()
144146
except NodeNotFoundError:
145-
missing_root_nodes[node] = list(dag.successors(node))
147+
# Reduce node names for better printing.
148+
short_node_name = reduce_node_name(dag.nodes[node]["node"], paths)
149+
short_successors = _reduce_names_of_multiple_nodes(
150+
dag.successors(node), dag, paths
151+
)
152+
missing_root_nodes[short_node_name] = short_successors
146153

147154
if missing_root_nodes:
148155
raise ResolvingDependenciesError(
@@ -153,15 +160,19 @@ def _check_if_root_nodes_are_available(dag):
153160
)
154161

155162

156-
def _check_if_tasks_have_the_same_products(dag):
163+
def _check_if_tasks_have_the_same_products(dag, session):
164+
paths = session.config["paths"]
157165
nodes_created_by_multiple_tasks = {}
158166

159167
for node in dag.nodes:
160168
is_node = "node" in dag.nodes[node]
161169
if is_node:
162170
parents = list(dag.predecessors(node))
163171
if len(parents) > 1:
164-
nodes_created_by_multiple_tasks[node] = parents
172+
# Reduce node names for better printing.
173+
short_node = reduce_node_name(dag.nodes[node]["node"], paths)
174+
short_parents = _reduce_names_of_multiple_nodes(parents, dag, paths)
175+
nodes_created_by_multiple_tasks[short_node] = short_parents
165176

166177
if nodes_created_by_multiple_tasks:
167178
raise ResolvingDependenciesError(
@@ -184,3 +195,11 @@ def pytask_resolve_dependencies_log(session, report):
184195

185196
click.echo("")
186197
click.echo("=" * tm_width)
198+
199+
200+
def _reduce_names_of_multiple_nodes(names, dag, paths):
201+
"""Reduce the names of multiple nodes in the DAG."""
202+
return [
203+
reduce_node_name(dag.nodes[n].get("node") or dag.nodes[n].get("task"), paths)
204+
for n in names
205+
]

tests/test_nodes.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from _pytask.nodes import MetaNode
1818
from _pytask.nodes import MetaTask
1919
from _pytask.nodes import produces
20-
from _pytask.nodes import shorten_node_name
20+
from _pytask.nodes import reduce_node_name
2121

2222

2323
@pytest.mark.unit
@@ -196,10 +196,13 @@ def test_relative_to(path, source, include_source, expected):
196196
[
197197
(Path("src/task.py"), [Path("src"), Path("bld")], Path("src")),
198198
(Path("tasks/task.py"), [Path("src"), Path("bld")], None),
199-
(Path("src/tasks/task.py"), [Path("src"), Path("src/tasks")], Path("tasks")),
199+
(Path("src/ts/task.py"), [Path("src"), Path("src/ts")], Path("src/ts")),
200+
(Path("src/in.txt"), [Path("src/task_d.py")], Path("src")),
200201
],
201202
)
202-
def task_find_closest_ancestor(path, potential_ancestors, expected):
203+
def test_find_closest_ancestor(monkeypatch, path, potential_ancestors, expected):
204+
# Treat a path as a file if it has a suffix, regardless of whether it exists on disk.
205+
monkeypatch.setattr("_pytask.nodes.pathlib.Path.is_file", lambda x: bool(x.suffix))
203206
result = _find_closest_ancestor(path, potential_ancestors)
204207
assert result == expected
205208

@@ -259,9 +262,9 @@ class FalseNode:
259262
),
260263
],
261264
)
262-
def test_shorten_node_name(node, paths, expectation, expected):
265+
def test_reduce_node_name(node, paths, expectation, expected):
263266
with expectation:
264-
result = shorten_node_name(node, paths)
267+
result = reduce_node_name(node, paths)
265268
assert result == expected
266269

267270

tests/test_resolve_dependencies.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import textwrap
22
from contextlib import ExitStack as does_not_raise # noqa: N813
3+
from pathlib import Path
34

45
import attr
56
import networkx as nx
@@ -37,6 +38,10 @@ def state(self):
3738
raise NodeNotFoundError
3839

3940

41+
class DummySession:
42+
pass
43+
44+
4045
@pytest.mark.unit
4146
def test_create_dag():
4247
task = Task(
@@ -53,22 +58,30 @@ def test_create_dag():
5358
def test_check_if_root_nodes_are_available():
5459
dag = nx.DiGraph()
5560

61+
root = Path("directory")
62+
session = DummySession()
63+
session.config = {"paths": [root]}
64+
5665
task = Task("task")
66+
task.path = root.joinpath("task_dummy")
67+
task.base_name = "task_dummy"
5768
dag.add_node(task.name, task=task)
5869

5970
available_node = Node("available")
71+
available_node.path = root.joinpath("available_node")
6072
dag.add_node(available_node.name, node=available_node)
6173
dag.add_edge(available_node.name, task.name)
6274

6375
with does_not_raise():
64-
_check_if_root_nodes_are_available(dag)
76+
_check_if_root_nodes_are_available(dag, session)
6577

6678
missing_node = Node("missing")
79+
missing_node.path = root.joinpath("missing_node")
6780
dag.add_node(missing_node.name, node=missing_node)
6881
dag.add_edge(missing_node.name, task.name)
6982

7083
with pytest.raises(ResolvingDependenciesError):
71-
_check_if_root_nodes_are_available(dag)
84+
_check_if_root_nodes_are_available(dag, session)
7285

7386

7487
@pytest.mark.end_to_end
@@ -78,16 +91,24 @@ def test_check_if_root_nodes_are_available_end_to_end(tmp_path, runner):
7891
7992
@pytask.mark.depends_on("in.txt")
8093
@pytask.mark.produces("out.txt")
81-
def task_dummy(produces):
94+
def task_d(produces):
8295
produces.write_text("1")
8396
"""
84-
tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source))
97+
tmp_path.joinpath("task_d.py").write_text(textwrap.dedent(source))
8598

8699
result = runner.invoke(cli, [tmp_path.as_posix()])
87100

88101
assert result.exit_code == 4
89102
assert "Failures during resolving dependencies" in result.output
90103

104+
# Ensure that node names are reduced.
105+
assert "Failures during resolving dependencies" in result.output
106+
assert "There are some dependencies missing which do not" in result.output
107+
assert tmp_path.joinpath("task_d.py").as_posix() + "::task_d" not in result.output
108+
assert tmp_path.name + "/task_d.py::task_d" in result.output
109+
assert tmp_path.joinpath("in.txt").as_posix() not in result.output
110+
assert tmp_path.name + "/in.txt" in result.output
111+
91112

92113
@pytest.mark.end_to_end
93114
def test_cycle_in_dag(tmp_path, runner):
@@ -110,6 +131,7 @@ def task_2(produces):
110131

111132
assert result.exit_code == 4
112133
assert "Failures during resolving dependencies" in result.output
134+
assert "The DAG contains cycles which means a dependency" in result.output
113135

114136

115137
@pytest.mark.end_to_end
@@ -125,9 +147,18 @@ def task_1(produces):
125147
def task_2(produces):
126148
produces.write_text("2")
127149
"""
128-
tmp_path.joinpath("task_dummy.py").write_text(textwrap.dedent(source))
150+
tmp_path.joinpath("task_d.py").write_text(textwrap.dedent(source))
129151

130152
result = runner.invoke(cli, [tmp_path.as_posix()])
131153

132154
assert result.exit_code == 4
133155
assert "Failures during resolving dependencies" in result.output
156+
assert "There are some tasks which produce the same output." in result.output
157+
158+
# Ensure that node names are reduced.
159+
assert tmp_path.joinpath("task_d.py").as_posix() + "::task_1" not in result.output
160+
assert tmp_path.name + "/task_d.py::task_1" in result.output
161+
assert tmp_path.joinpath("task_d.py").as_posix() + "::task_2" not in result.output
162+
assert tmp_path.name + "/task_d.py::task_2" in result.output
163+
assert tmp_path.joinpath("out.txt").as_posix() not in result.output
164+
assert tmp_path.name + "/out.txt" in result.output

0 commit comments

Comments
 (0)