Skip to content

Commit 71236c8

Browse files
authored
Merge pull request #533 from EvgSkv/main
Update ti2023
2 parents a46dae4 + bf9ada5 commit 71236c8

17 files changed

Lines changed: 17157 additions & 4 deletions

common/benchmarking.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Simple benchmarking utility.
2+
3+
from IPython.core.magic import register_cell_magic
4+
from IPython import get_ipython
5+
import time
6+
from logica.common import sqlite3_logica
7+
import pandas
8+
import signal
9+
10+
# Elapsed time per loop item (or a 'TIMEOUT (> N)' marker string),
# accumulated across all %%loop invocations until Clear() is called.
timing = {}
# Rendered timing tables, one string per %%loop invocation.
reports = []
# Per-item limit in seconds; passed to signal.alarm() by %%loop.
timeout = 200
13+
14+
def Clear():
  """Resets the module-level benchmark state.

  Rebinds `timing` to a fresh empty dict and `reports` to a fresh empty
  list. The previous objects are not mutated, so references obtained
  before the call keep their old contents.
  """
  global timing, reports
  timing = {}
  reports = []
18+
19+
20+
@register_cell_magic
def loop(line, cell):
  """Cell magic: runs the cell once per item and reports per-item timing.

  The magic line is evaluated in the user namespace and must produce a
  `(problem_name, iterator)` pair. For every item of the iterator:
    * the item is stored in the user namespace as `loop_parameter`;
    * each literal `{loop_parameter}` in the cell source is replaced by
      the item's string form and the resulting source is executed.

  Every run is limited to `timeout` seconds with SIGALRM. After the first
  timeout the remaining items are not executed and are recorded as timed
  out. Results are written to the module-level `timing` dict; the
  rendered table is appended to `reports` and printed.

  Args:
    line: Text following `%%loop`; an expression yielding
      `(problem_name, iterator)`.
    cell: Source of the cell body.

  Note: relies on `signal.SIGALRM` / `signal.alarm`, which the standard
  library provides on Unix only.
  """
  local_timing = {}
  ip = get_ipython()
  problem_name, iterator = ip.ev(line)
  # Read the limit once so the alarm and the reported label always agree.
  limit = timeout
  timeout_label = 'TIMEOUT (> %d)' % limit
  stop = False

  for item in iterator:
    if stop:
      # (item,) keeps %-formatting correct when the item is itself a tuple.
      print('Skipping %s (previous timeout).' % (item,))
      timing[item] = local_timing[item] = timeout_label
      continue

    print('Running %s.' % (item,))
    ip.user_ns['loop_parameter'] = item

    timed_out = False

    def handler(signum, frame):
      # Record that the interruption came from the alarm, then interrupt
      # whatever the cell is currently executing.
      nonlocal timed_out
      timed_out = True
      raise KeyboardInterrupt("Timeout")

    old_handler = signal.getsignal(signal.SIGALRM)
    signal.signal(signal.SIGALRM, handler)
    signal.alarm(limit)

    start_time = time.perf_counter()
    try:
      # str(item) lets non-string items (e.g. ints) be substituted.
      ip.run_cell(cell.replace('{loop_parameter}', str(item)))
    finally:
      # Always cancel the pending alarm and restore the previous handler,
      # even if the cell run raised.
      signal.alarm(0)
      signal.signal(signal.SIGALRM, old_handler)
      elapsed = time.perf_counter() - start_time

    if timed_out:
      print('*** TIMEOUT on %s ***' % (item,))
      stop = True
      elapsed = timeout_label

    timing[item] = elapsed
    local_timing[item] = elapsed

  report = (' === Timing for %s ===\n' % problem_name) + (
      sqlite3_logica.DataframeAsArtisticTable(
          pandas.DataFrame({'problem': list(local_timing.keys()),
                            'time': list(local_timing.values())})))
  reports.append(report)
  print(report)
68+

common/concertina_lib.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,11 @@ def __init__(self, config, engine, display_mode='colab', iterations=None):
176176
self.all_actions = {a["name"] for a in self.config}
177177
self.complete_actions = set()
178178
self.running_actions = set()
179-
assert display_mode in ('colab', 'terminal', 'colab-text', 'silent'), (
179+
self.show_only_running = False
180+
if os.getenv('LOGICA_TERMINAL_ONELINE', 'no') == 'yes':
181+
self.show_only_running = True
182+
assert display_mode in ('colab', 'terminal',
183+
'colab-text', 'silent'), (
180184
'Unrecognized display mode: %s' % display_mode)
181185
self.display_mode = display_mode
182186
self.display_id = self.GetDisplayId()
@@ -293,6 +297,13 @@ def AsArtGraph():
293297
extra_lines = self.ProgressBar().split('\n')
294298
return AsArtGraph().GetPicture(updating=updating,
295299
extra_lines=extra_lines)
300+
def ShowRunning(self, updating):
  """Returns a one-line progress string naming the first running action.

  Args:
    updating: Unused; kept so the signature stays unchanged for callers.

  Returns:
    '*' when no node is marked as running; otherwise
    '[<complete> / <all>] <node>' where the counts are the sizes of
    `self.complete_actions` and `self.all_actions` and <node> is the first
    running node as returned by `AsNodesAndEdges`.
  """
  nodes, _ = self.AsNodesAndEdges()
  # A node counts as running when it starts with the ANSI bold escape.
  # NOTE(review): presumably AsNodesAndEdges renders running actions in
  # bold -- confirm there, since this check depends on that formatting.
  first_running = next((n for n in nodes if n.startswith('\033[1m')), None)
  if first_running is None:
    return '*'
  return '[%d / %d] ' % (len(self.complete_actions),
                         len(self.all_actions)) + first_running
296307

297308
def AsNodesAndEdges(self):
298309
"""Nodes and edges to display in terminal."""
@@ -405,14 +416,18 @@ def UpdateDisplay(self, final=False):
405416
self.display_update_period = min(0.5, self.display_update_period * 1.2)
406417
if (now - self.recent_display_update_seconds <
407418
self.display_update_period and
408-
not final):
419+
not final and
420+
not self.show_only_running):
409421
# Avoid frequent display updates slowing down execution.
410422
return
411423
self.recent_display_update_seconds = now
412424
if self.display_mode == 'colab':
413425
update_display(self.AsGraphViz(), display_id=self.display_id)
414426
elif self.display_mode == 'terminal':
415-
print(self.AsTextPicture(updating=True))
427+
if self.show_only_running:
428+
print(self.ShowRunning(updating=True))
429+
else:
430+
print(self.AsTextPicture(updating=True))
416431
elif self.display_mode == 'colab-text':
417432
update_display(
418433
self.StateAsSimpleHTML(),

common/graph.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,4 +217,17 @@ def Convert(l):
217217

218218
def HierarchicalOptions():
  """Returns layout options for a hierarchical, directed 'UD' layout.

  NOTE(review): presumably passed to the vis.js graph renderer -- confirm
  against the caller.
  """
  return {'layout': {'hierarchical': {'direction': 'UD',
                                      'sortMethod': 'directed'}}}
221+
222+
def InstallRequire():
  """Displays an HTML snippet that loads RequireJS in the notebook page.

  The snippet fetches require.js 2.3.6 from cdnjs and then assigns
  `require` to `window.require`. IPython is imported inside the function,
  so importing this module does not require IPython.
  """
  from IPython.display import display, HTML
  # This script manually loads RequireJS so that logica/vis.js can work
  display(HTML('''
<script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
<script>
// This ensures that even if require is loaded late,
// the logica script can still find it.
window.require = require;
</script>
'''))
233+

examples/graph/tgdk/README.md

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Benchmark Artifacts for "Logica-TGD: Transforming Graph Databases Logically"
2+
3+
This directory contains reproducible benchmark notebooks for the paper:
4+
5+
> **Logica-TGD: Transforming Graph Databases Logically**
6+
> Evgeny Skvortsov, Yilin Xia, Bertram Ludäscher, Shawn Bowers
7+
> *TGDK, 2026*
8+
9+
## Benchmarks
10+
11+
We compare four systems on graph computation problems (transitive closure,
12+
pairwise distances, same generation):
13+
14+
- **Logica** — compiling to DuckDB SQL
15+
- **Soufflé** — Datalog engine with parallel evaluation
16+
- **DuckPGQ** — DuckDB extension implementing SQL/PGQ (Cypher-style queries)
17+
- **Nemo** — single-threaded Rust rule engine (for the Nemo column only)
18+
19+
All benchmarks were run on a Google Cloud **c2d-standard-32** instance
20+
(32 vCPUs, 128 GB RAM) using Logica 1.3.1415926535897, DuckDB 1.3.2,
21+
Soufflé 2.4, and Nemo 0.10.0.
22+
23+
### Main notebooks
24+
25+
| Notebook | Description |
26+
|----------|-------------|
27+
| `benchmark_logica.ipynb` | Logica benchmarks (all problems). **Run this first** — it generates input data (CSV files and `graphs.db`) used by the other notebooks. |
28+
| `benchmark_souffle.ipynb` | Soufflé benchmarks (compiled mode) |
29+
| `benchmark_cypher.ipynb` | DuckPGQ / Cypher benchmarks |
30+
31+
### Auxiliary materials
32+
33+
| File | Description |
34+
|------|-------------|
35+
| `auxiliary/benchmark_souffle_interpreted.ipynb` | Soufflé benchmarks in interpreted mode (used in the original submission) |
36+
| `auxiliary/benchmark_logica_with_output_sizes.ipynb` | Logica notebook computing output sizes for the table in the paper |
37+
| `auxiliary/souffle_compiled_vs_interpreted.md` | Comparison of Soufflé compiled vs. interpreted modes |
38+
39+
## Reproducing the results
40+
41+
1. Install Jupyter Notebook:
42+
```
43+
python3 -m pip install notebook
44+
```
45+
46+
2. Install DuckDB:
47+
```
48+
python3 -m pip install duckdb
49+
```
50+
51+
3. Install Soufflé (v2.4 was used) by following the instructions at
52+
[souffle-lang.github.io](https://souffle-lang.github.io/install).
53+
54+
4. Clone this repository:
55+
```
56+
git clone https://github.com/EvgSkv/logica
57+
```
58+
59+
5. Start the notebook server from the repository root, so that Logica
60+
is importable:
61+
```
62+
cd logica
63+
python3 -m notebook examples/graph/tgdk
64+
```
65+
Alternatively, install Logica with `python3 -m pip install logica` and start
66+
the notebook from anywhere.
67+
68+
6. Run the notebooks starting with `benchmark_logica.ipynb` — it
69+
generates the input data (CSV files and `graphs.db`) used by the
70+
Soufflé and DuckPGQ notebooks. Then proceed to `benchmark_souffle.ipynb`
71+
and `benchmark_cypher.ipynb`.
72+
73+
For the Nemo comparison, see the [Nemo section](#nemo-comparison) below.
74+
75+
## Nemo comparison
76+
77+
| File | Description |
78+
|------|-------------|
79+
| `benchmark_and_collect.py` | Runs all TC and SG benchmarks on both Logica and Nemo, collects wall/CPU times and fact counts into `benchmark_results.txt` (ASCII table) and `benchmark_results.csv`. Generates the `.l` and `.nemo` programs from templates. |
80+
| `tc_g1k.l`, `tc_g1k.nemo` | Example Logica and Nemo programs for transitive closure (shown for reference — the script regenerates all sizes). |
81+
| `sg_tree7.l`, `sg_tree7.nemo` | Example Logica and Nemo programs for same generation. |
82+
| `benchmark_results.txt` | Output of `benchmark_and_collect.py` from our run. |
83+
84+
To run the Nemo comparison:
85+
86+
1. Install Nemo 0.10.0 (see [nemo rule engine](https://github.com/knowsys/nemo)).
87+
The Nemo binary must be on `PATH` (we invoke it as `nemo` in the script —
88+
adjust the command there if your binary is named `nmo`).
89+
2. Make sure the CSV inputs (`g1k.csv` through `g5k.csv`, `tree7.csv` through `tree12.csv`)
90+
are present in the same directory. They are produced by running
91+
`benchmark_logica.ipynb`.
92+
3. Run the script from this directory:
93+
```
94+
python3 benchmark_and_collect.py
95+
```
96+
It writes `benchmark_results.txt` and `benchmark_results.csv`.
73 KB
Binary file not shown.

0 commit comments

Comments
 (0)