EvgSkv
diff --git a/‎common/benchmarking.py‎
Lines changed: 68 additions & 0 deletions b/‎common/benchmarking.py‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎common/concertina_lib.py‎
Lines changed: 18 additions & 3 deletions b/‎common/concertina_lib.py‎
Lines changed: 18 additions & 3 deletions
diff --git a/‎common/graph.py‎
Lines changed: 14 additions & 1 deletion b/‎common/graph.py‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎examples/graph/tgdk/README.md‎
Lines changed: 96 additions & 0 deletions b/‎examples/graph/tgdk/README.md‎
Lines changed: 96 additions & 0 deletions
diff --git a/‎examples/graph/tgdk/all_programs.pdf‎
73 KB b/‎examples/graph/tgdk/all_programs.pdf‎
73 KB
@@ -0,0 +1,68 @@
+# Simple benchmarking utility.
+
+from IPython.core.magic import register_cell_magic
+from IPython import get_ipython
+import time
+from logica.common import sqlite3_logica
+import pandas
+import signal
+
+# Maps each loop item to its elapsed time in seconds, or to a
+# 'TIMEOUT (> N)' label when the item timed out or was skipped.
+timing = {}
+# Formatted timing reports, one appended per %%loop invocation.
+reports = []
+# Per-item time limit in seconds, passed to signal.alarm().
+timeout = 200
+
+def Clear():
+  global timing, reports
+  timing = {}
+  reports = []
+
+
+@register_cell_magic
+def loop(line, cell):
+  global timing
+  local_timing = {}
+  ip = get_ipython()
+  problem_name, iterator = ip.ev(line)
+  stop = False
+
+  for item in iterator:
+    if stop:
+      print('Skipping %s (previous timeout).' % item)
+      timing[item] = local_timing[item] = 'TIMEOUT (> %d)' % timeout
+      continue
+
+    print('Running %s.' % item)
+    ip.user_ns['loop_parameter'] = item
+
+    timed_out = [False]
+    def handler(signum, frame):
+      timed_out[0] = True
+      raise KeyboardInterrupt("Timeout")
+
+    old_handler = signal.getsignal(signal.SIGALRM)
+    signal.signal(signal.SIGALRM, handler)
+    signal.alarm(timeout)
+
+    start_time = time.perf_counter()
+    try:
+      ip.run_cell(cell.replace('{loop_parameter}', item))
+    finally:
+      signal.alarm(0)
+      signal.signal(signal.SIGALRM, old_handler)
+    elapsed = time.perf_counter() - start_time
+
+    if timed_out[0]:
+      print('*** TIMEOUT on %s ***' % item)
+      stop = True
+      elapsed = 'TIMEOUT (> %d)' % timeout
+
+    timing[item] = elapsed
+    local_timing[item] = elapsed
+
+  report = (' === Timing for %s ===\n' % problem_name) + (
+    sqlite3_logica.DataframeAsArtisticTable(
+        pandas.DataFrame({'problem': list(local_timing.keys()),
+        'time': list(local_timing.values())})))
+  reports.append(report)
+  print(report)
+
@@ -176,7 +176,11 @@ def __init__(self, config, engine, display_mode='colab', iterations=None):
     self.all_actions = {a["name"] for a in self.config}
     self.complete_actions = set()
     self.running_actions = set()
-    assert display_mode in ('colab', 'terminal', 'colab-text', 'silent'), (
+    self.show_only_running = False
+    if os.getenv('LOGICA_TERMINAL_ONELINE', 'no') == 'yes':
+      self.show_only_running = True
+    assert display_mode in ('colab', 'terminal',
+                            'colab-text', 'silent'), (
       'Unrecognized display mode: %s' % display_mode)
     self.display_mode = display_mode
     self.display_id = self.GetDisplayId()
@@ -293,6 +297,13 @@ def AsArtGraph():
     extra_lines = self.ProgressBar().split('\n')
     return AsArtGraph().GetPicture(updating=updating,
                                    extra_lines=extra_lines)
+  def ShowRunning(self, updating):
+     nodes, edges = self.AsNodesAndEdges()
+     running = [n for n in nodes if n.startswith('\033[1m')]
+     if not running:
+       return '*'
+     return '[%d / %d] ' % (len(self.complete_actions),
+                            len(self.all_actions)) + running[0]
 
   def AsNodesAndEdges(self):
     """Nodes and edges to display in terminal."""
@@ -405,14 +416,18 @@ def UpdateDisplay(self, final=False):
     self.display_update_period = min(0.5, self.display_update_period * 1.2)
     if (now - self.recent_display_update_seconds <
         self.display_update_period and
-        not final):
+        not final and
+        not self.show_only_running):
       # Avoid frequent display updates slowing down execution.
       return
     self.recent_display_update_seconds = now
     if self.display_mode == 'colab':
       update_display(self.AsGraphViz(), display_id=self.display_id)
     elif self.display_mode == 'terminal':
-      print(self.AsTextPicture(updating=True))
+      if self.show_only_running:
+        print(self.ShowRunning(updating=True))
+      else:
+        print(self.AsTextPicture(updating=True))
     elif self.display_mode == 'colab-text':
       update_display(
         self.StateAsSimpleHTML(),
 
@@ -217,4 +217,17 @@ def Convert(l):
 
 def HierarchicalOptions():
   return {'layout': {'hierarchical': {'direction': 'UD',
-                                      'sortMethod': 'directed'}}}
+                                      'sortMethod': 'directed'}}}
+
+def InstallRequire():
+  from IPython.display import display, HTML
+  # This script manually loads RequireJS so that logica/vis.js can work
+  display(HTML('''
+      <script src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.6/require.min.js"></script>
+      <script>
+          // This ensures that even if require is loaded late,
+          // the logica script can still find it.
+          window.require = require;
+      </script>
+  '''))
+
@@ -0,0 +1,96 @@
+# Benchmark Artifacts for "Logica-TGD: Transforming Graph Databases Logically"
+
+This directory contains reproducible benchmark notebooks for the paper:
+
+> **Logica-TGD: Transforming Graph Databases Logically**
+> Evgeny Skvortsov, Yilin Xia, Bertram Ludäscher, Shawn Bowers
+> *TGDK, 2026*
+
+## Benchmarks
+
+We compare four systems on graph computation problems (transitive closure,
+pairwise distances, same generation):
+
+- **Logica** — compiling to DuckDB SQL
+- **Soufflé** — Datalog engine with parallel evaluation
+- **DuckPGQ** — DuckDB extension implementing SQL/PGQ (Cypher-style queries)
+- **Nemo** — single-threaded Rust rule engine (for the Nemo column only)
+
+All benchmarks were run on a Google Cloud **c2d-standard-32** instance
+(32 vCPUs, 128 GB RAM) using Logica 1.3.1415926535897, DuckDB 1.3.2,
+Soufflé 2.4, and Nemo 0.10.0.
+
+### Main notebooks
+
+| Notebook | Description |
+|----------|-------------|
+| `benchmark_logica.ipynb` | Logica benchmarks (all problems). **Run this first** — it generates input data (CSV files and `graphs.db`) used by the other notebooks. |
+| `benchmark_souffle.ipynb` | Soufflé benchmarks (compiled mode) |
+| `benchmark_cypher.ipynb` | DuckPGQ / Cypher benchmarks |
+
+### Auxiliary materials
+
+| File | Description |
+|------|-------------|
+| `auxiliary/benchmark_souffle_interpreted.ipynb` | Soufflé benchmarks in interpreted mode (used in the original submission) |
+| `auxiliary/benchmark_logica_with_output_sizes.ipynb` | Logica notebook computing output sizes for the table in the paper |
+| `auxiliary/souffle_compiled_vs_interpreted.md` | Comparison of Soufflé compiled vs. interpreted modes |
+
+## Reproducing the results
+
+1. Install Jupyter Notebook:
+   ```
+   python3 -m pip install notebook
+   ```
+
+2. Install DuckDB:
+   ```
+   python3 -m pip install duckdb
+   ```
+
+3. Install Soufflé (v2.4 was used) by following the instructions at
+   [souffle-lang.github.io](https://souffle-lang.github.io/install).
+
+4. Clone this repository:
+   ```
+   git clone https://github.com/EvgSkv/logica
+   ```
+
+5. Start the notebook server from the repository root, so that Logica
+   is importable:
+   ```
+   cd logica
+   python3 -m notebook examples/graph/tgdk
+   ```
+   Alternatively, install Logica with `python3 -m pip install logica` and start
+   the notebook from anywhere.
+
+6. Run the notebooks starting with `benchmark_logica.ipynb` — it
+   generates the input data (CSV files and `graphs.db`) used by the
+   Soufflé and DuckPGQ notebooks. Then proceed to `benchmark_souffle.ipynb`
+   and `benchmark_cypher.ipynb`.
+
+For the Nemo comparison, see the [Nemo section](#nemo-comparison) below.
+
+## Nemo comparison
+
+| File | Description |
+|------|-------------|
+| `benchmark_and_collect.py` | Runs all transitive closure (TC) and same generation (SG) benchmarks on both Logica and Nemo, collects wall/CPU times and fact counts into `benchmark_results.txt` (ASCII table) and `benchmark_results.csv`. Generates the `.l` and `.nemo` programs from templates. |
+| `tc_g1k.l`, `tc_g1k.nemo` | Example Logica and Nemo programs for transitive closure (shown for reference — the script regenerates all sizes). |
+| `sg_tree7.l`, `sg_tree7.nemo` | Example Logica and Nemo programs for same generation. |
+| `benchmark_results.txt` | Output of `benchmark_and_collect.py` from our run. |
+
+To run the Nemo comparison:
+
+1. Install Nemo 0.10.0 (see [nemo rule engine](https://github.com/knowsys/nemo)).
+   The Nemo binary must be on `PATH`. The script invokes it as `nemo`; if
+   your binary is named `nmo`, adjust the command in
+   `benchmark_and_collect.py`.
+2. Make sure the CSV inputs (`g1k.csv`..`g5k.csv`, `tree7.csv`..`tree12.csv`)
+   are present in the same directory. They are produced by running
+   `benchmark_logica.ipynb`.
+3. Run the script from this directory:
+   ```
+   python3 benchmark_and_collect.py
+   ```
+   It writes `benchmark_results.txt` and `benchmark_results.csv`.