change-structures-paper/change-structures.tex at master · michaelpj/change-structures-paper · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% Toggle comments for preamble and topmatter to typeset in ACM style

%\input{preamble-standard}
%\input{preamble-acm}
\documentclass[runningheads]{llncs}

\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{mathtools}
\usepackage{stmaryrd}
\usepackage{thmtools}
\usepackage{todonotes}
\usepackage{etoolbox}
\usepackage[misc,geometry]{ifsym}

\makeatletter
\newcommand{\@chapapp}{\relax}%
\makeatother
\usepackage[title,header]{appendix}

\usepackage{environ}

% Switches between biblatex and natbib
%\newif\ifbiblatex\biblatextrue
\newif\ifbiblatex\biblatexfalse

% No appendix for conference version
\newif\ifappendix\appendixfalse

% Packages where the order matters
\ifbiblatex
\usepackage[
  safeinputenc,
  natbib=true
]{biblatex}
\usepackage{bbm}
\else
\fi
\usepackage{hyperref}
\usepackage{cleveref}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% BEGIN {Macros / packages by Luke}
% switch of draft/final version
\newif\ifdraft\drafttrue
%\newif\ifdraft\draftfalse

\ifdraft
\newcommand{\lochanged}[1]{{\color{red}{#1}}}
\newcommand{\mpchanged}[1]{{\color{blue}{#1}}}
%\newcommand{\tk}[1]{{\textcolor{DarkGreen}{[{#1}---Tsukada]}}}
%\newcommand{\mp}[1]{{\color{blue}{[{#1}--Mario]}}}
\newcommand{\lo}[1]{{\color{red}{[{#1}--Luke]}}}

\newcommand{\todoall}[1]{\todo[inline,color=black!30,author=All]{#1}}
\newcommand{\todompj}[1]{\todo[inline,color=yellow!40,author=Michael]{#1}}
\newcommand{\todomario}[1]{\todo[inline,color=blue!40,author=Mario]{#1}}

\else
\newcommand{\mpchanged}[1]{{#1}}
\newcommand{\lochanged}[1]{{#1}}
%\newcommand{\mp}[1]{}
\newcommand{\lo}[1]{}

\newcommand{\todoall}[1]{}
\newcommand{\todompj}[1]{}
\newcommand{\todomario}[1]{}
\fi
%% END {Macros / packages by Luke}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\input{theorem-restate}

\input{notation}

\newif\ifproofs
% Comment out to disable proofs
%\proofstrue

\ifbiblatex
\addbibresource{paper.bib}
\fi

\begin{document}

%\input{topmatter-standard}
%\input{topmatter-acm}
\input{topmatter-llncs}

\title{Fixing incremental computation}
\subtitle{Derivatives of fixpoints, and the recursive semantics of Datalog}
\maketitle

\begin{abstract}
  Incremental computation has recently been studied using the concepts of \emph{change
  structures} and \emph{derivatives} of programs, where the derivative of a function allows updating the output
  of the function based on a change to its input. We generalise change structures to \emph{change actions},
  and study their algebraic properties. We develop change actions for common structures
  in computer science, including directed-complete partial orders and Boolean algebras.
  We then show how to compute derivatives of fixpoints. This allows us to
  perform incremental evaluation and maintenance of recursively defined
  functions with particular application generalised Datalog programs.
  Moreover, unlike previous results, our techniques are \emph{modular} in that
  they are easy to apply both to variants of Datalog and to other programming languages.

  \keywords{Incremental computation, Datalog, Semantics, Fixpoints}
\end{abstract}


\section{Introduction}
\label{sec:intro}

Consider the following classic Datalog program\footnote{See \cite[part D]{abiteboul1995foundations} for an introduction to Datalog.},
which computes the transitive closure of an edge relation $e$:
\begin{align*}
  tc(x, y) &\leftarrow e(x, y)\\
  tc(x, y) &\leftarrow e(x, z) \wedge tc(z, y)
\end{align*}

The semantics of Datalog tells us that the denotation of this program is the
least fixpoint of the rule $tc$. Kleene's fixpoint Theorem tells us that we can
compute this fixpoint by repeatedly applying the rule until the output stops changing, starting from the empty relation. For example, supposing
that $e = \{ (1, 2), (2, 3), (3, 4) \}$, we get the following evaluation trace:
\begin{center}
  \begin{tabular} {p{5em} p{12em} p{12em}}
    Iteration & Newly deduced facts & Accumulated data in $tc$ \\
    %%\toprule
    0 & $\{ \}$ & $\{ \}$\\
    1 & $\{ (1, 2), (2, 3), (3, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4) \}$\\
    2 & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4) \}$\\
    3 & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4), (1, 4),(1, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4), (1, 4) \}$\\
    4 & (as above) & (as above) \\
    %%\bottomrule
  \end{tabular}
\end{center}
\medskip

At this point we have reached a fixpoint, and so we are done.

However, this process is quite wasteful. We deduced the fact $(1,2)$ at every iteration,
even though we had already deduced it in the first iteration. Indeed, for a
chain of $n$ such edges we will deduce $O(n^2)$ facts along the way.

The standard improvement to this evaluation strategy is known as ``semi-naive''
evaluation (see \cite[section 13.1]{abiteboul1995foundations}), where we transform
the program into a \emph{delta} program with two parts:
\begin{itemize}
  \item A \emph{delta} rule that computes the \emph{new} facts at each
    iteration.
  \item An \emph{accumulator} rule that accumulates the delta at each
    iteration to compute the final result
\end{itemize}
In this case our delta rule is simple: we only get new transitive edges at iteration $n+1$ if we
can deduce them from transitive edges we deduced at iteration $n$.
\begin{align*}
  \Delta tc_{0}(x, y) &\leftarrow e(x, y)\\
  \Delta tc_{i+1}(x, y) &\leftarrow e(x, z) \wedge \Delta tc_i(z, y)\\
  tc_{0}(x, y) &\leftarrow \Delta tc_0(x, y)\\
  tc_{i+1}(x, y) &\leftarrow tc_{i}(x,y) \vee \Delta tc_{i+1}(x,y)
\end{align*}

\begin{center}
  \begin{tabular} {p{5em} p{10em} p{12em}}
    Iteration & $\Delta tc_i$ & $tc_i$ \\
    %\toprule
    0 & $\{ (1, 2), (2, 3), (3, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4) \}$\\
    1 & $\{ (1, 3), (2, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4) \}$\\
    2 & $\{ (1, 4) \}$ & $\{ (1, 2), (2, 3), (3, 4),$ $(1, 3), (2, 4), (1, 4) \}$\\
    3 & $\{ \}$ & (as above) \\
    %\bottomrule
  \end{tabular}
\end{center}
\medskip

This is much better \textemdash{} we have turned a quadratic computation into a
linear one. The delta transformation is a kind of \emph{incremental
  computation}: at each stage we compute the changes in the rule given the previous
changes to its inputs.

But the delta rule translation works only for traditional Datalog. It is common to
liberalise the formula syntax with additional features, such as disjunction,
existential quantification, negation, and aggregation.\footnote{See, for
  example, LogiQL \cite{logicbloxWebsite,halpin2014logiql},
  Datomic \cite{datomicWebsite},
  Souffle \cite{souffleWebsite,scholz2016fast}, and
  DES \cite{saenz2011deductive}, which between them have all of these
  features and more. We do not here explore supporting extensions to the syntax
  of rule \emph{heads}, although as long as this can be given a denotational
  semantics in a similar style our techniques should be applicable.}
This allows us to write programs like the following, where we compute whether all the
nodes in a subtree given by $child$ have some property $p$:
\begin{align*}
  treeP(x) &\leftarrow p(x) \wedge \neg \exists y . (child(x,y) \wedge \neg treeP(y))
\end{align*}

The body of this predicate amounts to recursion through an \emph{universal}
quantifier (encoded as $\neg \exists \neg$). We would
like to be able to use semi-naive evaluation for this rule too, but the standard
definition of semi-naive transformation is not well defined for the extended
program syntax, and it is unclear how to extend it (and the
correctness proof) to handle such cases.

It is possible, however, to write a delta program for $treeP$ by hand; indeed,
here is a definition for the delta predicate (the accumulator is as before):\footnote{This rule should be read as: we
  can newly deduce that $x$ is in $treeP$ if $x$ satisfies the predicate, and
  we have newly deduced that one of its children is in $treeP$, and we currently
  believe that all of its children are in $treeP$.}
\begin{align*}
  \Delta_{i+1}treeP(x) \leftarrow & p(x)\\
  &\wedge \exists y. (child(x, y) \wedge \Delta_itreeP(y))\\
  &\wedge \neg \exists y. (child(x,y) \wedge \neg treeP_i(y))
\end{align*}

This is a \emph{correct} delta program (in that using it to iteratively compute
$treeP$ gives the right answer), but it is not \emph{precise} because it
derives some facts repeatedly. We will show how to construct correct delta
programs generally using a program transformation, and show how we have some
freedom to optimize within a range of possible alternatives to improve precision or ease evaluation.

Handling extended Datalog is of more than theoretical interest \textemdash{} the research
in this paper was carried out at Semmle, which
makes heavy use of a commercial Datalog implementation to implement large-scale
static program analysis
\cite{semmleWebsite,avgustinov2016ql,sereni2008adding,schafer2010type}.
Semmle's implementation includes parity-stratified negation\footnote{Parity-stratified negation means that recursive calls must
  appear under an even number of negations. This ensures that the rule remains
  monotone, so the least fixpoint still exists.},
recursive aggregates \cite{demoor2013aggregates}, and other non-standard
features, so we are faced with a dilemma: either abandon the new language
features, or abandon incremental computation.

We can tell a similar story about \emph{maintenance} of Datalog programs.
Maintenance means updating the results of the program when its inputs change,
for example, updating the value of $tc$ given a change to $e$. Again, this is a
kind of incremental computation, and there are known solutions for traditional Datalog
\cite{gupta1993maintaining}, but these break down when the language is extended.

There is a piece of folkloric knowledge in the Datalog community that hints at a
solution: the semi-naive translation of a rule corresponds to the
\emph{derivative} of that rule\cite[section 3.2.2]{bancilhon1986amateur,bancilhon1986naive}.
The idea of performing incremental computation using derivatives has been
studied recently by Cai et al. \cite{cai2014changes}, who give an account using
\emph{change structures}. They use this to provide a framework for incrementally evaluating lambda calculus programs.

However, Cai et al.'s work isn't directly applicable to Datalog: the tricky part
of Datalog's semantics are recursive definitions and the need for the
\emph{fixpoints}, so we need some additional theory to tell us how to
handle incremental evaluation and maintenance of fixpoint computations.

This paper aims to bridge that gap by providing a solid semantic foundation for the incremental
computation of Datalog, and other recursive programs, in terms of changes and
differentiable functions.

\paragraph{Contributions}

We start by generalizing change structures to
\emph{change actions} (\cref{sec:changeActions}). Change actions are simpler and weaker than change
structures, while still providing enough structure to handle incremental
computation, and have fruitful
interactions with a variety of structures (\cref{sec:moreStructures},
\cref{sec:dcpos}).

We then show how change actions can be used to perform incremental evaluation and maintenance
of non-recursive program semantics, using the formula semantics of generalized Datalog as our primary
example (\cref{sec:nonRecursiveDatalog}). Moreover, the structure of the
approach is modular, and can accommodate arbitrary additional
formula constructs (\cref{sec:extensions}).

We also provide a method of incrementally computing and maintaining fixpoints
(\cref{sec:fixpoints}). We use this to perform incremental evaluation and
maintenance of \emph{recursive} program semantics, including generalized
recursive Datalog (\cref{sec:recursiveDatalog}). This provides, to the best
of our knowledge, the world's first incremental
evaluation and maintenance mechanism for Datalog that can handle negation,
disjunction, and existential quantification.

We have omitted the proofs from this paper. Most of the results have routine
proofs, but the proofs of the more substantial results
(especially those in \cref{sec:fixpoints}) are included in an extended
report \cite{arxivchangeactions}, along with some extended worked examples, and additional
material on the precision of derivatives.

\section{Change actions and derivatives}
\label{sec:changeActions}

Incremental computation requires understanding how values \emph{change}. For
example, we can change an integer by adding a natural to it.
Abstractly, we have a set of values (the integers), and a set of changes
(the naturals) which we can ``apply'' to a value (by addition) to get a new value.

This kind of structure is well-known \textemdash{} it is a set action. It is
also very natural to want to combine changes sequentially, and if we do this
then we find ourselves with a monoid action.

Using monoid actions for changes gives us a reason to think that
change actions are an adequate representation of changes: any subset of $A
\rightarrow A$ which is closed under composition can be
represented as a monoid action on $A$, so we are able to capture all of these as change
actions.

\subsection{Change actions}
\label{sec:changeAction}

\begin{definition}
  A \emph{change action} is a tuple:
  \begin{displaymath}
    \cstr{A} \defeq \cstruct{A}{\changes{A}}{\cplus_A}
  \end{displaymath}
  where $A$ is a set, $\changes{A}$ is a monoid, and $\cplus_A : A \times \changes{A} \rightarrow A$ is a monoid action on $A$.\footnote{Why not
    just work with monoid actions? The reason is that while the category of
    monoid actions and the category of change actions have the same objects, they
  have different morphisms. See \cref{sec:sacts} for further discussion.}

  We will call $A$ the base set, and $\changes{A}$ the \emph{change set} of the change
  action. We will use $\splus$ for the monoid operation of $\changes{A}$, and
  $\mzero$ for its identity element.
  When there is no risk of confusion, we will simply write $\cplus$ for $\cplus_A$.
\end{definition}

\subsubsection{Examples}
\label{sec:examples}
A typical example of a change action is $\cstruct{A^\ast}{A^\ast}{\doubleplus}$ where $A^\ast$ is the set of finite words (or lists) of $A$.
Here we represent changes to a word made by concatenating another word onto it.
The changes themselves can be combined using $\doubleplus$ as the monoid operation with the empty word as the identity,
and this is a monoid action: $(a \doubleplus b) \doubleplus c = a \doubleplus \left( b \doubleplus c \right)$.

This is a very common case: any monoid $(A, \splus, \mzero)$ can be seen as a change action
$\cstruct{A}{(A, \splus, \mzero)}{\splus}$. Many practical change actions
can be constructed in this way. In particular, for any change action $\cstruct{A}{\changes{A}}{\cplus}$,
$\cstruct{\changes{A}}{\changes{A}}{\splus}$ is also a change action. This means
that we do not have to do any extra work to talk about changes to changes \textemdash{} we can
always take $\changes{\changes{A}} = \changes{A}$ (although there may be other
change actions available).

Three examples of change actions are of particular interest to us.
First, whenever
$L$ is a Boolean algebra, we can give it the change actions $(L, L, \vee)$ and $(L, L, \wedge)$,
as well as a combination of these (see \cref{sec:booleanAlgebras}). Second,
the natural numbers with addition have a change action $\cstr{\NN} \defeq (\NN,
\NN, +)$, which will prove useful during inductive proofs.

Another interesting example of change actions is \textit{semiautomata}. A semiautomaton is a triple
$(Q, \Sigma, T)$, where $Q$ is a set of states, $\Sigma$ is a (non-empty) finite input alphabet
and $T : Q \times \Sigma \rightarrow Q$ is a transition function.
Every semiautomaton corresponds to a change action $(Q, \Sigma^*, T^*)$ on the free monoid
over $\Sigma^*$, with $T^*$ being the free extension of $T$. Conversely, every change action $\cstr{A}$
whose change set $\changes{A}$ is freely generated by a finite set corresponds to a semiautomaton.

Other recurring examples of change actions are:
\begin{itemize}
  \item $\cstr{A}_\bot \defeq \cstruct{A}{M}{\lambda(a, \change{a}). a}$, where $M$ is any monoid,
    which we call the \emph{empty} change action on any base set, since it induces no changes at all.
  \item $\cstr{A}_\top \defeq \cstruct{A}{A \rightarrow A}{\ev}$, where $A$ is an arbitrary
    set, $A \rightarrow A$ denotes the set of all functions from $A$ into itself, considered as
    a monoid under composition and $\ev$ is the usual evaluation map. We will call this the
    ``full'' change action on $A$ since it contains every possible non-redundant change.
\end{itemize}
These are particularly relevant because they are, in a sense, the ``smallest'' and ``largest''
change actions that can be imposed on an arbitrary set $A$.

Many other notions in computer science can be understood naturally in terms of change actions,
\emph{e.g.} databases and database updates, files and diffs, Git repositories and commits, even
video compression algorithms that encode a frame as a series of changes to the previous frame.

\subsection{Derivatives}

When we do incremental computation we are usually trying to save ourselves some
work. We have an expensive function $f: A \rightarrow B$, which we've evaluated at some point
$a$. Now we are interested in evaluating $f$ after some change $\change{a}$ to
$a$, but ideally we want to avoid actually computing $f(a \cplus
\change{a})$ directly.

A solution to this problem is a function $\derive{f}: A \times \changes{A}
\rightarrow \changes B$, which given $a$ and $\change{a}$ tells us how to change
$f(a)$ to $f(a \cplus \change{a})$. We call this a \emph{derivative} of a function.

\begin{definition}
  \label{def:derivative}
  Let $\cstr{A}$ and $\cstr{B}$ be change actions.
  A \emph{derivative} of a function $f: A \rightarrow B$ is a function $\derive{f}: A \times \changes{A} \rightarrow
  \changes{B}$ such that
  \begin{displaymath}
    f(a \cplus_A \change{a}) = f(a) \cplus_B \derive{f}(a, \change{a})
  \end{displaymath}
  A function which has a derivative is
  \emph{differentiable}, and we will write $\cstr{A} \rightarrow \cstr{B}$ for
  the set of differentiable functions between $A$ and $B$.\footnote{Note that we do not require that $\derive{f}(a,
    \change{a} \splus \change{b}) = \derive{f}(a, \change{a}) \splus \derive{f}(a
    \cplus \change{a}, \change{b})$ nor that $\derive{f}(a, \mzero) = \mzero$.
    These are natural conditions, and all the
    derivatives we have studied also satisfy them, but none of the results on
    this paper require them to hold.}
\end{definition}

Derivatives need not be unique in general, so we will speak of ``a''
derivative. Functions into ``thin'' change
actions \textemdash{} where $a \cplus \change{a} = a \cplus \change{b}$ implies $\change{a} =
\change{b}$ \textemdash{} have unique derivatives, but many change actions are not thin.
For example, $\cstruct{\powerset{\NN}}{\powerset{\NN}}{\cap}$ is not thin because $\{0\} \cap \{1\} = \{0\}
\cap \{2\}$.

Derivatives capture the structure of incremental computation, but there are
important operational considerations that affect whether using them for
computation actually saves us
any work. As we will see in a moment (\cref{prop:minusDerivatives}), for many
change actions we will have the option
of picking the ``worst'' derivative, which merely computes $f(a \cplus \change{a})$
directly and then works out the change that maps $f(a)$ to this new value.
While this is formally a derivative, using it certainly does not save us any work! We will be concerned
with both the possibility of constructing correct derivatives
(\cref{sec:booleanAlgebras} and \cref{sec:fixpoints} in particular), and also in
giving ourselves a range of derivatives to choose from so that we can soundly
optimize for operational value.

For our Datalog case study, we aim to cash out the folkloric idea that
incremental computation functions via a derivative. We will construct a derivative of
the semantics of Datalog in stages: first the non-recursive
formula semantics (\cref{sec:nonRecursiveDatalog}); and later the full, recursive, semantics
(\cref{sec:recursiveDatalog}).

\subsection{Useful facts about change actions and derivatives}

\subsubsection{The Chain Rule}

The derivative of a function can be computed compositionally, because derivatives satisfy the standard chain rule.

\begin{theorem}[The Chain Rule]
  Let $f: \cstr{A} \rightarrow \cstr{B}$, $g: \cstr{B} \rightarrow \cstr{C}$ be differentiable functions. Then $g \circ f$ is also
  differentiable, with a derivative given by
  \begin{displaymath}
    \derive{(g \circ f)}(x, \change{x}) = \derive{g}\left(f(x), \derive{f}(x, \change{x})\right)
  \end{displaymath}
  or, in curried form
  \begin{displaymath}
    \derive{(g \circ f)}(x) = \derive{g}(f(x)) \circ \derive{f}(x)
  \end{displaymath}
\end{theorem}

\subsubsection{Complete change actions and minus operators}

Complete change actions are an important class of change actions, because they
have changes between \emph{any} two values in the base set.

\begin{definition}
  A change action is \emph{complete} if for any $a, b \in A$, there is
  a change $\change{a} \in \changes{A}$ such that $a \cplus \change{a} = b$.
\end{definition}

Complete change actions have convenient ``minus operators'' that allow us to
compute the difference between two values.

\begin{definition}
  A \emph{minus operator} is a function $\cminus: A \times A \rightarrow
  \changes{A}$ such that $a \cplus (b \cminus a) = b$ for all $a, b \in A$.
\end{definition}

\begin{proposition}
  \label{prop:minusDerivatives}
  Given a minus operator $\cminus$, and a function $f$, let
  \begin{displaymath}
    \derive{f}_\cminus(a, \change{a}) \defeq f(a \cplus \change{a}) \cminus f(a)
  \end{displaymath}
  Then $\derive{f}_\cminus$ is a derivative for $f$.
\end{proposition}

\begin{proposition}
  Let $\cstr{A}$ be a change action. Then the following are equivalent:
  \begin{itemize}
    \item $\cstr{A}$ is complete.
    \item There is a minus operator on $\cstr{A}$.
    \item For any change action $\cstr{B}$ all functions $f: {B} \rightarrow {A}$ are differentiable.
  \end{itemize}
\end{proposition}

This last property is of the utmost importance, since we are often concerned with the differentiability
of functions.

\subsubsection{Products and sums}
\label{sec:prodsum}

Given change actions on sets $A$ and $B$, the question immediately arises of whether there are
change actions on their Cartesian product $A \times B$ or disjoint union $A + B$. While there are
many candidates, there is a clear ``natural'' choice for both.

\begin{rep-proposition}{products}[Products]
  \label{prop:products}
  Let $\cstr{A} = \cstruct{A}{\changes{A}}{\cplus_A}$ and $\cstr{B} =
  \cstruct{B}{\changes{B}}{\cplus_B}$ be change actions.

  Then $\cstr{A} \times \cstr{B} \defeq \cstruct{A \times B}{\changes{A} \times \changes{B}}{\cplus_{\times}}$ is a change action,
  where $\cplus_{\times}$ is defined by:
  \begin{align*}
    (a, b) \cplus_{A \times B} (\change{a}, \change{b}) \defeq (a \cplus_A \change{a}, b \cplus_B \change{b})
  \end{align*}

  The projection maps $\pi_1$,$\pi_2$ are differentiable with respect to it.
  Furthermore, a function
  $f : A \times B \into C$ is differentiable from $\cstr{A} \times \cstr{B}$ into $\cstr{C}$ if
  and only if, for every fixed $a \in A$ and $b \in B$, the partially applied functions
  \begin{align*}
    f(a, \cdot) : B \into C\\
    f(\cdot, b) : A \into C
  \end{align*}
  are differentiable.
\end{rep-proposition}

Whenever $f : A \times B \rightarrow C$ is differentiable, we will sometimes use $\partial_1 f$ and
$\partial_2 f$ to refer to derivatives of the partially applied versions, i.e. if
$f'_a : B \times \changes{B} \rightarrow \changes{C}$ and
$f'_b : A \times \changes{A} \rightarrow \changes{C}$ refer to derivatives for
$f(a, \cdot), f(\cdot, b)$ respectively, then
\begin{gather*}
  \partial_1 f : A \times \changes{A} \times B \rightarrow \changes{C}\\
  \partial_1 f(a, \change{a}, b) \defeq f'_b(a, \change{a})\\
  \partial_2 f : A \times B \times \changes{B} \rightarrow \changes{C}\\
  \partial_2 f(a, b, \change{b}) \defeq f'_a(b, \change{b})
\end{gather*}

\ifproofs
\begin{proof}
  See \cref{prf:products}.
\end{proof}
\fi

\begin{rep-proposition}{disjointUnions}[Disjoint unions]
  \label{prop:disjointUnions}
  Let $\cstr{A} = \cstruct{A}{\changes{A}}{\cplus_A}$ and $\cstr{B} =
  \cstruct{B}{\changes{B}}{\cplus_B}$ be change actions.

  Then $\cstr{A} + \cstr{B} \defeq \cstruct{A + B}{\changes{A} \times
  \changes{B}}{\cplus_{+}}$ is a change action, where $\cplus_{+}$ is defined as:
  \begin{align*}
    \iota_1 a \cplus_{+} (\change{a}, \change{b}) &\defeq \iota_1 (a \cplus_A \change{a})\\
    \iota_2 b \cplus_{+} (\change{a}, \change{b}) &\defeq \iota_2 (b \cplus_B \change{b})
  \end{align*}

  The injection maps $\iota_1, \iota_2$ are differentiable with respect to $\cstr{A} + \cstr{B}$. Furthermore,
  whenever $\cstr{C}$ is a change action and $f : A \rightarrow C, g: B \rightarrow C$ are differentiable,
  then so is $\left[ f, g \right]$.
\end{rep-proposition}
\ifproofs
\begin{proof}
  See \cref{prf:disjointUnions}.
\end{proof}
\fi

\subsection{Comparing change actions}

Much like topological spaces, we can compare change actions on the same
base set according to coarseness. This
is useful since differentiability of functions between change actions is characterized
entirely by the coarseness of the actions.

\begin{definition}
  Let $\cstr{A}_1$ and $ \cstr{A}_2$ be
  change actions on $A$. We say that $\cstr{A}_1$ is \emph{coarser} than $\cstr{A}_2$ (or that $\cstr{A}_2$ is \emph{finer}
  than $\cstr{A}_1$) whenever for every $x \in A$ and change $\change{a}_1 \in
  \changes{A}_1$, there is a change $\change{a}_2 \in
  \changes{A}_2$ such that $x \cplus_{A_1} \change{a}_1 = x \cplus_{A_2} \change{a}_2$.

  We will write $\cstr{A}_1 \leq \cstr{A}_2$ whenever $\cstr{A}_1$ is coarser than $\cstr{A}_2$.
  If $\cstr{A}_1$ is both finer and coarser than $\cstr{A}_2$, we will say that $\cstr{A}_1$
  and $\cstr{A}_2$ are equivalent.
\end{definition}

The relation $\leq$  defines a preorder (but not a partial order) on the set of all change actions
over a fixed set A. Least and greatest elements do exist up to equivalence, and correspond
respectively to the empty change action $\cstr{A}_\bot$ and any complete change
action, such as the full change action $\cstr{A}_\top$,
defined in \cref{sec:changeAction}.

\begin{proposition}
  Let $\cstr{A}_2 \leq \cstr{A}_1$, $\cstr{B}_1 \leq \cstr{B}_2$ be change actions, and suppose
  the function $f : A \rightarrow B$ is differentiable as a function from $\cstr{A}_1$ into
  $\cstr{B}_1$. Then $f$ is differentiable as a function from $\cstr{A}_2$ into $\cstr{B}_2$.
\end{proposition}

A consequence of this fact is that whenever two change actions are equivalent
they can be used interchangeably without affecting which functions are differentiable. One last parallel with topology
is the following result, which establishes a simple criterion for when a change action is coarser than
another:

\begin{proposition}
  Let $\cstr{A}_1, \cstr{A}_2$ be change actions on $A$. Then $\cstr{A}_1$ is coarser than $\cstr{A}_2$
  if and only if the identity function $\id : A \rightarrow A$ is differentiable from $\cstr{A}_1$ to
  $\cstr{A}_2$.
\end{proposition}


\section{Posets and Boolean algebras}
\label{sec:moreStructures}

The semantic domain of Datalog is a complete Boolean algebra, and so our next
step is to construct a good change action for Boolean algebras. Along the way, we
will consider change actions over posets, which give us the ability to
\emph{approximate} derivatives, which will turn out to be very important in practice.

\subsection{Posets}

Ordered sets give us a constrained class of functions: monotone
functions. We can define \emph{ordered} change actions, which are those that
are well-behaved with respect to the order on the underlying set.\footnote{If we were giving a presentation that was
generic in the base category, then this would simply be the definition of being
a change action in the category of posets and monotone maps.}

\begin{definition}
  A change action $\cstr{A}$ is \emph{ordered} if
  \begin{itemize}
    \item $A$ and $\changes{A}$ are posets.
    \item $\cplus$ is monotone as a map from $A \times \changes{A} \rightarrow A$
    \item $\splus$ is monotone as a map from $\changes{A} \times \changes{A} \rightarrow \changes{A}$
  \end{itemize}
\end{definition}

In fact, any change action whose base set is a poset induces a partial order
on the corresponding change set:

\begin{definition}
  $\change{a} \changeOrder \change{b}$ iff for all $a \in A$ it is the case that
  $a \cplus \change{a} \leq a \cplus \change{b}$.
\end{definition}

\begin{proposition}
  Let $\cstr{A}$ be a change action on a set $A$ equipped with a partial order $\leq$ such that
  $\cplus$ is monotone in its first argument. Then $\cstr{A}$ is an ordered change action when
  $\changes{A}$ is equipped with the partial order $\changeOrder$.
\end{proposition}

In what follows, we will extend the partial order $\changeOrder$ on some change
set $\changes{B}$ pointwise to functions from some $A$ into $\changes{B}$. This pointwise
order interacts nicely with derivatives, in that it gives us the following lemma:

\begin{theorem}[Sandwich lemma]
  \label{theorem:sandwich}
  Let $\cstr{A}$ be a change action, and $\cstr{B}$ be an ordered change action,
  and let
  $f: {A} \rightarrow {B}$ and $g: A \times \changes{A} \rightarrow
  \changes{B}$ be function. If $\supderive{f}$ and $\subderive{f}$ are
  derivatives for $f$ such that
  \begin{displaymath}
    \subderive{f} \changeOrder g \changeOrder \supderive{f}
  \end{displaymath}
  then $g$ is a derivative for $f$.
\end{theorem}

If unique minimal and maximal derivatives exist, then this gives us a
characterisation of all the derivatives for a function.

\begin{theorem}
\label{theorem:derivativeCharacterization}
  Let $\cstr{A}$ and $\cstr{B}$ be change actions, with $\cstr{B}$ ordered, and let
  $f: {A} \rightarrow {B}$ be a function. If there exist $\subderiveM{f}$ and
  $\supderiveM{f}$ which are unique minimal and maximal derivatives of $f$,
  respectively, then the derivatives of $f$ are precisely
  the functions $\derive{f}$ such that
  \begin{displaymath}
    \subderiveM{f} \changeOrder \derive{f} \changeOrder \supderiveM{f}
  \end{displaymath}
\end{theorem}
\ifproofs
\begin{proof}
  Follows easily from \cref{theorem:sandwich} and minimality/maximality.
\end{proof}
\fi

This theorem gives us the leeway that we need when trying to pick a derivative: we can pick out the
bounds, and that tells us how much ``wiggle room'' we have above and below.

\subsection{Boolean algebras}
\label{sec:booleanAlgebras}

Complete Boolean algebras are a particularly nice domain for change actions
because they have a negation operator. This is very helpful for computing
differences, and indeed Boolean algebras have a complete change action.

\begin{rep-proposition}{lsuperpose}[Boolean algebra change actions]
 Let $L$ be a complete Boolean algebra. Define
  \begin{displaymath}
    \cstr{L}_\superpose \defeq \cstruct{L}{L \disjointTimes L}{\twist}
  \end{displaymath}
  where
  \begin{align*}
    L \disjointTimes L &\defeq \{ (a, b) \in L \times L \mid a \wedge b = \bot \}\\
    a \twist (p, q) &\defeq (a \vee p) \wedge \neg q
  \end{align*}
  \begin{displaymath}
    (p, q) \splus (r, s) \defeq ((p \wedge \neg s) \vee r, (q \wedge \neg r) \vee s)
  \end{displaymath}
  with identity element $(\bot, \bot)$.

  Then $\cstr{L}_\superpose$ is a complete change action on $L$.
\end{rep-proposition}
\ifproofs
\begin{proof}
  See \cref{prf:lsuperpose}.
\end{proof}
\fi

We can think of $\cstr{L}_\superpose$ as tracking changes as pairs of ``upwards'' and
``downwards'' changes, where the monoid action simply applies one after the
other, with an adjustment to make sure that the components remain
disjoint.\footnote{The
  intuition that $\cstr{L}_\superpose$ is made up of an ``upwards''
  and a ``downwards'' change action glued together can in fact be made precise, but the specifics
  are outside the scope of this paper.} For example, in the powerset Boolean
algebra $\mathcal{P}(\NN)$, a change to $\{ 1, 2 \}$
might consist of \emph{adding} $\{ 3 \}$ and \emph{removing} $\{ 1 \}$,
producing $\{ 2, 3 \}$. In $\mathcal{P}(\NN)_\superpose$ this would be
represented as $(\{ 1, 2 \}) \cplus
(\{ 3 \}, \{ 1 \}) = \{ 2, 3 \}$.

Boolean algebras also have unique maximal and minimal
derivatives, under the usual partial order based on implication. The change
set is, as usual, given the change partial order, which in this case corresponds to
the natural order on $L \times L^{\textrm{op}}$.

\begin{proposition}
  \label{prop:minimalMaximalDerivatives}
  Let $L$ be a complete Boolean algebra with the $\cstr{L}_\superpose$ change action, and
  $f: A \rightarrow L$ be a function.
  Then, the following are minus operators:
  \begin{align*}
    a \cminus_\bot b &= (a \wedge \neg b, \neg a)\\
    a \cminus_\top b &= (a, b \wedge \neg a)
  \end{align*}
  Additionally, $\derive{f}_{\cminus_{\bot}}$ and $\derive{f}_{\cminus_{\top}}$
  define unique least and greatest derivatives for $f$.
\end{proposition}

\Cref{theorem:derivativeCharacterization} then gives us bounds for
all the derivatives on Boolean algebras:

\begin{corollary}
\label{cor:booleanCharacterization}
  Let $L$ be a complete Boolean algebra with the corresponding change action
  $\cstr{L}_\superpose$, $\cstr{A}$ be an arbitrary change action, and $f: A \rightarrow
  L$ be a function. Then the derivatives of $f$ are precisely those functions
  $\derive{f}: A \times \changes{A} \rightarrow \changes{A}$ such that
  \begin{displaymath}
    \derive{f}_{\cminus_{\bot}}
    \changeOrder
    \derive{f}
    \changeOrder
    \derive{f}_{\cminus_{\top}}
  \end{displaymath}
\end{corollary}

This makes \cref{theorem:derivativeCharacterization} actually usable in practice, since
we have concrete definitions for our bounds (which we will make use of in \cref{sec:datalogDifferentiability}).

\section{Derivatives for non-recursive Datalog}
\label{sec:nonRecursiveDatalog}

We now want to apply the theory we have developed to the specific case of the semantics
of Datalog. Giving a differentiable semantics for Datalog will
lead us to a strategy for performing incremental evaluation and maintenance of Datalog programs.
To begin with, we will restrict ourselves to the non-recursive fragment of the
language \textemdash{} the formulae that make up the right hand sides of Datalog
rules. We will tackle the full program semantics in a later section, once we
know how to handle fixpoints.

Although the techniques we are using should work for any language, Datalog
provides a non-trivial case study where the need for incremental computation is
real and pressing, as we saw in \cref{sec:intro}.

\subsection{Semantics of Datalog formulae}

Datalog is usually given a logical semantics where formulae are interpreted as first-order
logic predicates and the semantics of a program is the set of models of its constituent
predicates. We will instead give a simple denotational semantics (as is
typical when working with fixpoints, see e.g. \cite{compton1994stratified}) that treats a Datalog
formula as directly denoting a relation, i.e.
a set of named tuples, with variables ranging over a finite schema.

\begin{definition}
  A \emph{schema} $\Gamma$ is a finite set of names. A \emph{named tuple} over $\Gamma$ is an assignment
  of a value $v_i$ for each name $x_i$ in $\Gamma$. Given disjoint schemata
  $\Gamma = \{ x_1, \ldots, x_n \}$ and $\Sigma = \{ y_1, \ldots, y_m \}$,
  the \emph{selection function} $\sigma_\Gamma$ is defined as
  \begin{displaymath}
    \sigma_\Gamma(\{x_1 \mapsto v_1, \ldots, x_n \mapsto v_n, y_1 \mapsto w_1, \ldots, y_m \mapsto w_m \})
    \defeq \{ x_1 \mapsto v_1, \ldots, x_n \mapsto v_n \}
  \end{displaymath}
  i.e. $\sigma_\Gamma$ restricts a named tuple over $\Gamma \cup \Sigma$ into a tuple over $\Gamma$
  with the same values for the names in $\Gamma$.
  We denote the elementwise extension of $\sigma_\Gamma$ to sets of tuples also as $\sigma_\Gamma$.
\end{definition}

We will adopt the usual closed-world assumption to give a denotation to negation.

\begin{definition}
  For any schema $\Gamma$,
  there exists a universal relation $\universalRel_\Gamma$.
  Negation on relations can then be defined as
  \begin{displaymath}
    \neg R \defeq \universalRel_\Gamma \setminus R
  \end{displaymath}
\end{definition}

This makes $\Rel_\Gamma$, the set of all subsets of $\universalRel_\Gamma$,
a complete Boolean algebra.

\begin{definition}
  A Datalog formula $T$ whose free term variables are contained in $\Gamma$ denotes a function from
  $\Rel_\Gamma^n$ to $\Rel_\Gamma$.
  \begin{displaymath}
    \denote{\_}_\Gamma : \Formula \rightarrow \Rel_\Gamma^n \rightarrow \Rel_\Gamma
  \end{displaymath}
  If $\semR = (\semR_1, \ldots, \semR_n)$ is a choice of a relation $\semR_i$ for each of the variables $R_i$,
  $\denote{T}(\semR)$ is inductively defined according to the rules in \cref{fig:datalogSemantics}.
\end{definition}

\begin{figure}
  \fbox{
    \begin{minipage}[t]{0.9\textwidth}
      \vspace{-5pt}
      \begin{minipage}[t]{.45\textwidth}
      \begin{align*}
        \sem{\top}_\Gamma(\semR) &\defeq \universalRel_\Gamma\\
        \sem{\bot}_\Gamma(\semR) &\defeq \emptyset\\
        \sem{R_j}_\Gamma(\semR) &\defeq \semR_j\\
      \end{align*}
    \end{minipage}\hfill\noindent
    \begin{minipage}[t]{.45\textwidth}
      \begin{align*}
        \sem{T \wedge U}_\Gamma(\semR) &\defeq \sem{T}_\Gamma(\semR) \cap \sem{U}_\Gamma(\semR)\\
        \sem{T \vee U}_\Gamma(\semR) &\defeq \sem{T}_\Gamma(\semR) \cup \sem{U}_\Gamma(\semR)\\
        \sem{\neg T}_\Gamma(\semR) &\defeq \neg \sem{T}_\Gamma(\semR)\\
      \end{align*}
    \end{minipage}
    \vspace{-12pt}
    \begin{align*}
      \sem{\exists x . T}_\Gamma(\semR) &\defeq \sigma_\Gamma(\sem{T}_{\Gamma \cup \{ x \}}(\semR))
    \end{align*}
    \end{minipage}
  }
  \caption{Formula semantics for Datalog}
  \label{fig:datalogSemantics}
  \vspace{-12pt}
\end{figure}

Since $\Rel_\Gamma$ is a complete Boolean algebra, and so is $\Rel_\Gamma^n$, $\denote{T}_\Gamma$ is
a function between complete Boolean algebras. For brevity, we will often leave the schema implicit,
as it is clear from the context.

\subsection{Differentiability of Datalog formula semantics}
\label{sec:datalogDifferentiability}

In order to actually perform our incremental computation, we first need to provide a concrete derivative for the semantics
of Datalog formulae. Of course, since $\denote{T}_\Gamma$ is a function between the complete Boolean algebras
$\Rel_\Gamma^n$ and
$\Rel_\Gamma$, and we know that the corresponding change actions
$\widehat{\Rel_\Gamma^n}_\superpose$ and $\widehat{\Rel_\Gamma}_\superpose$
are complete, this guarantees the existence of a derivative for $\denote{T}$.

Unfortunately, this does not necessarily provide us with an \emph{efficient}
derivative for $\denote{T}$. The derivatives that we know how to compute
(\cref{cor:booleanCharacterization}) rely on computing $f(a \cplus \change{a})$
itself, which is the very thing we were trying to avoid computing!

Of course, given a concrete definition of a derivative we can simplify this
expression and hopefully make it easier to compute. But we also know from
\cref{cor:booleanCharacterization} that \emph{any} function bounded by
$\derive{f}_{\cminus_\bot}$ and $\derive{f}_{\cminus_\top}$ is a valid
derivative,
and we can therefore optimize anywhere within that range to make a
trade-off between ease of computation and precision.\footnote{The idea of using an approximation
to the precise derivative, and a soundness condition, appears in Bancilhon \cite{bancilhon1986amateur}.}

There is also the question of how to compute the derivative. Since the change
set for $\widehat{\Rel}_\superpose$ is a subset of $\Rel \times \Rel$, it
is possible and indeed very natural to compute the two components via a pair of
Datalog formulae, which allows us to reuse an existing Datalog formula
evaluator. Indeed, if this process is occurring in an optimizing compiler,
the derivative formulae can themselves be optimized. This is very
beneficial in practice, since the initial formulae may be quite complex.

This does give us additional constraints that the derivative formulae must satisfy:
for example, we need to be able to evaluate them; and we may wish to pick formulae that will be easy or cheap
for our evaluation engine to compute, even if they compute a less precise derivative.

The upshot of these considerations is that the optimal choice of derivatives is likely
to be quite dependent on the precise variant of Datalog being evaluated, and the
specifics of the evaluation engine. Here is one possibility, which is the one used at Semmle.

\subsubsection{A concrete Datalog formula derivative}
%\newcommand{\bothdiff}{\diamond}
\newcommand{\bothdiff}{{\mathsf X}}

\begin{figure}[t]
  \fbox{
    \begin{minipage}[t]{0.9\textwidth}
    \vspace{-5pt}
    \begin{minipage}[t]{.45\textwidth}
      \begin{align*}
        \updiff(\bot) &\defeq \bot\\
        \updiff(\top) &\defeq \bot\\
        \updiff(R_j) &\defeq \updiff R_j \\
        \updiff(T\vee U) &\defeq \updiff(T) \vee \updiff (U)\\
        \updiff(T\wedge U) &\defeq (\updiff(T)\wedge \bothdiff(U))\\
                            & \vee (\updiff(U) \wedge \bothdiff(T))\\
        \updiff(\neg T) &\defeq \downdiff(T)\\
        \updiff(\exists x.T) &\defeq \exists x.\updiff(T)
      \end{align*}
    \end{minipage}\hfill\noindent
    \begin{minipage}[t]{.45\textwidth}
      \begin{align*}
        \downdiff(\bot) &\defeq \bot\\
        \downdiff(\top) &\defeq \bot\\
        \downdiff(R_j) &\defeq \downdiff R_j \\
        \downdiff(T\vee U) &\defeq (\downdiff(T) \wedge \neg \bothdiff(U))\\
                              & \vee (\downdiff(U) \wedge \neg \bothdiff(T))\\
        \downdiff(T\wedge U) &\defeq (\downdiff(T)\wedge U) \vee (T \wedge \downdiff(U))\\
        \downdiff(\neg T) &\defeq \updiff(T)\\
        \downdiff(\exists x.T) &\defeq \exists x.\downdiff(T) \wedge \neg \exists x.\bothdiff(T)
      \end{align*}
    \end{minipage}
    \vspace{6pt}
    \begin{align*}
      \bothdiff(R) \defeq (R \vee \updiff(R)) \wedge \neg \downdiff(R)
    \end{align*}
    \end{minipage}
  }
  \caption{Upwards and downwards formula derivatives for Datalog}
  \label{fig:datalogDerivatives}
  \vspace{-12pt}
\end{figure}

In~\cref{fig:datalogDerivatives}, we define a ``symbolic'' derivative operator as a pair of mutually recursive functions,
$\updiff$ and $\downdiff$, which turn a Datalog formula $T$ into new formulae that compute
the upwards and downwards parts of the derivative, respectively.
Our definition uses an auxiliary function, $\bothdiff$, which computes the ``neXt'' value of a term by applying the upwards and downwards derivatives.
As is typical for a derivative, the new formulae will have additional free relation variables
for the upwards and downwards derivatives of the free relation variables of $T$,
denoted as $\updiff R$ and $\downdiff R$ respectively. Evaluating the formula as
a derivative means evaluating it as a normal Datalog formula with the new
relation variables set to the input relation changes.

While the definitions mostly exhibit the dualities we would expect
between corresponding operators, there are a few asymmetries to explain.

The asymmetry between the cases for $\updiff(T \vee U)$ and
$\downdiff(T \wedge U)$ is for operational reasons. The symmetrical version of
$\updiff(T \vee U)$ is $(\updiff(T) \wedge \neg U) \vee (\updiff(U) \wedge \neg
T)$ (which is also precise). The reason we omit the negated conjuncts is simply
that they are costly to compute and not especially helpful to our evaluation engine.

The asymmetry between the cases for $\exists$ is because our
dialect of Datalog does not have a primitive universal quantifier.
If we did have one, the cases for $\exists$ would be dual to the corresponding
cases for $\forall$.

\newcommand{\bothchanges}{\rho}
\begin{rep-theorem}{concreteDatalog}[Concrete Datalog formula derivatives]
\label{theorem:concreteDatalog}
  Let $\updiff$, $\downdiff$, $\bothdiff : \Formula \rightarrow \Formula$ be mutually recursive functions
  defined by structural induction as in \cref{fig:datalogDerivatives}.

  Then $\updiff(T)$ and $\downdiff(T)$ are disjoint, and for any schema $\Gamma$
  and any Datalog formula $T$ whose free term variables are contained in $\Gamma$,
  $\derive{\denote{T}_\Gamma} \defeq (\denote{\updiff(T)}_\Gamma, \denote{\downdiff(T)}_\Gamma)$
  is a derivative for $\denote{T}_\Gamma$.
\end{rep-theorem}
\ifproofs
\begin{proof}
  See \cref{prf:concreteDatalog}
\end{proof}
\fi

We can give a derivative for our $treeP$ predicate by mechanically applying
the recursive functions defined in \cref{fig:datalogDerivatives}.
\begin{align*}
  &\updiff(treeP(x))\\
  &= p(x) \wedge \exists y. (child(x, y) \wedge \updiff(treeP(y))) \wedge \neg \exists y. (child(x,y) \wedge \neg \bothdiff(treeP(y)))\\
  \\
  &\downdiff(treeP(x))\\
  &= p(x) \wedge \exists y. (child(x, y) \wedge \downdiff(treeP(y)))
\end{align*}

The upwards difference in particular is not especially easy to compute. If we naively compute it, the
third conjunct requires us to recompute the whole of the recursive part. However,
the second conjunct gives us a
guard: if it is empty we then the whole formula will be, so we only need to evaluate the third conjunct if the second conjunct is
non-empty, i.e if there is \emph{some} change in the body of the existential.

This shows that our derivatives aren't a panacea: it is simply \emph{hard} to compute
downwards differences for $\exists$ (and, equivalently, upwards differences for
$\forall$) because we must check that there is no other way of deriving the same
facts.\footnote{The ``support'' data structures introduced by
  \cite{gupta1993maintaining} are an attempt to avoid this issue by
  tracking the number of derivations of each tuple.} However, we can still avoid
the re-evaluation in many cases, and the inefficiency is local to this subformula.

\subsection{Extensions to Datalog}
\label{sec:extensions}

Our formulation of Datalog formula semantics and derivatives is
generic and modular, so it is easy to extend the language with new
formula constructs: all we need to do is add cases for $\updiff$ and $\downdiff$.