Skip to content

Commit 4aa4da1

Browse files
committed
Keep inotify directory baseline when a snapshot fails
A directory watch snapshots the child-name set on each change and diffs it against the previous baseline to recover the name kqueue omits. dir_snapshot returned an empty list on any failure (opendir error, a mid-read readdir error, or an allocation failure), which the diff could not tell apart from a genuinely empty directory. On a transient failure the IN_DELETE pass then saw every known child as missing and queued a spurious IN_DELETE for each, and the baseline was overwritten with the empty list, so every later change re-reported the whole directory as IN_CREATE. The corruption was permanent. Make dir_snapshot return bool: on failure it frees any partial result and reports false, and readdir read errors are now detected by clearing errno before each call. process_vnode_event only diffs against and advances to a snapshot that succeeded; on failure it keeps the previous baseline so the next successful snapshot reconciles whatever changed in between. The watch-add path documents that a failed initial snapshot is a best-effort empty baseline. Validated with make check on Apple Silicon; test-inotify still passes 6/6.
1 parent b6e2122 commit 4aa4da1

1 file changed

Lines changed: 83 additions & 43 deletions

File tree

src/syscall/inotify.c

Lines changed: 83 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -303,51 +303,77 @@ static void pipe_drain(inotify_instance_t *inst)
303303
;
304304
}
305305

306-
/* Snapshot the entry names of a directory (excluding "." and ".."). On return
307-
* *out is a malloc'd array of malloc'd strings with *n_out entries (free with
308-
* free_dir_snapshot). On any failure the snapshot is left empty.
306+
static void free_dir_snapshot(char **entries, int n)
307+
{
308+
if (!entries)
309+
return;
310+
for (int i = 0; i < n; i++)
311+
free(entries[i]);
312+
free(entries);
313+
}
314+
315+
/* Snapshot the entry names of a directory (excluding "." and ".."). On success
316+
* *out is a malloc'd array of *n_out malloc'd strings (free with
317+
* free_dir_snapshot) and the function returns true. On any failure -- the
318+
* directory could not be opened, a readdir read error occurred, or an
319+
* allocation failed -- it frees any partial result, leaves *out NULL / *n_out
320+
* 0, and returns false. Callers must distinguish a failed snapshot from a
321+
* genuinely empty directory: treating a failure as "empty" would diff every
322+
* known child as deleted and wipe the baseline.
309323
*/
310-
static void dir_snapshot(const char *path, char ***out, int *n_out)
324+
static bool dir_snapshot(const char *path, char ***out, int *n_out)
311325
{
312326
*out = NULL;
313327
*n_out = 0;
314328

315329
DIR *d = opendir(path);
316330
if (!d)
317-
return;
331+
return false;
318332

319333
char **names = NULL;
320334
int n = 0, cap = 0;
321-
struct dirent *de;
322-
while ((de = readdir(d)) != NULL) {
335+
bool ok = true;
336+
for (;;) {
337+
/* readdir returns NULL both at end-of-stream and on error; reset
338+
* errno immediately before each call so a non-zero errno afterwards
339+
* unambiguously signals a read error rather than EOF.
340+
*/
341+
errno = 0;
342+
struct dirent *de = readdir(d);
343+
if (!de) {
344+
if (errno != 0)
345+
ok = false;
346+
break;
347+
}
323348
if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
324349
continue;
325350
if (n == cap) {
326351
int ncap = cap ? cap * 2 : 16;
327352
char **tmp = realloc(names, (size_t) ncap * sizeof(char *));
328-
if (!tmp)
353+
if (!tmp) {
354+
ok = false;
329355
break;
356+
}
330357
names = tmp;
331358
cap = ncap;
332359
}
333360
names[n] = strdup(de->d_name);
334-
if (!names[n])
361+
if (!names[n]) {
362+
ok = false;
335363
break;
364+
}
336365
n++;
337366
}
338367
closedir(d);
339368

369+
if (!ok) {
370+
free_dir_snapshot(names, n);
371+
return false;
372+
}
373+
340374
*out = names;
341375
*n_out = n;
342-
}
343-
344-
static void free_dir_snapshot(char **entries, int n)
345-
{
346-
if (!entries)
347-
return;
348-
for (int i = 0; i < n; i++)
349-
free(entries[i]);
350-
free(entries);
376+
return true;
351377
}
352378

353379
static bool snapshot_contains(char *const *entries, int n, const char *name)
@@ -379,33 +405,42 @@ static int process_vnode_event(inotify_instance_t *inst,
379405
if (w->is_dir && (fflags & NOTE_WRITE) && w->path) {
380406
char **now = NULL;
381407
int now_n = 0;
382-
dir_snapshot(w->path, &now, &now_n);
383-
384-
for (int j = 0; j < now_n && !overflow; j++) {
385-
if ((w->mask & IN_CREATE) &&
386-
!snapshot_contains(w->entries, w->n_entries, now[j])) {
387-
if (queue_event(inst, w->wd, IN_CREATE, 0, now[j]) < 0)
388-
overflow = true;
389-
else
390-
queued++;
408+
/* Only diff against -- and advance to -- a snapshot that succeeded.
409+
* On a transient opendir/readdir failure dir_snapshot returns false
410+
* with an empty result; treating that as the live directory state
411+
* would emit a spurious IN_DELETE for every known child and leave the
412+
* baseline empty, so every later change would re-report the whole tree
413+
* as IN_CREATE. Keeping the previous baseline instead lets the next
414+
* successful snapshot reconcile whatever was missed this round.
415+
*/
416+
if (dir_snapshot(w->path, &now, &now_n)) {
417+
for (int j = 0; j < now_n && !overflow; j++) {
418+
if ((w->mask & IN_CREATE) &&
419+
!snapshot_contains(w->entries, w->n_entries, now[j])) {
420+
if (queue_event(inst, w->wd, IN_CREATE, 0, now[j]) < 0)
421+
overflow = true;
422+
else
423+
queued++;
424+
}
391425
}
392-
}
393-
for (int j = 0; j < w->n_entries && !overflow; j++) {
394-
if ((w->mask & IN_DELETE) &&
395-
!snapshot_contains(now, now_n, w->entries[j])) {
396-
if (queue_event(inst, w->wd, IN_DELETE, 0, w->entries[j]) < 0)
397-
overflow = true;
398-
else
399-
queued++;
426+
for (int j = 0; j < w->n_entries && !overflow; j++) {
427+
if ((w->mask & IN_DELETE) &&
428+
!snapshot_contains(now, now_n, w->entries[j])) {
429+
if (queue_event(inst, w->wd, IN_DELETE, 0, w->entries[j]) <
430+
0)
431+
overflow = true;
432+
else
433+
queued++;
434+
}
400435
}
401-
}
402436

403-
/* Advance the snapshot regardless: the directory state has moved on,
404-
* and any names dropped under overflow are covered by IN_Q_OVERFLOW.
405-
*/
406-
free_dir_snapshot(w->entries, w->n_entries);
407-
w->entries = now;
408-
w->n_entries = now_n;
437+
/* Advance the snapshot: the directory state has moved on, and any
438+
* names dropped under overflow are covered by IN_Q_OVERFLOW.
439+
*/
440+
free_dir_snapshot(w->entries, w->n_entries);
441+
w->entries = now;
442+
w->n_entries = now_n;
443+
}
409444
}
410445

411446
if (!overflow) {
@@ -564,7 +599,12 @@ int64_t sys_inotify_add_watch(guest_t *g,
564599
int wn = 0;
565600
if (is_dir) {
566601
wpath = strdup(path);
567-
dir_snapshot(path, &wentries, &wn);
602+
/* A failed initial snapshot leaves an empty baseline -- the best we
603+
* can do at watch-add time -- so the first change may report existing
604+
* children as IN_CREATE once. That self-corrects and never wipes a
605+
* populated baseline, which is the case the runtime diff guards.
606+
*/
607+
(void) dir_snapshot(path, &wentries, &wn);
568608
}
569609

570610
pthread_mutex_lock(&inotify_lock);

0 commit comments

Comments
 (0)