2929#include <errno.h>
3030#include <limits.h>
3131#include <pthread.h>
32+ #include <dirent.h>
33+ #include <stdlib.h>
3234#include <sys/event.h>
3335#include <sys/stat.h>
3436#include <sys/uio.h>
@@ -83,6 +85,11 @@ typedef struct {
8385 bool is_dir ; /* true if watching a directory */
8486 dev_t dev ; /* Device ID (for re-add lookup by inode) */
8587 ino_t ino ; /* Inode number (for re-add lookup by inode) */
88+ /* Dir watches only: path + entry-name snapshot, diffed on change to
89+ * recover the child name kqueue omits. NULL/0 for file watches. */
90+ char * path ;
91+ char * * entries ;
92+ int n_entries ;
8693} inotify_watch_t ;
8794
8895typedef struct {
@@ -296,8 +303,133 @@ static void pipe_drain(inotify_instance_t *inst)
296303 ;
297304}
298305
/* Snapshot the entry names of a directory (excluding "." and "..").
 *
 * path   Directory to read.
 * out    Receives a malloc'd array of malloc'd NUL-terminated names, or NULL.
 * n_out  Receives the number of names stored in *out.
 *
 * Release the result with free_dir_snapshot(). The snapshot is all-or-nothing:
 * if the directory cannot be opened, a read fails part-way, or memory runs
 * out, everything gathered so far is released and *out / *n_out are left as
 * NULL / 0, so a caller never diffs against a silently truncated listing.
 */
static void dir_snapshot(const char *path, char ***out, int *n_out)
{
    *out = NULL;
    *n_out = 0;

    DIR *d = opendir(path);
    if (!d)
        return;

    char **names = NULL;
    int n = 0, cap = 0;
    bool ok = true;

    for (;;) {
        /* readdir() returns NULL for both end-of-directory and error; only
         * errno tells them apart, so clear it before every call.
         */
        errno = 0;
        struct dirent *de = readdir(d);
        if (!de) {
            ok = (errno == 0);
            break;
        }
        if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
            continue;

        if (n == cap) {
            /* Grow geometrically; keep the old array valid if realloc fails */
            int ncap = cap ? cap * 2 : 16;
            char **tmp = realloc(names, (size_t) ncap * sizeof *tmp);
            if (!tmp) {
                ok = false;
                break;
            }
            names = tmp;
            cap = ncap;
        }

        names[n] = strdup(de->d_name);
        if (!names[n]) {
            ok = false;
            break;
        }
        n++;
    }
    closedir(d);

    if (!ok) {
        /* Discard the partial listing rather than publish it as complete */
        while (n > 0)
            free(names[--n]);
        free(names);
        return;
    }

    *out = names;
    *n_out = n;
}
343+
344+ static void free_dir_snapshot (char * * entries , int n )
345+ {
346+ if (!entries )
347+ return ;
348+ for (int i = 0 ; i < n ; i ++ )
349+ free (entries [i ]);
350+ free (entries );
351+ }
352+
/* Linear search: report whether name matches (by strcmp) any of the first n
 * strings in entries. With n <= 0 the array is never dereferenced.
 */
static bool snapshot_contains(char *const *entries, int n, const char *name)
{
    int idx = 0;

    while (idx < n) {
        if (strcmp(entries[idx], name) == 0)
            return true;
        idx++;
    }
    return false;
}
360+
299361/* Collect events from kqueue. */
300362
363+ /* Translate one EVFILT_VNODE notification into queued inotify events for the
364+ * watch on host_fd. Returns the number queued, or -1 on buffer overflow (an
365+ * IN_Q_OVERFLOW marker is queued). Caller holds inotify_lock.
366+ */
367+ static int process_vnode_event (inotify_instance_t * inst ,
368+ int host_fd ,
369+ uint32_t fflags )
370+ {
371+ int widx = watch_find_by_hostfd (inst , host_fd );
372+ if (widx < 0 )
373+ return 0 ;
374+
375+ inotify_watch_t * w = & inst -> watches [widx ];
376+ int queued = 0 ;
377+ bool overflow = false;
378+
379+ if (w -> is_dir && (fflags & NOTE_WRITE ) && w -> path ) {
380+ char * * now = NULL ;
381+ int now_n = 0 ;
382+ dir_snapshot (w -> path , & now , & now_n );
383+
384+ for (int j = 0 ; j < now_n && !overflow ; j ++ ) {
385+ if ((w -> mask & IN_CREATE ) &&
386+ !snapshot_contains (w -> entries , w -> n_entries , now [j ])) {
387+ if (queue_event (inst , w -> wd , IN_CREATE , 0 , now [j ]) < 0 )
388+ overflow = true;
389+ else
390+ queued ++ ;
391+ }
392+ }
393+ for (int j = 0 ; j < w -> n_entries && !overflow ; j ++ ) {
394+ if ((w -> mask & IN_DELETE ) &&
395+ !snapshot_contains (now , now_n , w -> entries [j ])) {
396+ if (queue_event (inst , w -> wd , IN_DELETE , 0 , w -> entries [j ]) < 0 )
397+ overflow = true;
398+ else
399+ queued ++ ;
400+ }
401+ }
402+
403+ /* Advance the snapshot regardless: the directory state has moved on,
404+ * and any names dropped under overflow are covered by IN_Q_OVERFLOW.
405+ */
406+ free_dir_snapshot (w -> entries , w -> n_entries );
407+ w -> entries = now ;
408+ w -> n_entries = now_n ;
409+ }
410+
411+ if (!overflow ) {
412+ uint32_t in_mask = notes_to_in_mask (fflags , w -> mask , w -> is_dir );
413+ /* The per-child create/delete is emitted by the diff above; only emit
414+ * the bare-mask event for file watches or non-create/delete changes.
415+ */
416+ if (in_mask != 0 &&
417+ !(w -> is_dir && (in_mask & (IN_CREATE | IN_DELETE )))) {
418+ if (queue_event (inst , w -> wd , in_mask , 0 , NULL ) < 0 )
419+ overflow = true;
420+ else
421+ queued ++ ;
422+ }
423+ }
424+
425+ if (overflow ) {
426+ /* IN_Q_OVERFLOW (0x4000) uses wd=-1 per Linux semantics. */
427+ queue_event (inst , -1 , 0x4000 , 0 , NULL );
428+ return -1 ;
429+ }
430+ return queued ;
431+ }
432+
301433/* Poll the kqueue for pending vnode events and translate them into
302434 * inotify events in the instance buffer. Returns the number of
303435 * events collected.
@@ -312,35 +444,19 @@ static int collect_events(inotify_instance_t *inst)
312444 return 0 ;
313445
314446 int collected = 0 ;
447+ bool overflow = false;
315448 for (int i = 0 ; i < nev ; i ++ ) {
316- int host_fd = (int ) kevs [i ].ident ;
317- int widx = watch_find_by_hostfd (inst , host_fd );
318- if (widx < 0 )
319- continue ;
320-
321- inotify_watch_t * w = & inst -> watches [widx ];
322- uint32_t in_mask =
323- notes_to_in_mask ((uint32_t ) kevs [i ].fflags , w -> mask , w -> is_dir );
324- if (in_mask == 0 )
325- continue ;
326-
327- /* Queue event without a filename for file watches. For directory
328- * watches, inotify emulation also omits the filename since kqueue
329- * EVFILT_VNODE does not report which child changed.
330- */
331- if (queue_event (inst , w -> wd , in_mask , 0 , NULL ) == 0 ) {
332- collected ++ ;
333- } else {
334- /* Fixed inotify queue is full; queue IN_Q_OVERFLOW and stop.
335- * IN_Q_OVERFLOW (0x4000) uses wd=-1 per Linux semantics.
336- */
337- queue_event (inst , -1 , 0x4000 , 0 , NULL );
449+ int r = process_vnode_event (inst , (int ) kevs [i ].ident ,
450+ (uint32_t ) kevs [i ].fflags );
451+ if (r < 0 ) {
452+ overflow = true;
338453 break ;
339454 }
455+ collected += r ;
340456 }
341457
342458 /* Signal the self-pipe so poll/epoll sees readability */
343- if (collected > 0 )
459+ if (collected > 0 || overflow )
344460 pipe_signal (inst );
345461
346462 return collected ;
@@ -438,12 +554,27 @@ int64_t sys_inotify_add_watch(guest_t *g,
438554 /* Strip IN_MASK_ADD control flag before storing */
439555 uint32_t event_mask = mask & ~(uint32_t ) IN_MASK_ADD ;
440556
557+ /* For directory watches, snapshot the path + current entries up-front
558+ * (outside the lock) so collect_events can diff on each change to emit
559+ * named IN_CREATE/IN_DELETE. Ownership moves to the watch slot on success;
560+ * every early-exit path below frees these.
561+ */
562+ char * wpath = NULL ;
563+ char * * wentries = NULL ;
564+ int wn = 0 ;
565+ if (is_dir ) {
566+ wpath = strdup (path );
567+ dir_snapshot (path , & wentries , & wn );
568+ }
569+
441570 pthread_mutex_lock (& inotify_lock );
442571
443572 int slot = inotify_find (inotify_fd );
444573 if (slot < 0 ) {
445574 pthread_mutex_unlock (& inotify_lock );
446575 close (host_fd );
576+ free_dir_snapshot (wentries , wn );
577+ free (wpath );
447578 return - LINUX_EBADF ;
448579 }
449580
@@ -466,8 +597,12 @@ int64_t sys_inotify_add_watch(guest_t *g,
466597 uint32_t snapshot_mask = w -> mask ; /* Snapshot before unlock */
467598 pthread_mutex_unlock (& inotify_lock );
468599
469- /* Close the duplicate fd; inotify emulation keeps the original */
600+ /* Close the duplicate fd; inotify emulation keeps the original.
601+ * The existing watch keeps its snapshot; drop this call's copy.
602+ */
470603 close (host_fd );
604+ free_dir_snapshot (wentries , wn );
605+ free (wpath );
471606
472607 /* Update kevent filter with the new mask (use snapshot --
473608 * w->mask may be modified by another thread after unlock)
@@ -486,6 +621,8 @@ int64_t sys_inotify_add_watch(guest_t *g,
486621 if (widx < 0 ) {
487622 pthread_mutex_unlock (& inotify_lock );
488623 close (host_fd );
624+ free_dir_snapshot (wentries , wn );
625+ free (wpath );
489626 return - LINUX_ENOSPC ;
490627 }
491628
@@ -501,6 +638,9 @@ int64_t sys_inotify_add_watch(guest_t *g,
501638 w -> is_dir = is_dir ;
502639 w -> dev = st .st_dev ;
503640 w -> ino = st .st_ino ;
641+ w -> path = wpath ;
642+ w -> entries = wentries ;
643+ w -> n_entries = wn ;
504644
505645 /* Capture kq_fd while under lock */
506646 int kq_fd = inst -> kq_fd ;
@@ -521,6 +661,11 @@ int64_t sys_inotify_add_watch(guest_t *g,
521661 pthread_mutex_lock (& inotify_lock );
522662 w -> wd = 0 ;
523663 w -> host_fd = 0 ;
664+ free_dir_snapshot (w -> entries , w -> n_entries );
665+ w -> entries = NULL ;
666+ w -> n_entries = 0 ;
667+ free (w -> path );
668+ w -> path = NULL ;
524669 pthread_mutex_unlock (& inotify_lock );
525670 close (host_fd );
526671 errno = saved ;
@@ -555,6 +700,11 @@ int64_t sys_inotify_rm_watch(int inotify_fd, int wd)
555700 w -> host_fd = 0 ;
556701 w -> mask = 0 ;
557702 w -> is_dir = 0 ;
703+ free_dir_snapshot (w -> entries , w -> n_entries );
704+ w -> entries = NULL ;
705+ w -> n_entries = 0 ;
706+ free (w -> path );
707+ w -> path = NULL ;
558708 pthread_mutex_unlock (& inotify_lock );
559709
560710 /* Remove from kqueue and close outside lock */
@@ -619,18 +769,12 @@ int64_t inotify_read(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t count)
619769 }
620770 inst = & inotify_state [slot ];
621771
622- /* Process the received event */
772+ /* Process the received event (same named-directory diff as the
773+ * non-blocking collect path).
774+ */
623775 int host_fd = (int ) kev .ident ;
624- int widx = watch_find_by_hostfd (inst , host_fd );
625- if (widx >= 0 ) {
626- inotify_watch_t * w = & inst -> watches [widx ];
627- uint32_t in_mask =
628- notes_to_in_mask ((uint32_t ) kev .fflags , w -> mask , w -> is_dir );
629- if (in_mask != 0 ) {
630- queue_event (inst , w -> wd , in_mask , 0 , NULL );
631- pipe_signal (inst );
632- }
633- }
776+ if (process_vnode_event (inst , host_fd , (uint32_t ) kev .fflags ) != 0 )
777+ pipe_signal (inst );
634778 }
635779 }
636780
@@ -711,6 +855,11 @@ static void inotify_close(int guest_fd)
711855 watch_fds [nfds ++ ] = inst -> watches [i ].host_fd ;
712856 inst -> watches [i ].wd = 0 ;
713857 }
858+ free_dir_snapshot (inst -> watches [i ].entries , inst -> watches [i ].n_entries );
859+ inst -> watches [i ].entries = NULL ;
860+ inst -> watches [i ].n_entries = 0 ;
861+ free (inst -> watches [i ].path );
862+ inst -> watches [i ].path = NULL ;
714863 }
715864
716865 inst -> guest_fd = -1 ;
0 commit comments