Skip to content

Commit c708a39

Browse files
authored
Merge pull request #56 from chenhunghan/fix-netlink-sockets
Dispatch getsockname/sendto/recvfrom on netlink sockets
2 parents 66635fd + fafdb43 commit c708a39

5 files changed

Lines changed: 389 additions & 35 deletions

File tree

src/syscall/net.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@ int64_t sys_getsockname(guest_t *g,
546546
uint64_t addr_gva,
547547
uint64_t addrlen_gva)
548548
{
549+
if (fd_get_type(fd) == FD_NETLINK)
550+
return netlink_getsockname(fd, g, addr_gva, addrlen_gva);
551+
549552
host_fd_ref_t host_ref;
550553
if (host_fd_ref_open(fd, &host_ref) < 0)
551554
return -LINUX_EBADF;
@@ -676,6 +679,9 @@ int64_t sys_sendto(guest_t *g,
676679
uint64_t dest_gva,
677680
uint32_t addrlen)
678681
{
682+
if (fd_get_type(fd) == FD_NETLINK)
683+
return netlink_send(fd, g, buf_gva, len);
684+
679685
host_fd_ref_t host_ref;
680686
if (host_fd_ref_open(fd, &host_ref) < 0)
681687
return -LINUX_EBADF;
@@ -743,6 +749,9 @@ int64_t sys_recvfrom(guest_t *g,
743749
uint64_t src_gva,
744750
uint64_t addrlen_gva)
745751
{
752+
if (fd_get_type(fd) == FD_NETLINK)
753+
return netlink_recv(fd, g, buf_gva, len, src_gva, addrlen_gva);
754+
746755
host_fd_ref_t host_ref;
747756
if (host_fd_ref_open(fd, &host_ref) < 0)
748757
return -LINUX_EBADF;

src/syscall/net.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,12 +192,25 @@ int64_t netlink_sendmsg(int guest_fd, guest_t *g, uint64_t msg_gva, int flags);
192192
/* Netlink recvmsg: return buffered response data. */
193193
int64_t netlink_recvmsg(int guest_fd, guest_t *g, uint64_t msg_gva, int flags);
194194

195-
/* Netlink read: return buffered response data without msghdr metadata. */
196195
int64_t netlink_read(int guest_fd,
197196
guest_t *g,
198197
uint64_t buf_gva,
199198
uint64_t count);
200199

200+
int64_t netlink_send(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t len);
201+
202+
int64_t netlink_recv(int guest_fd,
203+
guest_t *g,
204+
uint64_t buf_gva,
205+
uint64_t len,
206+
uint64_t src_gva,
207+
uint64_t addrlen_gva);
208+
209+
int64_t netlink_getsockname(int guest_fd,
210+
guest_t *g,
211+
uint64_t addr_gva,
212+
uint64_t addrlen_gva);
213+
201214
/* Clean up abstract socket filesystem entry for a fd being closed. */
202215
void absock_unregister_fd(int guest_fd);
203216

src/syscall/netlink.c

Lines changed: 230 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -187,8 +187,12 @@ static size_t nl_put_attr(uint8_t *buf,
187187
return aligned;
188188
}
189189

190-
/* Build RTM_GETLINK response from host getifaddrs(). */
191-
static int nl_build_getlink(netlink_state_t *ns)
190+
/* Build RTM_GETLINK response from host getifaddrs(). A non-empty name_filter
191+
* or non-zero index_filter restricts the reply to one matching link.
192+
*/
193+
static int nl_build_getlink(netlink_state_t *ns,
194+
const char *name_filter,
195+
uint32_t index_filter)
192196
{
193197
struct ifaddrs *ifalist, *ifa;
194198
if (getifaddrs(&ifalist) < 0)
@@ -209,6 +213,11 @@ static int nl_build_getlink(netlink_state_t *ns)
209213
if (idx == 0)
210214
continue;
211215

216+
if (name_filter[0] && strcmp(ifa->ifa_name, name_filter) != 0)
217+
continue;
218+
if (index_filter != 0 && idx != index_filter)
219+
continue;
220+
212221
/* Check if already seen */
213222
bool found = false;
214223
for (int i = 0; i < nseen; i++) {
@@ -458,6 +467,96 @@ int64_t netlink_bind(int guest_fd,
458467
return 0;
459468
}
460469

470+
/* Extract the LinkByName/LinkByIndex filter (ifi_index plus an optional
471+
* IFLA_IFNAME) from a RTM_GETLINK request. Empty name / zero index = no filter.
472+
*/
473+
static void nl_parse_link_filter(const uint8_t *req,
474+
size_t reqlen,
475+
char *name_out,
476+
size_t name_cap,
477+
uint32_t *index_out)
478+
{
479+
name_out[0] = '\0';
480+
*index_out = 0;
481+
482+
if (reqlen < (size_t) NLMSG_HDRLEN + sizeof(ifinfomsg_t))
483+
return;
484+
485+
ifinfomsg_t ifi;
486+
memcpy(&ifi, req + NLMSG_HDRLEN, sizeof(ifi));
487+
if (ifi.ifi_index > 0)
488+
*index_out = (uint32_t) ifi.ifi_index;
489+
490+
uint32_t nlmsg_len;
491+
memcpy(&nlmsg_len, req, sizeof(nlmsg_len));
492+
size_t total = (nlmsg_len < reqlen) ? nlmsg_len : reqlen;
493+
494+
size_t off = NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(ifinfomsg_t));
495+
while (off + RTA_HDRLEN <= total) {
496+
rtattr_t rta;
497+
memcpy(&rta, req + off, sizeof(rta));
498+
if (rta.rta_len < RTA_HDRLEN || off + rta.rta_len > total)
499+
break;
500+
if (rta.rta_type == IFLA_IFNAME) {
501+
size_t dlen = rta.rta_len - RTA_HDRLEN;
502+
size_t i = 0;
503+
for (; i < dlen && i + 1 < name_cap && req[off + RTA_HDRLEN + i];
504+
i++)
505+
name_out[i] = (char) req[off + RTA_HDRLEN + i];
506+
name_out[i] = '\0';
507+
}
508+
off += RTA_ALIGN(rta.rta_len);
509+
}
510+
}
511+
512+
/* Build the reply for one rtnetlink request (already copied into req). Mutates
513+
* ns->buf/seq. Returns 0 on success (including a built NLMSG_ERROR reply for
514+
* unsupported types), or a negative LINUX_E* on a build failure. Caller holds
515+
* nl_lock. req is guaranteed to be at least NLMSG_HDRLEN bytes.
516+
*/
517+
static int nl_process_request(netlink_state_t *ns,
518+
const uint8_t *req,
519+
size_t reqlen)
520+
{
521+
nlmsghdr_t req_hdr;
522+
memcpy(&req_hdr, req, sizeof(req_hdr));
523+
ns->seq = req_hdr.nlmsg_seq;
524+
525+
int ret;
526+
switch (req_hdr.nlmsg_type) {
527+
case RTM_GETLINK: {
528+
char name[64];
529+
uint32_t index;
530+
nl_parse_link_filter(req, reqlen, name, sizeof(name), &index);
531+
ret = nl_build_getlink(ns, name, index);
532+
break;
533+
}
534+
case RTM_GETADDR:
535+
ret = nl_build_getaddr(ns);
536+
break;
537+
default:
538+
/* Unsupported request: return NLMSG_ERROR with EOPNOTSUPP */
539+
if ((size_t) NLMSG_HDRLEN + 4 <= NETLINK_BUF_SIZE) {
540+
size_t off = 0;
541+
nlmsghdr_t err_hdr = {
542+
.nlmsg_len = NLMSG_HDRLEN + 4,
543+
.nlmsg_type = NLMSG_ERROR,
544+
.nlmsg_seq = ns->seq,
545+
.nlmsg_pid = ns->pid,
546+
};
547+
memcpy(ns->buf + off, &err_hdr, sizeof(err_hdr));
548+
off += NLMSG_HDRLEN;
549+
int32_t errcode = -95; /* -EOPNOTSUPP */
550+
memcpy(ns->buf + off, &errcode, 4);
551+
ns->buf_len = off + 4;
552+
ns->buf_pos = 0;
553+
}
554+
return 0;
555+
}
556+
557+
return (ret < 0) ? -LINUX_EIO : 0;
558+
}
559+
461560
int64_t netlink_sendmsg(int guest_fd, guest_t *g, uint64_t msg_gva, int flags)
462561
{
463562
(void) flags;
@@ -490,56 +589,153 @@ int64_t netlink_sendmsg(int guest_fd, guest_t *g, uint64_t msg_gva, int flags)
490589
goto out;
491590
}
492591

493-
if (iov.iov_len < NLMSG_HDRLEN) {
592+
if (iov.iov_len < (uint64_t) NLMSG_HDRLEN) {
494593
result = -LINUX_EINVAL;
495594
goto out;
496595
}
497596

498-
nlmsghdr_t req_hdr;
499-
if (guest_read_small(g, iov.iov_base, &req_hdr, sizeof(req_hdr)) < 0) {
597+
/* Copy the whole request: the dispatcher inspects filter attributes past
598+
* the fixed nlmsghdr.
599+
*/
600+
uint8_t req[512];
601+
size_t rlen = (iov.iov_len < sizeof(req)) ? iov.iov_len : sizeof(req);
602+
if (guest_read(g, iov.iov_base, req, rlen) < 0) {
500603
result = -LINUX_EFAULT;
501604
goto out;
502605
}
503606

504-
ns->seq = req_hdr.nlmsg_seq;
607+
int ret = nl_process_request(ns, req, rlen);
608+
result = (ret < 0) ? ret : (int64_t) iov.iov_len;
505609

506-
/* Dispatch based on request type */
507-
int ret;
508-
switch (req_hdr.nlmsg_type) {
509-
case RTM_GETLINK:
510-
ret = nl_build_getlink(ns);
511-
break;
512-
case RTM_GETADDR:
513-
ret = nl_build_getaddr(ns);
514-
break;
515-
default:
516-
/* Unsupported request: return NLMSG_ERROR with EOPNOTSUPP */
517-
if (ns->buf_len + NLMSG_HDRLEN + 4 <= NETLINK_BUF_SIZE) {
518-
size_t off = 0;
519-
nlmsghdr_t err_hdr = {
520-
.nlmsg_len = NLMSG_HDRLEN + 4,
521-
.nlmsg_type = NLMSG_ERROR,
522-
.nlmsg_seq = ns->seq,
523-
.nlmsg_pid = ns->pid,
524-
};
525-
memcpy(ns->buf + off, &err_hdr, sizeof(err_hdr));
526-
off += NLMSG_HDRLEN;
527-
int32_t errcode = -95; /* -EOPNOTSUPP */
528-
memcpy(ns->buf + off, &errcode, 4);
529-
ns->buf_len = off + 4;
530-
ns->buf_pos = 0;
531-
}
532-
result = (int64_t) iov.iov_len;
610+
out:
611+
pthread_mutex_unlock(&nl_lock);
612+
return result;
613+
}
614+
615+
/* sendto(2) on a netlink socket: a flat request buffer (no msghdr). */
616+
int64_t netlink_send(int guest_fd, guest_t *g, uint64_t buf_gva, uint64_t len)
617+
{
618+
pthread_mutex_lock(&nl_lock);
619+
netlink_state_t *ns = nl_find(guest_fd);
620+
if (!ns) {
621+
pthread_mutex_unlock(&nl_lock);
622+
return -LINUX_EBADF;
623+
}
624+
625+
int64_t result;
626+
if (len < (uint64_t) NLMSG_HDRLEN) {
627+
result = -LINUX_EINVAL;
628+
goto out;
629+
}
630+
631+
uint8_t req[512];
632+
size_t rlen = (len < sizeof(req)) ? len : sizeof(req);
633+
if (guest_read(g, buf_gva, req, rlen) < 0) {
634+
result = -LINUX_EFAULT;
533635
goto out;
534636
}
535637

536-
result = (ret < 0) ? -LINUX_EIO : (int64_t) iov.iov_len;
638+
int ret = nl_process_request(ns, req, rlen);
639+
result = (ret < 0) ? ret : (int64_t) len;
537640

538641
out:
539642
pthread_mutex_unlock(&nl_lock);
540643
return result;
541644
}
542645

646+
/* recvfrom(2) on a netlink socket: drain whole messages; write back a kernel
647+
* sockaddr_nl (nl_pid 0) when src is requested.
648+
*/
649+
int64_t netlink_recv(int guest_fd,
650+
guest_t *g,
651+
uint64_t buf_gva,
652+
uint64_t len,
653+
uint64_t src_gva,
654+
uint64_t addrlen_gva)
655+
{
656+
pthread_mutex_lock(&nl_lock);
657+
netlink_state_t *ns = nl_find(guest_fd);
658+
if (!ns) {
659+
pthread_mutex_unlock(&nl_lock);
660+
return -LINUX_EBADF;
661+
}
662+
663+
if (ns->buf_pos >= ns->buf_len) {
664+
pthread_mutex_unlock(&nl_lock);
665+
return 0;
666+
}
667+
668+
size_t avail = ns->buf_len - ns->buf_pos;
669+
size_t to_copy = (avail < len) ? avail : len;
670+
671+
/* Return complete netlink messages only (same walk as netlink_recvmsg). */
672+
size_t msg_end = 0, pos = ns->buf_pos;
673+
while (pos < ns->buf_len && (pos - ns->buf_pos + NLMSG_HDRLEN) <= to_copy) {
674+
nlmsghdr_t *hdr = (nlmsghdr_t *) (ns->buf + pos);
675+
if (hdr->nlmsg_len < NLMSG_HDRLEN)
676+
break;
677+
size_t msg_bytes = pos - ns->buf_pos + NLMSG_ALIGN(hdr->nlmsg_len);
678+
if (msg_bytes > to_copy)
679+
break;
680+
pos += NLMSG_ALIGN(hdr->nlmsg_len);
681+
msg_end = pos - ns->buf_pos;
682+
}
683+
if (msg_end == 0)
684+
msg_end = to_copy;
685+
686+
if (guest_write(g, buf_gva, ns->buf + ns->buf_pos, msg_end) < 0) {
687+
pthread_mutex_unlock(&nl_lock);
688+
return -LINUX_EFAULT;
689+
}
690+
ns->buf_pos += msg_end;
691+
692+
if (src_gva && addrlen_gva) {
693+
sockaddr_nl_t snl = {
694+
.nl_family = LINUX_AF_NETLINK,
695+
.nl_pid = 0, /* From kernel */
696+
};
697+
guest_write_small(g, src_gva, &snl, sizeof(snl));
698+
uint32_t namelen = sizeof(sockaddr_nl_t);
699+
guest_write_small(g, addrlen_gva, &namelen, sizeof(namelen));
700+
}
701+
702+
pthread_mutex_unlock(&nl_lock);
703+
return (int64_t) msg_end;
704+
}
705+
706+
/* getsockname(2) on a netlink socket: returns the bound/auto-assigned pid. */
707+
int64_t netlink_getsockname(int guest_fd,
708+
guest_t *g,
709+
uint64_t addr_gva,
710+
uint64_t addrlen_gva)
711+
{
712+
pthread_mutex_lock(&nl_lock);
713+
netlink_state_t *ns = nl_find(guest_fd);
714+
if (!ns) {
715+
pthread_mutex_unlock(&nl_lock);
716+
return -LINUX_EBADF;
717+
}
718+
uint32_t pid = ns->pid;
719+
pthread_mutex_unlock(&nl_lock);
720+
721+
uint32_t cap = 0;
722+
if (guest_read_small(g, addrlen_gva, &cap, sizeof(cap)) < 0)
723+
return -LINUX_EFAULT;
724+
725+
sockaddr_nl_t snl = {
726+
.nl_family = LINUX_AF_NETLINK,
727+
.nl_pid = pid,
728+
};
729+
size_t n = (cap < sizeof(snl)) ? cap : sizeof(snl);
730+
if (n > 0 && guest_write(g, addr_gva, &snl, n) < 0)
731+
return -LINUX_EFAULT;
732+
733+
uint32_t actual = sizeof(snl);
734+
if (guest_write_small(g, addrlen_gva, &actual, sizeof(actual)) < 0)
735+
return -LINUX_EFAULT;
736+
return 0;
737+
}
738+
543739
int64_t netlink_recvmsg(int guest_fd, guest_t *g, uint64_t msg_gva, int flags)
544740
{
545741
(void) flags;

tests/manifest.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ test-sysfs-cpu
7777
[section] Network tests
7878
test-net
7979
test-netstat
80+
test-netlink
8081

8182
[section] Threading tests
8283
test-thread # diff=skip

0 commit comments

Comments
 (0)