Skip to content

Commit 845da0a

Browse files
[sidecar] p4 PHV coherence + mocha container protection
This commit forces metadata fields out of mocha containers via @pa_container_type pragmas. Mocha containers lack ALUs and only support whole-container set operations. When a field is the only one written by an action in a mocha container, the whole-container write clobbers other fields sharing that container. The ipv4_checksum_err field was packed into mocha MH0 alongside pkt_type, which could cause issues. This adds similar protections for NAT/encap fields, egress bridge header fields, and egress action fields in both multicast and non-multicast builds. Other includes: - Zero-inits sidecar header pad and unused fields (sc_pad, sc_egress, sc_payload) across all sidecar header construction sites to prevent stale PHV data from leaking into the header. - Removes unreachable ternary entry (0, 0, _, _, _) from the multicast port_bitmap_check const entries table. The two preceding wildcard entries already cover that case. - Simplifies multicast egress counting to use the mcast_tag local captured before egress decap rather than re-checking header validity.
1 parent a75a02e commit 845da0a

2 files changed

Lines changed: 82 additions & 18 deletions

File tree

dpd/p4/metadata.p4

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,62 @@
1616
@pa_no_init("ingress", "meta.nat_ingress_port")
1717
@pa_no_init("ingress", "meta.encap_needed")
1818
@pa_no_init("ingress", "meta.icmp_recalc")
19+
@pa_no_init("ingress", "meta.allow_source_mcast")
20+
@pa_no_init("ingress", "meta.resolve_nexthop")
21+
@pa_no_init("ingress", "meta.nexthop_is_v6")
22+
@pa_no_init("ingress", "meta.route_ttl_is_1")
23+
24+
// Force fields out of mocha containers into normal containers. Mocha containers
25+
// only support whole-container-set operations, so isolated fields can have
26+
// their other bits corrupted by stale data from previous packets.
27+
//
28+
// Without these pragmas the compiler may pack small metadata fields into mocha
29+
// containers alongside unrelated fields. A whole-container write to one field
30+
// then clobbers the others. The risk is highest for 1-bit booleans and fields
31+
// with long liverange gaps between set and use.
32+
//
33+
// Both builds share ipv4_checksum_err: confirmed allocated to mocha MH0 where
34+
// it shared a container with pkt_type, risking false checksum-error drops.
35+
@pa_container_type("ingress", "meta.ipv4_checksum_err", "normal")
36+
37+
#ifdef MULTICAST
38+
// Ingress fields needed for NAT encapsulation and checksum computation.
39+
@pa_container_type("ingress", "meta.nat_ingress_tgt", "normal")
40+
@pa_container_type("ingress", "meta.nat_geneve_vni", "normal")
41+
@pa_container_type("ingress", "meta.icmp_csum", "normal")
42+
@pa_container_type("ingress", "meta.body_checksum", "normal")
43+
@pa_container_type("ingress", "meta.orig_src_mac", "normal")
44+
@pa_container_type("ingress", "meta.orig_src_ipv4", "normal")
45+
@pa_container_type("ingress", "meta.drop_reason", "normal")
46+
@pa_container_type("ingress", "meta.l4_dst_port", "normal")
47+
@pa_container_type("ingress", "meta.nat_inner_mac", "normal")
48+
// Carried over from non-MULTICAST build. Without explicit pragmas these
49+
// fields were only protected by incidental co-location with deparsed bridge
50+
// header fields in the same container, which is fragile across compiler
51+
// versions and PHV pressure changes.
52+
@pa_container_type("ingress", "meta.service_routed", "normal")
53+
@pa_container_type("ingress", "meta.l4_src_port", "normal")
54+
@pa_container_type("ingress", "meta.icmp_recalc", "normal")
55+
@pa_container_type("ingress", "meta.nat_ingress_csum", "normal")
56+
// Egress bridge header fields crossing the ingress/egress boundary.
57+
@pa_container_type("egress", "meta.bridge_hdr.ingress_port", "normal")
58+
@pa_container_type("egress", "meta.bridge_hdr.is_mcast_routed", "normal")
59+
// Egress fields set by multicast table actions and consumed later in the
60+
// pipeline. drop_reason was confirmed allocated to mocha MH9 where it
61+
// shared a container with drop_ctl. ipv4_checksum_recalc is a 1-bit field
62+
// at high risk of mocha packing.
63+
@pa_container_type("egress", "meta.vlan_id", "normal")
64+
@pa_container_type("egress", "meta.port_number", "normal")
65+
@pa_container_type("egress", "meta.ipv4_checksum_recalc", "normal")
66+
@pa_container_type("egress", "meta.drop_reason", "normal")
67+
#else
68+
@pa_container_type("ingress", "meta.service_routed", "normal")
69+
@pa_container_type("ingress", "meta.nexthop", "normal")
70+
@pa_container_type("ingress", "meta.l4_src_port", "normal")
71+
@pa_container_type("ingress", "meta.icmp_recalc", "normal")
72+
@pa_container_type("ingress", "meta.nat_ingress_csum", "normal")
73+
@pa_container_type("ingress", "meta.drop_reason", "normal")
74+
#endif
1975

2076
/* Flexible bridge header for passing metadata between ingress and egress
2177
* pipelines.

dpd/p4/sidecar.p4

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ const bit<9> USER_SPACE_SERVICE_PORT = 192;
5454
// Callers should set meta.drop_reason and call counters as needed.
5555
#define ICMP_ERROR_SETUP(type, code) \
5656
hdr.sidecar.sc_code = SC_ICMP_NEEDED; \
57+
hdr.sidecar.sc_pad = 0; \
5758
hdr.sidecar.sc_ingress = (bit<16>)ig_intr_md.ingress_port; \
5859
hdr.sidecar.sc_egress = (bit<16>)ig_tm_md.ucast_egress_port; \
5960
hdr.sidecar.sc_ether_type = hdr.ethernet.ether_type; \
@@ -311,8 +312,11 @@ control Services(
311312
// sidecar tag, which indicates which port the request arrived on.
312313
action forward_to_userspace() {
313314
hdr.sidecar.sc_code = SC_FWD_TO_USERSPACE;
315+
hdr.sidecar.sc_pad = 0;
314316
hdr.sidecar.sc_ingress = (bit<16>)ig_intr_md.ingress_port;
317+
hdr.sidecar.sc_egress = 0;
315318
hdr.sidecar.sc_ether_type = hdr.ethernet.ether_type;
319+
hdr.sidecar.sc_payload = 0;
316320
hdr.sidecar.setValid();
317321
hdr.ethernet.ether_type = ETHERTYPE_SIDECAR;
318322
meta.service_routed = true;
@@ -345,6 +349,7 @@ control Services(
345349
// packets always go to the port indicated by the sidecar header.
346350
action mcast_inbound_link_local() {
347351
hdr.sidecar.sc_code = SC_FWD_TO_USERSPACE;
352+
hdr.sidecar.sc_pad = 0;
348353
hdr.sidecar.sc_ingress = (bit<16>)ig_intr_md.ingress_port;
349354
hdr.sidecar.sc_egress = (bit<16>)ig_tm_md.ucast_egress_port;
350355
hdr.sidecar.sc_ether_type = hdr.ethernet.ether_type;
@@ -1166,6 +1171,7 @@ control Arp (
11661171

11671172
action request() {
11681173
hdr.sidecar.sc_code = SC_ARP_NEEDED;
1174+
hdr.sidecar.sc_pad = 0;
11691175
hdr.sidecar.sc_ingress = (bit<16>)ig_intr_md.ingress_port;
11701176
hdr.sidecar.sc_egress = (bit<16>)ig_tm_md.ucast_egress_port;
11711177
hdr.sidecar.sc_ether_type = hdr.ethernet.ether_type;
@@ -1218,6 +1224,7 @@ control Ndp (
12181224

12191225
action request() {
12201226
hdr.sidecar.sc_code = SC_NEIGHBOR_NEEDED;
1227+
hdr.sidecar.sc_pad = 0;
12211228
hdr.sidecar.sc_ingress = (bit<16>)ig_intr_md.ingress_port;
12221229
hdr.sidecar.sc_egress = (bit<16>)ig_tm_md.ucast_egress_port;
12231230
hdr.sidecar.sc_ether_type = hdr.ethernet.ether_type;
@@ -1723,16 +1730,18 @@ control MulticastIngress (
17231730
NoAction;
17241731
}
17251732

1733+
// Priority order: first match wins. The geneve tag entries are
1734+
// most specific (both headers valid + exact tag), followed by
1735+
// group-ID fallbacks for non-geneve multicast.
17261736
const entries = {
17271737
( _, _, true, true, MULTICAST_TAG_EXTERNAL ) : invalidate_underlay_grp_and_set_decap;
17281738
( _, _, true, true, MULTICAST_TAG_UNDERLAY ) : invalidate_external_grp;
17291739
( _, _, true, true, MULTICAST_TAG_UNDERLAY_EXTERNAL ) : NoAction;
17301740
( 0, _, _, _, _ ) : invalidate_external_grp;
17311741
( _, 0, _, _, _ ) : invalidate_underlay_grp;
1732-
( 0, 0, _, _, _ ) : invalidate_grps;
17331742
}
17341743

1735-
const size = 6;
1744+
const size = 5;
17361745
}
17371746

17381747
// Note: SSM tables currently take one extra stage in the pipeline (17->18).
@@ -2259,24 +2268,23 @@ control Egress(
22592268
forwarded_ctr.count(eg_intr_md.egress_port);
22602269

22612270
#ifdef MULTICAST
2262-
// Multicast-specific counting
2271+
// Multicast-specific counting. Use the mcast_tag
2272+
// local (captured before egress decap may strip
2273+
// geneve headers) rather than re-checking header
2274+
// validity.
22632275
if (is_mcast_routed) {
22642276
mcast_ctr.count(eg_intr_md.egress_port);
2265-
if (hdr.geneve.isValid()) {
2266-
if (hdr.geneve_opts.oxg_mcast.isValid()) {
2267-
if (mcast_tag == MULTICAST_TAG_UNDERLAY) {
2268-
underlay_mcast_ctr.count(
2269-
eg_intr_md.egress_port);
2270-
} else if (mcast_tag == MULTICAST_TAG_EXTERNAL) {
2271-
external_mcast_ctr.count(
2272-
eg_intr_md.egress_port);
2273-
} else if (mcast_tag == MULTICAST_TAG_UNDERLAY_EXTERNAL) {
2274-
underlay_mcast_ctr.count(
2275-
eg_intr_md.egress_port);
2276-
external_mcast_ctr.count(
2277-
eg_intr_md.egress_port);
2278-
}
2279-
}
2277+
if (mcast_tag == MULTICAST_TAG_UNDERLAY) {
2278+
underlay_mcast_ctr.count(
2279+
eg_intr_md.egress_port);
2280+
} else if (mcast_tag == MULTICAST_TAG_EXTERNAL) {
2281+
external_mcast_ctr.count(
2282+
eg_intr_md.egress_port);
2283+
} else if (mcast_tag == MULTICAST_TAG_UNDERLAY_EXTERNAL) {
2284+
underlay_mcast_ctr.count(
2285+
eg_intr_md.egress_port);
2286+
external_mcast_ctr.count(
2287+
eg_intr_md.egress_port);
22802288
}
22812289
} else if (is_link_local_ipv6_mcast) {
22822290
mcast_ctr.count(eg_intr_md.egress_port);

0 commit comments

Comments
 (0)