Skip to content

Commit f44aa7c

Browse files
Robert Richter authored and JiandiAnNVIDIA committed
NVIDIA: VR: SAUCE: cxl/region: Support multi-level interleaving with smaller granularities for lower levels
The CXL specification supports multi-level interleaving "as long as all the levels use different, but consecutive, HPA bits to select the target and no Interleave Set has more than 8 devices" (from 3.2). Currently the kernel expects that a decoder's "interleave granularity is a multiple of @parent_port granularity". That is, the granularity of a lower level is bigger than that of its parent and uses the outer HPA bits as the selector. It works e.g. for the following 8-way config: * cross-link (cross-hostbridge config in CFMWS): * 4-way * 256 granularity * Selector: HPA[8:9] * sub-link (CXL Host bridge config of the HDM): * 2-way * 1024 granularity * Selector: HPA[10] Now, if the outer HPA bits are used for the cross-hostbridge, an 8-way config could look like this: * cross-link (cross-hostbridge config in CFMWS): * 4-way * 512 granularity * Selector: HPA[9:10] * sub-link (CXL Host bridge config of the HDM): * 2-way * 256 granularity * Selector: HPA[8] The enumeration of decoders for this configuration then fails with the following error: cxl region0: pci0000:00:port1 cxl_port_setup_targets expected iw: 2 ig: 1024 [mem 0x10000000000-0x1ffffffffff flags 0x200] cxl region0: pci0000:00:port1 cxl_port_setup_targets got iw: 2 ig: 256 state: enabled 0x10000000000:0x1ffffffffff cxl_port endpoint12: failed to attach decoder12.0 to region0: -6 Note that this happens only if firmware is setting up the decoders (CXL_REGION_F_AUTO). For userspace region assembly the granularities are chosen to increase from root down to the lower levels. That is, outer HPA bits are always used for lower interleaving levels. Rework the implementation to also support multi-level interleaving with smaller granularities for lower levels. Determine the interleave set of autodetected decoders. Check that it is a subset of the root interleave. The HPA selector bits are extracted for all decoders of the set and checked to ensure that there is no overlap and that the bits are consecutive. 
All decoders can now be programmed to use any bit range within the region's target selector. Signed-off-by: Robert Richter <rrichter@amd.com> (backported from https://lore.kernel.org/all/20251028094754.72816-1-rrichter@amd.com/) [jan: Resolved minor conflicts] Signed-off-by: Jiandi An <jan@nvidia.com>
1 parent fac5e4f commit f44aa7c

1 file changed

Lines changed: 109 additions & 93 deletions

File tree

drivers/cxl/core/region.c

Lines changed: 109 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -1555,57 +1555,119 @@ static int check_interleave_cap(struct cxl_decoder *cxld, int iw, int ig)
15551555
return 0;
15561556
}
15571557

1558+
/*
 * get_selector() - compute the mask of address bits that select an
 * interleave target for a given interleave geometry.
 * @ways: number of interleave ways
 * @gran: interleave granularity
 *
 * A @ways value that is not a power of 2 is first divided by 3 (integer
 * division), so 3, 6 and 12 are reduced to 1, 2 and 4 -- presumably to
 * strip the x3 factor of x3/x6/x12 interleaves before doing power-of-2
 * bit math (TODO confirm against the callers' expectations).
 *
 * Return: (ways - 1) * gran using the reduced @ways. For power-of-2
 * inputs this is a contiguous bit mask starting at the granularity bit,
 * e.g. 4 ways at 256 granularity yields 0x300. Returns 0 when the reduced
 * @ways or @gran is not a power of 2. Note that 0 is also the regular
 * result for a reduced @ways of 1, so the return value alone does not
 * distinguish "no selector bits" from "invalid input".
 */
static inline u64 get_selector(u64 ways, u64 gran)
1559+
{
1560+
if (!is_power_of_2(ways))
1561+
ways /= 3;
1562+
1563+
if (!is_power_of_2(ways) || !is_power_of_2(gran))
1564+
return 0;
1565+
1566+
return (ways - 1) * gran;
1567+
}
1568+
15581569
static int cxl_port_setup_targets(struct cxl_port *port,
15591570
struct cxl_region *cxlr,
15601571
struct cxl_endpoint_decoder *cxled)
15611572
{
15621573
struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1563-
int parent_iw, parent_ig, ig, iw, rc, pos = cxled->pos;
15641574
struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
15651575
struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
15661576
struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
15671577
struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
15681578
struct cxl_region_params *p = &cxlr->params;
15691579
struct cxl_decoder *cxld = cxl_rr->decoder;
1570-
struct cxl_switch_decoder *cxlsd;
1580+
struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(&cxld->dev);
15711581
struct cxl_port *iter = port;
1572-
u16 eig, peig;
1573-
u8 eiw, peiw;
1582+
int ig, iw = cxl_rr->nr_targets, rc, pos = cxled->pos;
1583+
int distance, parent_distance;
1584+
u64 selector, cxlr_sel;
1585+
u16 eig;
1586+
u8 eiw;
15741587

15751588
/*
15761589
* While root level decoders support x3, x6, x12, switch level
15771590
* decoders only support powers of 2 up to x16.
15781591
*/
1579-
if (!is_power_of_2(cxl_rr->nr_targets)) {
1592+
if (!is_power_of_2(iw)) {
15801593
dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
1581-
dev_name(port->uport_dev), dev_name(&port->dev),
1582-
cxl_rr->nr_targets);
1594+
dev_name(port->uport_dev), dev_name(&port->dev), iw);
15831595
return -EINVAL;
15841596
}
15851597

1586-
cxlsd = to_cxl_switch_decoder(&cxld->dev);
1587-
if (cxl_rr->nr_targets_set) {
1588-
int i, distance = 1;
1589-
struct cxl_region_ref *cxl_rr_iter;
1598+
if (iw > 8 || iw > cxlsd->nr_targets) {
1599+
dev_dbg(&cxlr->dev,
1600+
"%s:%s:%s: ways: %d overflows targets: %d\n",
1601+
dev_name(port->uport_dev), dev_name(&port->dev),
1602+
dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1603+
return -ENXIO;
1604+
}
15901605

1591-
/*
1592-
* The "distance" between peer downstream ports represents which
1593-
* endpoint positions in the region interleave a given port can
1594-
* host.
1595-
*
1596-
* For example, at the root of a hierarchy the distance is
1597-
* always 1 as every index targets a different host-bridge. At
1598-
* each subsequent switch level those ports map every Nth region
1599-
* position where N is the width of the switch == distance.
1600-
*/
1601-
do {
1602-
cxl_rr_iter = cxl_rr_load(iter, cxlr);
1603-
distance *= cxl_rr_iter->nr_targets;
1604-
iter = to_cxl_port(iter->dev.parent);
1605-
} while (!is_cxl_root(iter));
1606-
distance *= cxlrd->cxlsd.cxld.interleave_ways;
1606+
/*
1607+
* Calculate the effective granularity and ways to determine
1608+
* HPA bits used as target selectors of the interleave set.
1609+
* Use this to check if the root decoder and all subsequent
1610+
* HDM decoders only use bits from that range as selectors.
1611+
*
1612+
* The "distance" between peer downstream ports represents which
1613+
* endpoint positions in the region interleave a given port can
1614+
* host.
1615+
*
1616+
* For example, at the root of a hierarchy the distance is
1617+
* always 1 as every index targets a different host-bridge. At
1618+
* each subsequent switch level those ports map every Nth region
1619+
* position where N is the width of the switch == distance.
1620+
*/
1621+
1622+
/* Start with the root decoders selector and distance. */
1623+
selector = get_selector(cxlrd->cxlsd.cxld.interleave_ways,
1624+
cxlrd->cxlsd.cxld.interleave_granularity);
1625+
distance = cxlrd->cxlsd.cxld.interleave_ways;
1626+
if (!is_power_of_2(distance))
1627+
distance /= 3;
1628+
1629+
for (iter = parent_port; !is_cxl_root(iter);
1630+
iter = to_cxl_port(iter->dev.parent)) {
1631+
struct cxl_region_ref *cxl_rr_iter = cxl_rr_load(iter, cxlr);
1632+
struct cxl_decoder *cxld_iter = cxl_rr_iter->decoder;
1633+
u64 cxld_sel;
1634+
1635+
if (cxld_iter->interleave_ways == 1)
1636+
continue;
1637+
1638+
cxld_sel = get_selector(cxld_iter->interleave_ways,
1639+
cxld_iter->interleave_granularity);
1640+
1641+
if (cxld_sel & selector) {
1642+
dev_dbg(&cxlr->dev, "%s:%s: overlapping selectors: %#llx:%#llx\n",
1643+
dev_name(iter->uport_dev),
1644+
dev_name(&iter->dev), cxld_sel, selector);
1645+
return -ENXIO;
1646+
}
16071647

1608-
for (i = 0; i < cxl_rr->nr_targets_set; i++)
1648+
selector |= cxld_sel;
1649+
distance *= cxl_rr_iter->nr_targets;
1650+
}
1651+
1652+
parent_distance = distance;
1653+
distance *= iw;
1654+
1655+
/* The combined selector bits must fit the region selector. */
1656+
cxlr_sel = get_selector(p->interleave_ways,
1657+
p->interleave_granularity);
1658+
1659+
if ((cxlr_sel & selector) != selector) {
1660+
dev_dbg(&cxlr->dev, "%s:%s: invalid selectors: %#llx:%#llx\n",
1661+
dev_name(iter->uport_dev),
1662+
dev_name(&iter->dev), cxlr_sel, selector);
1663+
return -ENXIO;
1664+
}
1665+
1666+
/* Calculate remaining selector bits available for use. */
1667+
selector = cxlr_sel & ~selector;
1668+
1669+
if (cxl_rr->nr_targets_set) {
1670+
for (int i = 0; i < cxl_rr->nr_targets_set; i++)
16091671
if (ep->dport == cxlsd->target[i]) {
16101672
rc = check_last_peer(cxled, ep, cxl_rr,
16111673
distance);
@@ -1616,88 +1678,42 @@ static int cxl_port_setup_targets(struct cxl_port *port,
16161678
goto add_target;
16171679
}
16181680

1619-
if (is_cxl_root(parent_port)) {
1681+
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags))
1682+
ig = cxld->interleave_granularity;
1683+
else
16201684
/*
1685+
* Set the interleave granularity with each interleave
1686+
* level to a multiple of its parent port interleave
1687+
* ways. Beginning with the granularity of the root
1688+
* decoder set to the region granularity (starting
1689+
* with the inner selector bits of the HPA), the
1690+
* granularity is increased with each level. Calculate
1691+
* this using the parent distance and region
1692+
* granularity.
1693+
*
16211694
* Root decoder IG is always set to value in CFMWS which
16221695
* may be different than this region's IG. We can use the
16231696
* region's IG here since interleave_granularity_store()
16241697
* does not allow interleaved host-bridges with
16251698
* root IG != region IG.
16261699
*/
1627-
parent_ig = p->interleave_granularity;
1628-
parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1629-
/*
1630-
* For purposes of address bit routing, use power-of-2 math for
1631-
* switch ports.
1632-
*/
1633-
if (!is_power_of_2(parent_iw))
1634-
parent_iw /= 3;
1635-
} else {
1636-
struct cxl_region_ref *parent_rr;
1637-
struct cxl_decoder *parent_cxld;
1638-
1639-
parent_rr = cxl_rr_load(parent_port, cxlr);
1640-
parent_cxld = parent_rr->decoder;
1641-
parent_ig = parent_cxld->interleave_granularity;
1642-
parent_iw = parent_cxld->interleave_ways;
1643-
}
1644-
1645-
rc = granularity_to_eig(parent_ig, &peig);
1646-
if (rc) {
1647-
dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1648-
dev_name(parent_port->uport_dev),
1649-
dev_name(&parent_port->dev), parent_ig);
1650-
return rc;
1651-
}
1652-
1653-
rc = ways_to_eiw(parent_iw, &peiw);
1654-
if (rc) {
1655-
dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1656-
dev_name(parent_port->uport_dev),
1657-
dev_name(&parent_port->dev), parent_iw);
1658-
return rc;
1659-
}
1700+
ig = p->interleave_granularity * parent_distance;
16601701

1661-
iw = cxl_rr->nr_targets;
16621702
rc = ways_to_eiw(iw, &eiw);
1663-
if (rc) {
1664-
dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1665-
dev_name(port->uport_dev), dev_name(&port->dev), iw);
1666-
return rc;
1667-
}
1668-
1669-
/*
1670-
* Interleave granularity is a multiple of @parent_port granularity.
1671-
* Multiplier is the parent port interleave ways.
1672-
*/
1673-
rc = granularity_to_eig(parent_ig * parent_iw, &eig);
1674-
if (rc) {
1675-
dev_dbg(&cxlr->dev,
1676-
"%s: invalid granularity calculation (%d * %d)\n",
1677-
dev_name(&parent_port->dev), parent_ig, parent_iw);
1678-
return rc;
1679-
}
1680-
1681-
rc = eig_to_granularity(eig, &ig);
1682-
if (rc) {
1683-
dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1684-
dev_name(port->uport_dev), dev_name(&port->dev),
1685-
256 << eig);
1686-
return rc;
1687-
}
1703+
if (!rc)
1704+
rc = granularity_to_eig(ig, &eig);
16881705

1689-
if (iw > 8 || iw > cxlsd->nr_targets) {
1690-
dev_dbg(&cxlr->dev,
1691-
"%s:%s:%s: ways: %d overflows targets: %d\n",
1706+
if (rc || (iw > 1 && ~selector & get_selector(iw, ig))) {
1707+
dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d:%d:%#llx\n",
16921708
dev_name(port->uport_dev), dev_name(&port->dev),
1693-
dev_name(&cxld->dev), iw, cxlsd->nr_targets);
1709+
iw, ig, selector);
16941710
return -ENXIO;
16951711
}
16961712

16971713
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
16981714
if (cxld->interleave_ways != iw ||
1699-
(iw > 1 && cxld->interleave_granularity != ig) ||
17001715
!spa_maps_hpa(p, &cxld->hpa_range) ||
1716+
!region_res_match_cxl_range(p, &cxld->hpa_range) ||
17011717
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
17021718
dev_err(&cxlr->dev,
17031719
"%s:%s %s expected iw: %d ig: %d %pr\n",

0 commit comments

Comments
 (0)