Skip to content

Commit d5df26d

Browse files
yoongsiang2 authored and Shani Peretz committed
net/e1000: fix igc launch time calculation
[ upstream commit 2e79349 ]

Improve the launch time calculation logic to handle different scenarios:
- Set launch time to 0 if txtime has expired.
- Set launch time to 0 if txtime exceeds the horizon (beyond the end of the next Qbv cycle).
- Mark the first flag in the context descriptor when the packet is the first one scheduled in the next Qbv cycle.
- Create a dummy packet to dirty the current cycle before sending packets intended for the next Qbv cycle.

Testing was performed on two Intel ADL-S platforms with i226 NICs connected back-to-back. A DPDK sample application was created to send 10 UDP packets at 20,000 nanosecond intervals, with their txtime set to the time of the next Qbv cycle. Meanwhile, the tcpdump command below was used on the link partner to capture the delta of the Rx hardware timestamps of the 10 packets:

tcpdump -ttt -ni enp1s0 --time-stamp-precision=nano -j adapter_unsynced

Without this patch, packets are transmitted immediately as the hardware interprets their launch time as expired, resulting in 8,384 nanosecond intervals (wire speed for 1024-byte packets at 1Gbps), as shown in the tcpdump log below:

00:00:00.000000000 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000008384 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982

With this patch, packets are properly held until the next Qbv cycle and transmitted at the intended 20,000 nanosecond intervals, demonstrating correct launch time behavior, as shown in the tcpdump log below:

00:00:00.000000000 [|llc]
00:00:00.000862592 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000019993 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020000 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020010 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000019997 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020000 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020003 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000019990 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020000 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982
00:00:00.000020000 IP 192.168.1.100.2 > 224.1.1.1.5: UDP, length 982

Fixes: 9630f7c ("net/igc: enable launch time offloading")
Cc: stable@dpdk.org

Signed-off-by: David Zage <david.zage@intel.com>
Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
1 parent 6d8b434 commit d5df26d

2 files changed

Lines changed: 199 additions & 32 deletions

File tree

drivers/net/igc/igc_txrx.c

Lines changed: 190 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -92,9 +92,13 @@
9292
/* L4 Packet TYPE of Reserved */
9393
#define IGC_ADVTXD_TUCMD_L4T_RSV 0x00001800
9494

95+
/* Indicate the first packet in a Qbv cycle */
96+
#define IGC_ADVTXD_TSN_CNTX_FRST 0x00000080
97+
9598
#define IGC_TX_OFFLOAD_NOTSUP_MASK (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGC_TX_OFFLOAD_MASK)
9699

97100
#define IGC_TS_HDR_LEN 16
101+
#define IGC_DUMMY_PKT_SIZE 64
98102

99103
static inline uint64_t
100104
rx_desc_statuserr_to_pkt_flags(uint32_t statuserr)
@@ -1442,34 +1446,166 @@ what_advctx_update(struct igc_tx_queue *txq, uint64_t flags,
14421446
return IGC_CTX_NUM;
14431447
}
14441448

1445-
static uint32_t igc_tx_launchtime(uint64_t txtime, uint16_t port_id)
1449+
static uint32_t
1450+
igc_tx_launchtime(uint64_t txtime, struct igc_tx_queue *txq,
1451+
bool *need_dummy_pkt, bool *need_frst_flag)
14461452
{
1447-
struct rte_eth_dev *dev = &rte_eth_devices[port_id];
1453+
struct rte_eth_dev *dev = &rte_eth_devices[txq->port_id];
14481454
struct igc_adapter *adapter = IGC_DEV_PRIVATE(dev);
1449-
uint64_t base_time = adapter->base_time;
1455+
struct igc_hw *hw = IGC_DEV_PRIVATE_HW(dev);
14501456
uint64_t cycle_time = adapter->cycle_time;
1457+
uint64_t base_time = adapter->base_time;
1458+
uint64_t current_cycle_end;
1459+
uint64_t cycles_elapsed;
14511460
uint32_t launchtime;
1461+
uint32_t nsec, sec;
1462+
uint64_t systime;
1463+
1464+
/*
1465+
* Read current PTP hardware time from SYSTIM registers.
1466+
* Reading the SYSTIML register latches the upper 32 bits to the SYSTIMH
1467+
* shadow register for coherent access. As long as we read SYSTIML first
1468+
* followed by SYSTIMH, we avoid race conditions where the time rolls
1469+
* over between the two register reads.
1470+
*/
1471+
nsec = IGC_READ_REG(hw, IGC_SYSTIML);
1472+
sec = IGC_READ_REG(hw, IGC_SYSTIMH);
1473+
systime = (uint64_t)sec * NSEC_PER_SEC + (uint64_t)nsec;
14521474

1475+
/* Calculate end time of current Qbv cycle */
1476+
cycles_elapsed = (systime - base_time) / cycle_time;
1477+
current_cycle_end = (cycles_elapsed + 1) * cycle_time + base_time;
1478+
1479+
/* Set launchtime to 0 if txtime has expired or exceeds the horizon */
1480+
if (txtime <= systime || txtime >= current_cycle_end + cycle_time) {
1481+
txq->last_packet_cycle = current_cycle_end;
1482+
return 0;
1483+
}
1484+
1485+
/* Calculate launchtime to be inserted into Tx context descriptor */
14531486
launchtime = (txtime - base_time) % cycle_time;
14541487

1488+
/* Handle txtime that fall into next Qbv cycle */
1489+
if (txtime >= current_cycle_end) {
1490+
/* Only mark as first if the cycle hasn't had a first pkt yet */
1491+
if (txq->last_frst_flag != current_cycle_end) {
1492+
*need_frst_flag = true;
1493+
txq->last_frst_flag = current_cycle_end;
1494+
1495+
/* Check if we need dummy pkt to dirty current cycle */
1496+
if (txq->last_packet_cycle < current_cycle_end)
1497+
*need_dummy_pkt = true;
1498+
}
1499+
txq->last_packet_cycle = current_cycle_end + cycle_time;
1500+
} else {
1501+
txq->last_packet_cycle = current_cycle_end;
1502+
}
1503+
14551504
return rte_cpu_to_le_32(launchtime);
14561505
}
14571506

1507+
/*
1508+
* If the IGC_ADVTXD_TSN_CNTX_FRST flag is used to schedule a packet for the
1509+
* next Qbv cycle while no packet was transmitted from that queue in the current
1510+
* cycle, then the IGC_ADVTXD_TSN_CNTX_FRST flag may be valid in the current
1511+
* cycle and the packet will be transmitted in the current cycle. To overcome
1512+
* this issue, we transmit an IGC_DUMMY_PKT_SIZE byte "dummy" packet to "dirty"
1513+
* the current cycle before sending the packet intended for the next cycle.
1514+
*/
1515+
static void
1516+
igc_insert_dummy_packet(struct igc_tx_queue *txq, uint16_t *tx_id)
1517+
{
1518+
volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1519+
struct igc_tx_entry * const sw_ring = txq->sw_ring;
1520+
volatile struct igc_adv_tx_context_desc *ctx_txd;
1521+
volatile union igc_adv_tx_desc *txd;
1522+
struct igc_tx_entry *txe, *txn;
1523+
1524+
/* Get Tx entry (txe) for Tx context descriptor of dummy packet */
1525+
txe = &sw_ring[*tx_id];
1526+
1527+
/* Prepare for next Tx entry (txn) */
1528+
txn = &sw_ring[txe->next_id];
1529+
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1530+
1531+
/* Set up Tx context descriptor for dummy packet */
1532+
ctx_txd = (volatile struct igc_adv_tx_context_desc *)&txr[*tx_id];
1533+
ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(IGC_ADVTXD_DTYP_CTXT |
1534+
IGC_ADVTXD_DCMD_DEXT);
1535+
ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(txq->ctx_curr <<
1536+
IGC_ADVTXD_IDX_SHIFT);
1537+
ctx_txd->vlan_macip_lens = 0;
1538+
ctx_txd->u.launch_time = 0;
1539+
1540+
/* Update tx_id and last_id */
1541+
*tx_id = txe->next_id;
1542+
txe->last_id = *tx_id;
1543+
1544+
/* Get Tx entry (txe) for Tx data descriptor of dummy packet */
1545+
txe = txn;
1546+
1547+
/* Prepare for next Tx entry (txn) */
1548+
txn = &sw_ring[txe->next_id];
1549+
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1550+
1551+
/* Free previous mbuf */
1552+
if (txe->mbuf != NULL) {
1553+
rte_pktmbuf_free_seg(txe->mbuf);
1554+
txe->mbuf = NULL;
1555+
}
1556+
1557+
/* Set up Tx data descriptor for dummy packet */
1558+
txd = &txr[*tx_id];
1559+
txd->read.buffer_addr = rte_cpu_to_le_64(txq->dummy_pkt_dma);
1560+
txd->read.cmd_type_len = rte_cpu_to_le_32(txq->txd_type |
1561+
IGC_DUMMY_PKT_SIZE | IGC_ADVTXD_DCMD_IFCS |
1562+
IGC_ADVTXD_DCMD_DEXT | IGC_TXD_CMD_EOP |
1563+
IGC_TXD_CMD_RS);
1564+
txd->read.olinfo_status = rte_cpu_to_le_32(IGC_DUMMY_PKT_SIZE <<
1565+
IGC_ADVTXD_PAYLEN_SHIFT);
1566+
1567+
/* Update last_id and tx_id */
1568+
txe->last_id = *tx_id;
1569+
*tx_id = txe->next_id;
1570+
1571+
/* Get Tx entry (txe) for Tx context descriptor of actual packet */
1572+
txe = txn;
1573+
1574+
/* Prepare for next Tx entry (txn) */
1575+
txn = &sw_ring[txe->next_id];
1576+
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
1577+
1578+
/* Free previous mbuf */
1579+
if (txe->mbuf != NULL) {
1580+
rte_pktmbuf_free_seg(txe->mbuf);
1581+
txe->mbuf = NULL;
1582+
}
1583+
1584+
/* Update ctx_curr */
1585+
txq->ctx_curr ^= 1;
1586+
}
1587+
14581588
/*
14591589
* This is a separate function, looking for optimization opportunity here
14601590
* Rework required to go with the pre-defined values.
14611591
*/
14621592
static inline void
1463-
igc_set_xmit_ctx(struct igc_tx_queue *txq,
1464-
volatile struct igc_adv_tx_context_desc *ctx_txd,
1593+
igc_set_xmit_ctx(struct igc_tx_queue *txq, uint16_t *tx_id,
14651594
uint64_t ol_flags, union igc_tx_offload tx_offload,
1466-
uint64_t txtime)
1595+
uint64_t txtime, uint16_t tx_last)
14671596
{
1597+
volatile union igc_adv_tx_desc * const txr = txq->tx_ring;
1598+
struct igc_tx_entry * const sw_ring = txq->sw_ring;
1599+
volatile struct igc_adv_tx_context_desc *ctx_txd;
1600+
struct igc_tx_entry *txe;
14681601
uint32_t type_tucmd_mlhl;
1469-
uint32_t mss_l4len_idx;
1602+
uint32_t mss_l4len_idx = 0;
14701603
uint32_t ctx_curr;
14711604
uint32_t vlan_macip_lens;
14721605
union igc_tx_offload tx_offload_mask;
1606+
bool need_frst_flag = false;
1607+
bool need_dummy_pkt = false;
1608+
uint32_t launch_time = 0;
14731609

14741610
/* Use the previous context */
14751611
txq->ctx_curr ^= 1;
@@ -1478,9 +1614,6 @@ igc_set_xmit_ctx(struct igc_tx_queue *txq,
14781614
tx_offload_mask.data = 0;
14791615
type_tucmd_mlhl = 0;
14801616

1481-
/* Specify which HW CTX to upload. */
1482-
mss_l4len_idx = (ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1483-
14841617
if (ol_flags & RTE_MBUF_F_TX_VLAN)
14851618
tx_offload_mask.vlan_tci = 0xffff;
14861619

@@ -1542,18 +1675,32 @@ igc_set_xmit_ctx(struct igc_tx_queue *txq,
15421675
txq->ctx_cache[ctx_curr].tx_offload.data =
15431676
tx_offload_mask.data & tx_offload.data;
15441677
txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
1678+
} else {
1679+
launch_time = igc_tx_launchtime(txtime, txq, &need_dummy_pkt,
1680+
&need_frst_flag);
15451681
}
15461682

1683+
if (need_frst_flag)
1684+
mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FRST;
1685+
1686+
if (need_dummy_pkt)
1687+
igc_insert_dummy_packet(txq, tx_id);
1688+
1689+
/* Specify which HW CTX to upload. */
1690+
mss_l4len_idx |= (txq->ctx_curr << IGC_ADVTXD_IDX_SHIFT);
1691+
1692+
/* Set up Tx context descriptor */
1693+
ctx_txd = (volatile struct igc_adv_tx_context_desc *)&txr[*tx_id];
15471694
ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
15481695
vlan_macip_lens = (uint32_t)tx_offload.data;
15491696
ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
15501697
ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
1698+
ctx_txd->u.launch_time = launch_time;
15511699

1552-
if (txtime)
1553-
ctx_txd->u.launch_time = igc_tx_launchtime(txtime,
1554-
txq->port_id);
1555-
else
1556-
ctx_txd->u.launch_time = 0;
1700+
/* Update last_id and tx_id */
1701+
txe = &sw_ring[*tx_id];
1702+
txe->last_id = tx_last;
1703+
*tx_id = txe->next_id;
15571704
}
15581705

15591706
static inline uint32_t
@@ -1603,7 +1750,7 @@ igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
16031750
uint64_t tx_ol_req;
16041751
uint32_t new_ctx = 0;
16051752
union igc_tx_offload tx_offload = {0};
1606-
uint64_t ts;
1753+
uint64_t ts = 0;
16071754

16081755
tx_id = txq->tx_tail;
16091756
txe = &sw_ring[tx_id];
@@ -1653,7 +1800,7 @@ igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
16531800
/*
16541801
* Check if there are enough free descriptors in the TX ring
16551802
* to transmit the next packet.
1656-
* This operation is based on the two following rules:
1803+
* This operation is based on the three following rules:
16571804
*
16581805
* 1- Only check that the last needed TX descriptor can be
16591806
* allocated (by construction, if that descriptor is free,
@@ -1674,13 +1821,17 @@ igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
16741821
* By extension, avoid to allocate a free descriptor that
16751822
* belongs to the last set of free descriptors allocated
16761823
* to the same packet previously transmitted.
1824+
*
1825+
* 3- Make sure there are two extra descriptors available in
1826+
* the ring, in case a dummy packet is needed to dirty the
1827+
* current Qbv cycle when using launch time feature.
16771828
*/
16781829

16791830
/*
16801831
* The "last descriptor" of the previously sent packet, if any,
16811832
* which used the last descriptor to allocate.
16821833
*/
1683-
tx_end = sw_ring[tx_last].last_id;
1834+
tx_end = sw_ring[tx_last + 2].last_id;
16841835

16851836
/*
16861837
* The next descriptor following that "last descriptor" in the
@@ -1740,10 +1891,6 @@ igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
17401891
if (tx_ol_req) {
17411892
/* Setup TX Advanced context descriptor if required */
17421893
if (new_ctx) {
1743-
volatile struct igc_adv_tx_context_desc *
1744-
ctx_txd = (volatile struct
1745-
igc_adv_tx_context_desc *)&txr[tx_id];
1746-
17471894
txn = &sw_ring[txe->next_id];
17481895
RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
17491896

@@ -1752,20 +1899,15 @@ igc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
17521899
txe->mbuf = NULL;
17531900
}
17541901

1755-
if (igc_tx_timestamp_dynflag > 0) {
1902+
if (igc_tx_timestamp_dynflag > 0)
17561903
ts = *RTE_MBUF_DYNFIELD(tx_pkt,
17571904
igc_tx_timestamp_dynfield_offset,
17581905
uint64_t *);
1759-
igc_set_xmit_ctx(txq, ctx_txd,
1760-
tx_ol_req, tx_offload, ts);
1761-
} else {
1762-
igc_set_xmit_ctx(txq, ctx_txd,
1763-
tx_ol_req, tx_offload, 0);
1764-
}
17651906

1766-
txe->last_id = tx_last;
1767-
tx_id = txe->next_id;
1768-
txe = txn;
1907+
igc_set_xmit_ctx(txq, &tx_id, tx_ol_req,
1908+
tx_offload, ts, tx_last);
1909+
1910+
txe = &sw_ring[tx_id];
17691911
}
17701912

17711913
/* Setup the TX Advanced Data Descriptor */
@@ -1863,6 +2005,7 @@ static void
18632005
igc_tx_queue_release(struct igc_tx_queue *txq)
18642006
{
18652007
igc_tx_queue_release_mbufs(txq);
2008+
rte_free(txq->dummy_pkt_buf);
18662009
rte_free(txq->sw_ring);
18672010
rte_free(txq);
18682011
}
@@ -2017,6 +2160,21 @@ int eth_igc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
20172160
PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%" PRIx64,
20182161
txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
20192162

2163+
/* Allocate dummy packet buffer */
2164+
txq->dummy_pkt_buf = rte_zmalloc("dummy_pkt", IGC_DUMMY_PKT_SIZE,
2165+
RTE_CACHE_LINE_SIZE);
2166+
if (txq->dummy_pkt_buf == NULL) {
2167+
igc_tx_queue_release(txq);
2168+
return -ENOMEM;
2169+
}
2170+
2171+
txq->dummy_pkt_dma = rte_mem_virt2iova(txq->dummy_pkt_buf);
2172+
if (txq->dummy_pkt_dma == RTE_BAD_IOVA) {
2173+
PMD_DRV_LOG(ERR, "Failed to get DMA address for dummy packet");
2174+
igc_tx_queue_release(txq);
2175+
return -ENOMEM;
2176+
}
2177+
20202178
igc_reset_tx_queue(txq);
20212179
dev->tx_pkt_burst = igc_xmit_pkts;
20222180
dev->tx_pkt_prepare = &eth_igc_prep_pkts;

drivers/net/igc/igc_txrx.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,15 @@ struct igc_tx_queue {
128128
struct igc_advctx_info ctx_cache[IGC_CTX_NUM];
129129
/**< Hardware context history.*/
130130
uint64_t offloads; /**< offloads of RTE_ETH_TX_OFFLOAD_* */
131+
132+
/** Qbv cycle when the last first flag was marked. */
133+
uint64_t last_frst_flag;
134+
/** Qbv cycle when the last packet was transmitted. */
135+
uint64_t last_packet_cycle;
136+
/** Virtual address of dummy packet buffer for Qbv cycle dirtying. */
137+
void *dummy_pkt_buf;
138+
/** DMA/physical address of dummy packet buffer for hardware access. */
139+
rte_iova_t dummy_pkt_dma;
131140
};
132141

133142
/*

0 commit comments

Comments
 (0)