Skip to content

Commit eb51afb

Browse files
committed
i40e: add ability to reset VF for Tx and Rx MDD events
jira LE-2522
Rebuild_History Non-Buildable kernel-5.14.0-503.29.1.el9_5
commit-author Aleksandr Loktionov <[email protected]>
commit 07af482

Implement "mdd-auto-reset-vf" priv-flag to handle Tx and Rx MDD events for VFs. This flag is also used in other network adapters like ICE.

Usage:
- "on" - The problematic VF will be automatically reset if a malformed descriptor is detected.
- "off" - The problematic VF will be disabled.

In cases where a VF sends malformed packets classified as malicious, it can cause the Tx queue to freeze, rendering it unusable for several minutes. When an MDD event occurs, this new implementation allows for a graceful VF reset to quickly restore operational state.

Currently, VF queues are disabled if an MDD event occurs. This patch adds the ability to reset the VF if a Tx or Rx MDD event occurs. It also includes MDD event logging throttling to avoid dmesg pollution and unifies the format of Tx and Rx MDD messages.

Note: Standard message rate limiting functions like dev_info_ratelimited() do not meet our requirements. Custom rate limiting is implemented, please see the code for details.

Co-developed-by: Jan Sokolowski <[email protected]>
Signed-off-by: Jan Sokolowski <[email protected]>
Co-developed-by: Padraig J Connolly <[email protected]>
Signed-off-by: Padraig J Connolly <[email protected]>
Signed-off-by: Aleksandr Loktionov <[email protected]>
Reviewed-by: Michal Schmidt <[email protected]>
Tested-by: Rafal Romanowski <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
(cherry picked from commit 07af482)
Signed-off-by: Jonathan Maple <[email protected]>
1 parent 895a2f4 commit eb51afb

File tree

7 files changed

+123
-17
lines changed

7 files changed

+123
-17
lines changed

Documentation/networking/device_drivers/ethernet/intel/i40e.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
299299
ethtool --show-priv-flags ethX
300300
ethtool --set-priv-flags ethX link-down-on-close [on|off]
301301

302+
Setting the mdd-auto-reset-vf Private Flag
303+
------------------------------------------
304+
305+
When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
306+
be automatically reset if a malformed descriptor is detected. If the flag is
307+
set to "off", the problematic VF will be disabled.
308+
309+
Use ethtool to view and set mdd-auto-reset-vf, as follows::
310+
311+
ethtool --show-priv-flags ethX
312+
ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
313+
302314
Viewing Link Messages
303315
---------------------
304316
Link messages will not be displayed to the console if the distribution is

drivers/net/ethernet/intel/i40e/i40e.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ enum i40e_state {
8787
__I40E_SERVICE_SCHED,
8888
__I40E_ADMINQ_EVENT_PENDING,
8989
__I40E_MDD_EVENT_PENDING,
90+
__I40E_MDD_VF_PRINT_PENDING,
9091
__I40E_VFLR_EVENT_PENDING,
9192
__I40E_RESET_RECOVERY_PENDING,
9293
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -190,6 +191,7 @@ enum i40e_pf_flags {
190191
*/
191192
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
192193
I40E_FLAG_VF_VLAN_PRUNING_ENA,
194+
I40E_FLAG_MDD_AUTO_RESET_VF,
193195
I40E_PF_FLAGS_NBITS, /* must be last */
194196
};
195197

@@ -571,7 +573,7 @@ struct i40e_pf {
571573
int num_alloc_vfs; /* actual number of VFs allocated */
572574
u32 vf_aq_requests;
573575
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
574-
576+
struct ratelimit_state mdd_message_rate_limit;
575577
/* DCBx/DCBNL capability for PF that indicates
576578
* whether DCBx is managed by firmware or host
577579
* based agent (LLDPAD). Also, indicates what

drivers/net/ethernet/intel/i40e/i40e_debugfs.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
722722
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
723723
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
724724
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
725-
vf->num_mdd_events);
725+
vf->mdd_tx_events.count + vf->mdd_rx_events.count);
726726
} else {
727727
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
728728
}

drivers/net/ethernet/intel/i40e/i40e_ethtool.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
459459
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
460460
I40E_PRIV_FLAG("vf-vlan-pruning",
461461
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
462+
I40E_PRIV_FLAG("mdd-auto-reset-vf",
463+
I40E_FLAG_MDD_AUTO_RESET_VF, 0),
462464
};
463465

464466
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)

drivers/net/ethernet/intel/i40e/i40e_main.c

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11193,6 +11193,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
1119311193
i40e_reset_and_rebuild(pf, false, lock_acquired);
1119411194
}
1119511195

11196+
/**
11197+
* i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
11198+
* @pf: board private structure
11199+
* @vf: pointer to the VF structure
11200+
* @is_tx: true - for Tx event, false - for Rx
11201+
*/
11202+
static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
11203+
bool is_tx)
11204+
{
11205+
dev_err(&pf->pdev->dev, is_tx ?
11206+
"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
11207+
"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
11208+
is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
11209+
pf->hw.pf_id,
11210+
vf->vf_id,
11211+
vf->default_lan_addr.addr,
11212+
str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
11213+
}
11214+
11215+
/**
11216+
* i40e_print_vfs_mdd_events - print VFs malicious driver detect event
11217+
* @pf: pointer to the PF structure
11218+
*
11219+
* Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
11220+
*/
11221+
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
11222+
{
11223+
unsigned int i;
11224+
11225+
/* check that there are pending MDD events to print */
11226+
if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
11227+
return;
11228+
11229+
if (!__ratelimit(&pf->mdd_message_rate_limit))
11230+
return;
11231+
11232+
for (i = 0; i < pf->num_alloc_vfs; i++) {
11233+
struct i40e_vf *vf = &pf->vf[i];
11234+
bool is_printed = false;
11235+
11236+
/* only print Rx MDD event message if there are new events */
11237+
if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
11238+
vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
11239+
i40e_print_vf_mdd_event(pf, vf, false);
11240+
is_printed = true;
11241+
}
11242+
11243+
/* only print Tx MDD event message if there are new events */
11244+
if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
11245+
vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
11246+
i40e_print_vf_mdd_event(pf, vf, true);
11247+
is_printed = true;
11248+
}
11249+
11250+
if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
11251+
dev_info(&pf->pdev->dev,
11252+
"Use PF Control I/F to re-enable the VF #%d\n",
11253+
i);
11254+
}
11255+
}
11256+
1119611257
/**
1119711258
* i40e_handle_mdd_event
1119811259
* @pf: pointer to the PF structure
@@ -11207,8 +11268,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1120711268
u32 reg;
1120811269
int i;
1120911270

11210-
if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
11271+
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
11272+
/* Since the VF MDD event logging is rate limited, check if
11273+
* there are pending MDD events.
11274+
*/
11275+
i40e_print_vfs_mdd_events(pf);
1121111276
return;
11277+
}
1121211278

1121311279
/* find what triggered the MDD event */
1121411280
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11252,36 +11318,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
1125211318

1125311319
/* see if one of the VFs needs its hand slapped */
1125411320
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
11321+
bool is_mdd_on_tx = false;
11322+
bool is_mdd_on_rx = false;
11323+
1125511324
vf = &(pf->vf[i]);
1125611325
reg = rd32(hw, I40E_VP_MDET_TX(i));
1125711326
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
11327+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1125811328
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
11259-
vf->num_mdd_events++;
11260-
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
11261-
i);
11262-
dev_info(&pf->pdev->dev,
11263-
"Use PF Control I/F to re-enable the VF\n");
11329+
vf->mdd_tx_events.count++;
1126411330
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11331+
is_mdd_on_tx = true;
1126511332
}
1126611333

1126711334
reg = rd32(hw, I40E_VP_MDET_RX(i));
1126811335
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
11336+
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
1126911337
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
11270-
vf->num_mdd_events++;
11271-
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
11272-
i);
11273-
dev_info(&pf->pdev->dev,
11274-
"Use PF Control I/F to re-enable the VF\n");
11338+
vf->mdd_rx_events.count++;
1127511339
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
11340+
is_mdd_on_rx = true;
11341+
}
11342+
11343+
if ((is_mdd_on_tx || is_mdd_on_rx) &&
11344+
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
11345+
/* VF MDD event counters will be cleared by
11346+
* reset, so print the event prior to reset.
11347+
*/
11348+
if (is_mdd_on_rx)
11349+
i40e_print_vf_mdd_event(pf, vf, false);
11350+
if (is_mdd_on_tx)
11351+
i40e_print_vf_mdd_event(pf, vf, true);
11352+
11353+
i40e_vc_reset_vf(vf, true);
1127611354
}
1127711355
}
1127811356

11279-
/* re-enable mdd interrupt cause */
11280-
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
1128111357
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
1128211358
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
1128311359
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
1128411360
i40e_flush(hw);
11361+
11362+
i40e_print_vfs_mdd_events(pf);
1128511363
}
1128611364

1128711365
/**
@@ -15980,6 +16058,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
1598016058
ERR_PTR(err),
1598116059
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
1598216060

16061+
/* VF MDD event logs are rate limited to one second intervals */
16062+
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
16063+
1598316064
/* Reconfigure hardware for allowing smaller MSS in the case
1598416065
* of TSO, so that we avoid the MDD being fired and causing
1598516066
* a reset in the case of small MSS+TSO.

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
216216
* @notify_vf: notify vf about reset or not
217217
* Reset VF handler.
218218
**/
219-
static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
219+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
220220
{
221221
struct i40e_pf *pf = vf->pf;
222222
int i;

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ struct i40evf_channel {
6464
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
6565
};
6666

67+
/* MDD event bookkeeping for one direction (Rx or Tx) of a single VF:
 * a running event count, and the value that count had the last time it
 * was written to the log, so that only new events are reported.
 */
struct i40e_mdd_vf_events {
68+
u64 count; /* total count of Rx|Tx events */
69+
/* count number of the last printed event */
70+
u64 last_printed;
71+
};
72+
6773
/* VF information structure */
6874
struct i40e_vf {
6975
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
9298

9399
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
94100
u8 num_req_queues; /* num of requested qps */
95-
u64 num_mdd_events; /* num of mdd events detected */
101+
/* num of mdd tx and rx events detected */
102+
struct i40e_mdd_vf_events mdd_rx_events;
103+
struct i40e_mdd_vf_events mdd_tx_events;
96104

97105
unsigned long vf_caps; /* vf's adv. capabilities */
98106
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
120128
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
121129
u32 v_retval, u8 *msg, u16 msglen);
122130
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
131+
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
123132
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
124133
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
125134
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);

0 commit comments

Comments
 (0)