From bb60f107c96bd80ec10b1529853a2645762670ac Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 6 Jan 2025 15:46:39 +0000 Subject: [PATCH 01/45] ASoC: wm8994: Add depends on MFD core [ Upstream commit 5ed01155cea69801f1f0c908954a56a5a3474bed ] The ASoC driver should not be used without the MFD component. This was causing randconfig issues with regmap IRQ which is selected by the MFD part of the wm8994 driver. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501061337.R0DlBUoD-lkp@intel.com/ Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250106154639.3999553-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index f1e1dbc509f6e4..6d105a23c8284c 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -2209,6 +2209,7 @@ config SND_SOC_WM8993 config SND_SOC_WM8994 tristate + depends on MFD_WM8994 config SND_SOC_WM8995 tristate From ed0d02b7e14760cc25a93e9551c3366caa572118 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 7 Jan 2025 10:41:34 +0000 Subject: [PATCH 02/45] ASoC: samsung: Add missing selects for MFD_WM8994 [ Upstream commit fd55c6065bec5268740e944a1800e6fad00974d9 ] Anything selecting SND_SOC_WM8994 should also select MFD_WM8994, as SND_SOC_WM8994 does not automatically do so. Add the missing selects. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501071530.UwIXs7OL-lkp@intel.com/ Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250107104134.12147-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/samsung/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index 93c2b1b08d0a6f..77d3beea1d715e 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -127,8 +127,9 @@ config SND_SOC_SAMSUNG_TM2_WM5110 config SND_SOC_SAMSUNG_ARIES_WM8994 tristate "SoC I2S Audio support for WM8994 on Aries" - depends on SND_SOC_SAMSUNG && MFD_WM8994 && IIO && EXTCON + depends on SND_SOC_SAMSUNG && IIO && EXTCON select SND_SOC_BT_SCO + select MFD_WM8994 select SND_SOC_WM8994 select SND_SAMSUNG_I2S help @@ -142,6 +143,7 @@ config SND_SOC_SAMSUNG_MIDAS_WM1811 tristate "SoC I2S Audio support for Midas boards" depends on SND_SOC_SAMSUNG select SND_SAMSUNG_I2S + select MFD_WM8994 select SND_SOC_WM8994 help Say Y if you want to add support for SoC audio on the Midas boards. From 06bfc95f817bafac7f93bb075e334905d24b0ab4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 8 Jan 2025 23:44:45 +0100 Subject: [PATCH 03/45] seccomp: Stub for !CONFIG_SECCOMP [ Upstream commit f90877dd7fb5085dd9abd6399daf63dd2969fc90 ] When using !CONFIG_SECCOMP with CONFIG_GENERIC_ENTRY, the randconfig bots found the following snag: kernel/entry/common.c: In function 'syscall_trace_enter': >> kernel/entry/common.c:52:23: error: implicit declaration of function '__secure_computing' [-Wimplicit-function-declaration] 52 | ret = __secure_computing(NULL); | ^~~~~~~~~~~~~~~~~~ Since generic entry calls __secure_computing() unconditionally, fix this by moving the stub out of the ifdef clause for CONFIG_HAVE_ARCH_SECCOMP_FILTER so it's always available. Link: https://lore.kernel.org/oe-kbuild-all/202501061240.Fzk9qiFZ-lkp@intel.com/ Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20250108-seccomp-stub-2-v2-1-74523d49420f@linaro.org Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- include/linux/seccomp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 175079552f68da..8cf31bb8871988 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -70,10 +70,10 @@ struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER static inline int secure_computing(void) { return 0; } -static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif +static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } static inline long prctl_get_seccomp(void) { From 44c485f0fcb254b07ba9d04a70089c78faae222c Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Tue, 7 Jan 2025 10:24:31 +0800 Subject: [PATCH 04/45] scsi: iscsi: Fix redundant response for ISCSI_UEVENT_GET_HOST_STATS request [ Upstream commit 63ca02221cc5aa0731fe2b0cc28158aaa4b84982 ] The ISCSI_UEVENT_GET_HOST_STATS request is already handled in iscsi_get_host_stats(). This fix ensures that redundant responses are skipped in iscsi_if_rx(). - On success: send reply and stats from iscsi_get_host_stats() within if_recv_msg(). - On error: fall through. Signed-off-by: Xiang Zhang Link: https://lore.kernel.org/r/20250107022432.65390-1-hawkxiang.cpp@gmail.com Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 3075b2ddf7a697..deeb657981a690 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -4103,7 +4103,7 @@ iscsi_if_rx(struct sk_buff *skb) } do { /* - * special case for GET_STATS: + * special case for GET_STATS, GET_CHAP and GET_HOST_STATS: * on success - sending reply and stats from * inside of if_recv_msg(), * on error - fall through. @@ -4112,6 +4112,8 @@ iscsi_if_rx(struct sk_buff *skb) break; if (ev->type == ISCSI_UEVENT_GET_CHAP && !err) break; + if (ev->type == ISCSI_UEVENT_GET_HOST_STATS && !err) + break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); if (err == -EAGAIN && --retries < 0) { From 758abba3dd413dc5de2016f8588403294263a30a Mon Sep 17 00:00:00 2001 From: Tom Chung Date: Tue, 1 Oct 2024 17:13:07 +0800 Subject: [PATCH 05/45] drm/amd/display: Use HW lock mgr for PSR1 [ Upstream commit b5c764d6ed556c4e81fbe3fd976da77ec450c08e ] [Why] Without the dmub hw lock, it may cause the lock timeout issue while do modeset on PSR1 eDP panel. [How] Allow dmub hw lock for PSR1. Reviewed-by: Sun peng Li Signed-off-by: Tom Chung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher (cherry picked from commit a2b5a9956269f4c1a09537177f18ab0229fe79f7) Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c index 2aa0e01a6891b0..f11b071a896f59 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_hw_lock_mgr.c @@ -63,7 +63,8 @@ void dmub_hw_lock_mgr_inbox0_cmd(struct dc_dmub_srv *dmub_srv, bool should_use_dmub_lock(struct dc_link *link) { - if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1) + if (link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 || + link->psr_settings.psr_version == DC_PSR_VERSION_1) return true; return false; } From b25bf1d7f5ff3f85dd66b73aa432d2b4ad3c9855 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Fri, 10 Jan 2025 15:50:28 -0600 Subject: [PATCH 06/45] of/unittest: Add test that of_address_to_resource() fails on non-translatable address [ Upstream commit 44748065ed321041db6e18cdcaa8c2a9554768ac ] of_address_to_resource() on a non-translatable address should return an error. Additionally, this case also triggers a spurious WARN for missing #address-cells/#size-cells. Link: https://lore.kernel.org/r/20250110215030.3637845-1-robh@kernel.org Signed-off-by: Rob Herring (Arm) Signed-off-by: Sasha Levin --- drivers/of/unittest-data/tests-platform.dtsi | 13 +++++++++++++ drivers/of/unittest.c | 14 ++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/of/unittest-data/tests-platform.dtsi b/drivers/of/unittest-data/tests-platform.dtsi index fa39611071b32f..cd310b26b50c81 100644 --- a/drivers/of/unittest-data/tests-platform.dtsi +++ b/drivers/of/unittest-data/tests-platform.dtsi @@ -34,5 +34,18 @@ }; }; }; + + platform-tests-2 { + // No #address-cells or #size-cells + node { + #address-cells = <1>; + #size-cells = <1>; + + test-device@100 { + compatible = "test-sub-device"; + reg = <0x100 1>; + }; + }; + }; }; }; diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 7986113adc7d31..3b22c36bfb0b7c 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -1186,6 +1186,7 @@ static void __init of_unittest_bus_3cell_ranges(void) static void __init of_unittest_reg(void) { struct device_node *np; + struct resource res; int ret; u64 addr, size; @@ -1202,6 +1203,19 @@ static void __init of_unittest_reg(void) np, addr); of_node_put(np); + + np = of_find_node_by_path("/testcase-data/platform-tests-2/node/test-device@100"); + if (!np) { + pr_err("missing testcase data\n"); + return; + } + + ret = of_address_to_resource(np, 0, &res); + unittest(ret == -EINVAL, "of_address_to_resource(%pOF) expected error on untranslatable address\n", + np); + + of_node_put(np); + } static void __init of_unittest_parse_interrupts(void) From 2148a41dc8ff324278b078343826ddaa88382d50 Mon Sep 17 00:00:00 2001 From: Philippe Simons Date: Sun, 12 Jan 2025 13:34:02 +0100 Subject: [PATCH 07/45] irqchip/sunxi-nmi: Add missing SKIP_WAKE flag [ Upstream commit 3a748d483d80f066ca4b26abe45cdc0c367d13e9 ] Some boards with Allwinner SoCs connect the PMIC's IRQ pin to the SoC's NMI pin instead of a normal GPIO. Since the power key is connected to the PMIC, and people expect to wake up a suspended system via this key, the NMI IRQ controller must stay alive when the system goes into suspend. Add the SKIP_WAKE flag to prevent the sunxi NMI controller from going to sleep, so that the power key can wake up those systems. [ tglx: Fixed up coding style ] Signed-off-by: Philippe Simons Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250112123402.388520-1-simons.philippe@gmail.com Signed-off-by: Sasha Levin --- drivers/irqchip/irq-sunxi-nmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-sunxi-nmi.c b/drivers/irqchip/irq-sunxi-nmi.c index e760b1278143dd..262b625c30c102 100644 --- a/drivers/irqchip/irq-sunxi-nmi.c +++ b/drivers/irqchip/irq-sunxi-nmi.c @@ -186,7 +186,8 @@ static int __init sunxi_sc_nmi_irq_init(struct device_node *node, gc->chip_types[0].chip.irq_unmask = irq_gc_mask_set_bit; gc->chip_types[0].chip.irq_eoi = irq_gc_ack_set_bit; gc->chip_types[0].chip.irq_set_type = sunxi_sc_nmi_set_type; - gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED; + gc->chip_types[0].chip.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED | + IRQCHIP_SKIP_SET_WAKE; gc->chip_types[0].regs.ack = reg_offs->pend; gc->chip_types[0].regs.mask = reg_offs->enable; gc->chip_types[0].regs.type = reg_offs->ctrl; From 85af156e158c4cbe606432a4ee719d7048da74dd Mon Sep 17 00:00:00 2001 From: Russell Harmon Date: Wed, 15 Jan 2025 05:13:41 -0800 Subject: [PATCH 08/45] hwmon: (drivetemp) Set scsi command timeout to 10s [ Upstream commit b46ba47d7bb461a0969317be1f2e165c0571d6c5 ] There's at least one drive (MaxDigitalData OOS14000G) such that if it receives a large amount of I/O while entering an idle power state will first exit idle before responding, including causing SMART temperature requests to be delayed. This causes the drivetemp request to exceed its timeout of 1 second. Signed-off-by: Russell Harmon Link: https://lore.kernel.org/r/20250115131340.3178988-1-russ@har.mn Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/drivetemp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c index 2a4ec55ddb47ed..291d91f6864676 100644 --- a/drivers/hwmon/drivetemp.c +++ b/drivers/hwmon/drivetemp.c @@ -194,7 +194,7 @@ static int drivetemp_scsi_command(struct drivetemp_data *st, scsi_cmd[14] = ata_command; err = scsi_execute_cmd(st->sdev, scsi_cmd, op, st->smartdata, - ATA_SECT_SIZE, HZ, 5, NULL); + ATA_SECT_SIZE, 10 * HZ, 5, NULL); if (err > 0) err = -EIO; return err; From 509a928e815e2b50b6ef6c9ad7353a58b366d097 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 8 Jan 2025 13:48:28 +0000 Subject: [PATCH 09/45] ASoC: samsung: Add missing depends on I2C [ Upstream commit 704dbe97a68153a84319ad63f526e12ba868b88e ] When switching to selects for MFD_WM8994 a dependency should have also been added for I2C, as the dependency on MFD_WM8994 will not be considered by the select. Fixes: fd55c6065bec ("ASoC: samsung: Add missing selects for MFD_WM8994") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202501082020.2bpGGVTW-lkp@intel.com/ Signed-off-by: Charles Keepax Link: https://patch.msgid.link/20250108134828.246570-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/samsung/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index 77d3beea1d715e..e8716501685bce 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -127,7 +127,7 @@ config SND_SOC_SAMSUNG_TM2_WM5110 config SND_SOC_SAMSUNG_ARIES_WM8994 tristate "SoC I2S Audio support for WM8994 on Aries" - depends on SND_SOC_SAMSUNG && IIO && EXTCON + depends on SND_SOC_SAMSUNG && I2C && IIO && EXTCON select SND_SOC_BT_SCO select MFD_WM8994 select SND_SOC_WM8994 @@ -141,7 +141,7 @@ config SND_SOC_SAMSUNG_ARIES_WM8994 config SND_SOC_SAMSUNG_MIDAS_WM1811 tristate "SoC I2S Audio support for Midas boards" - depends on SND_SOC_SAMSUNG + depends on SND_SOC_SAMSUNG && I2C select SND_SAMSUNG_I2S select MFD_WM8994 select SND_SOC_WM8994 From 2364dc21ba5a97c6f76b55a514b8ec9d062c5551 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 2 Jul 2024 02:47:34 +0000 Subject: [PATCH 10/45] ata: libata-core: Set ATA_QCFLAG_RTF_FILLED in fill_result_tf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 18676c6aab0863618eb35443e7b8615eea3535a9 upstream. ATA_QCFLAG_RTF_FILLED is not specific to ahci and can be used generally to check if qc->result_tf contains valid data. Reviewed-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Igor Pylypiv Link: https://lore.kernel.org/r/20240702024735.1152293-7-ipylypiv@google.com Signed-off-by: Niklas Cassel Cc: Christian Kühnke Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libahci.c | 12 ++---------- drivers/ata/libata-core.c | 8 ++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index f1263364fa97fa..86fa5dc7dd99a8 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -2082,13 +2082,6 @@ static void ahci_qc_fill_rtf(struct ata_queued_cmd *qc) struct ahci_port_priv *pp = qc->ap->private_data; u8 *rx_fis = pp->rx_fis; - /* - * rtf may already be filled (e.g. for successful NCQ commands). - * If that is the case, we have nothing to do. - */ - if (qc->flags & ATA_QCFLAG_RTF_FILLED) - return; - if (pp->fbs_enabled) rx_fis += qc->dev->link->pmp * AHCI_RX_FIS_SZ; @@ -2102,7 +2095,6 @@ static void ahci_qc_fill_rtf(struct ata_queued_cmd *qc) !(qc->flags & ATA_QCFLAG_EH)) { ata_tf_from_fis(rx_fis + RX_FIS_PIO_SETUP, &qc->result_tf); qc->result_tf.status = (rx_fis + RX_FIS_PIO_SETUP)[15]; - qc->flags |= ATA_QCFLAG_RTF_FILLED; return; } @@ -2125,12 +2117,10 @@ static void ahci_qc_fill_rtf(struct ata_queued_cmd *qc) */ qc->result_tf.status = fis[2]; qc->result_tf.error = fis[3]; - qc->flags |= ATA_QCFLAG_RTF_FILLED; return; } ata_tf_from_fis(rx_fis + RX_FIS_D2H_REG, &qc->result_tf); - qc->flags |= ATA_QCFLAG_RTF_FILLED; } static void ahci_qc_ncq_fill_rtf(struct ata_port *ap, u64 done_mask) @@ -2165,6 +2155,7 @@ static void ahci_qc_ncq_fill_rtf(struct ata_port *ap, u64 done_mask) if (qc && ata_is_ncq(qc->tf.protocol)) { qc->result_tf.status = status; qc->result_tf.error = error; + qc->result_tf.flags = qc->tf.flags; qc->flags |= ATA_QCFLAG_RTF_FILLED; } done_mask &= ~(1ULL << tag); @@ -2189,6 +2180,7 @@ static void ahci_qc_ncq_fill_rtf(struct ata_port *ap, u64 done_mask) fis += RX_FIS_SDB; qc->result_tf.status = fis[2]; qc->result_tf.error = fis[3]; + qc->result_tf.flags = qc->tf.flags; qc->flags |= ATA_QCFLAG_RTF_FILLED; } done_mask &= ~(1ULL << tag); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 4ed90d46a017a8..f627753519b978 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4792,8 +4792,16 @@ static void fill_result_tf(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + /* + * rtf may already be filled (e.g. for successful NCQ commands). + * If that is the case, we have nothing to do. + */ + if (qc->flags & ATA_QCFLAG_RTF_FILLED) + return; + qc->result_tf.flags = qc->tf.flags; ap->ops->qc_fill_rtf(qc); + qc->flags |= ATA_QCFLAG_RTF_FILLED; } static void ata_verify_xfer(struct ata_queued_cmd *qc) From cd9f7bf6cad8b2d3876105ce3c9fc63460a046f6 Mon Sep 17 00:00:00 2001 From: Anastasia Belova Date: Mon, 26 Aug 2024 16:38:41 +0300 Subject: [PATCH 11/45] cpufreq: amd-pstate: add check for cpufreq_cpu_get's return value commit 5493f9714e4cdaf0ee7cec15899a231400cb1a9f upstream. cpufreq_cpu_get may return NULL. To avoid NULL-dereference check it and return in case of error. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Anastasia Belova Reviewed-by: Perry Yuan Signed-off-by: Viresh Kumar [ Raj: on 6.6, there don't have function amd_pstate_update_limits() so applied the NULL checking in amd_pstate_adjust_perf() only ] Signed-off-by: Rajani Kantha Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index cdead37d0823ad..a64baa97e3583d 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -579,8 +579,13 @@ static void amd_pstate_adjust_perf(unsigned int cpu, unsigned long max_perf, min_perf, des_perf, cap_perf, lowest_nonlinear_perf, max_freq; struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); - struct amd_cpudata *cpudata = policy->driver_data; unsigned int target_freq; + struct amd_cpudata *cpudata; + + if (!policy) + return; + + cpudata = policy->driver_data; if (policy->min != cpudata->min_limit_freq || policy->max != cpudata->max_limit_freq) amd_pstate_update_min_max_limit(policy); From 52da02521ede55fb86546c3fffd9377b3261b91f Mon Sep 17 00:00:00 2001 From: Omid Ehtemam-Haghighi Date: Tue, 5 Nov 2024 17:02:36 -0800 Subject: [PATCH 12/45] ipv6: Fix soft lockups in fib6_select_path under high next hop churn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d9ccb18f83ea2bb654289b6ecf014fd267cc988b upstream. Soft lockups have been observed on a cluster of Linux-based edge routers located in a highly dynamic environment. Using the `bird` service, these routers continuously update BGP-advertised routes due to frequently changing nexthop destinations, while also managing significant IPv6 traffic. The lockups occur during the traversal of the multipath circular linked-list in the `fib6_select_path` function, particularly while iterating through the siblings in the list. The issue typically arises when the nodes of the linked list are unexpectedly deleted concurrently on a different core—indicated by their 'next' and 'previous' elements pointing back to the node itself and their reference count dropping to zero. This results in an infinite loop, leading to a soft lockup that triggers a system panic via the watchdog timer. Apply RCU primitives in the problematic code sections to resolve the issue. Where necessary, update the references to fib6_siblings to annotate or use the RCU APIs. Include a test script that reproduces the issue. The script periodically updates the routing table while generating a heavy load of outgoing IPv6 traffic through multiple iperf3 clients. It consistently induces infinite soft lockups within a couple of minutes. Kernel log: 0 [ffffbd13003e8d30] machine_kexec at ffffffff8ceaf3eb 1 [ffffbd13003e8d90] __crash_kexec at ffffffff8d0120e3 2 [ffffbd13003e8e58] panic at ffffffff8cef65d4 3 [ffffbd13003e8ed8] watchdog_timer_fn at ffffffff8d05cb03 4 [ffffbd13003e8f08] __hrtimer_run_queues at ffffffff8cfec62f 5 [ffffbd13003e8f70] hrtimer_interrupt at ffffffff8cfed756 6 [ffffbd13003e8fd0] __sysvec_apic_timer_interrupt at ffffffff8cea01af 7 [ffffbd13003e8ff0] sysvec_apic_timer_interrupt at ffffffff8df1b83d -- -- 8 [ffffbd13003d3708] asm_sysvec_apic_timer_interrupt at ffffffff8e000ecb [exception RIP: fib6_select_path+299] RIP: ffffffff8ddafe7b RSP: ffffbd13003d37b8 RFLAGS: 00000287 RAX: ffff975850b43600 RBX: ffff975850b40200 RCX: 0000000000000000 RDX: 000000003fffffff RSI: 0000000051d383e4 RDI: ffff975850b43618 RBP: ffffbd13003d3800 R8: 0000000000000000 R9: ffff975850b40200 R10: 0000000000000000 R11: 0000000000000000 R12: ffffbd13003d3830 R13: ffff975850b436a8 R14: ffff975850b43600 R15: 0000000000000007 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 9 [ffffbd13003d3808] ip6_pol_route at ffffffff8ddb030c 10 [ffffbd13003d3888] ip6_pol_route_input at ffffffff8ddb068c 11 [ffffbd13003d3898] fib6_rule_lookup at ffffffff8ddf02b5 12 [ffffbd13003d3928] ip6_route_input at ffffffff8ddb0f47 13 [ffffbd13003d3a18] ip6_rcv_finish_core.constprop.0 at ffffffff8dd950d0 14 [ffffbd13003d3a30] ip6_list_rcv_finish.constprop.0 at ffffffff8dd96274 15 [ffffbd13003d3a98] ip6_sublist_rcv at ffffffff8dd96474 16 [ffffbd13003d3af8] ipv6_list_rcv at ffffffff8dd96615 17 [ffffbd13003d3b60] __netif_receive_skb_list_core at ffffffff8dc16fec 18 [ffffbd13003d3be0] netif_receive_skb_list_internal at ffffffff8dc176b3 19 [ffffbd13003d3c50] napi_gro_receive at ffffffff8dc565b9 20 [ffffbd13003d3c80] ice_receive_skb at ffffffffc087e4f5 [ice] 21 [ffffbd13003d3c90] ice_clean_rx_irq at ffffffffc0881b80 [ice] 22 [ffffbd13003d3d20] ice_napi_poll at ffffffffc088232f [ice] 23 [ffffbd13003d3d80] __napi_poll at ffffffff8dc18000 24 [ffffbd13003d3db8] net_rx_action at ffffffff8dc18581 25 [ffffbd13003d3e40] __do_softirq at ffffffff8df352e9 26 [ffffbd13003d3eb0] run_ksoftirqd at ffffffff8ceffe47 27 [ffffbd13003d3ec0] smpboot_thread_fn at ffffffff8cf36a30 28 [ffffbd13003d3ee8] kthread at ffffffff8cf2b39f 29 [ffffbd13003d3f28] ret_from_fork at ffffffff8ce5fa64 30 [ffffbd13003d3f50] ret_from_fork_asm at ffffffff8ce03cbb Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table") Reported-by: Adrian Oliver Signed-off-by: Omid Ehtemam-Haghighi Cc: Shuah Khan Cc: Ido Schimmel Cc: Kuniyuki Iwashima Cc: Simon Horman Reviewed-by: David Ahern Link: https://patch.msgid.link/20241106010236.1239299-1-omid.ehtemamhaghighi@menlosecurity.com Signed-off-by: Jakub Kicinski Signed-off-by: Rajani Kantha Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 8 +- net/ipv6/route.c | 45 ++- tools/testing/selftests/net/Makefile | 1 + .../net/ipv6_route_update_soft_lockup.sh | 262 ++++++++++++++++++ 4 files changed, 297 insertions(+), 19 deletions(-) create mode 100644 tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index afa9073567dc40..023ac39041a214 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1179,8 +1179,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, while (sibling) { if (sibling->fib6_metric == rt->fib6_metric && rt6_qualify_for_ecmp(sibling)) { - list_add_tail(&rt->fib6_siblings, - &sibling->fib6_siblings); + list_add_tail_rcu(&rt->fib6_siblings, + &sibling->fib6_siblings); break; } sibling = rcu_dereference_protected(sibling->fib6_next, @@ -1241,7 +1241,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); return err; } @@ -1954,7 +1954,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, &rt->fib6_siblings, fib6_siblings) sibling->fib6_nsiblings--; rt->fib6_nsiblings = 0; - list_del_init(&rt->fib6_siblings); + list_del_rcu(&rt->fib6_siblings); rt6_multipath_rebalance(next_sibling); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fc5c5346202530..c5cee40a658b46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -418,8 +418,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, struct flowi6 *fl6, int oif, bool have_oif_match, const struct sk_buff *skb, int strict) { - struct fib6_info *sibling, *next_sibling; struct fib6_info *match = res->f6i; + struct fib6_info *sibling; if (!match->nh && (!match->fib6_nsiblings || have_oif_match)) goto out; @@ -445,8 +445,8 @@ void fib6_select_path(const struct net *net, struct fib6_result *res, if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound)) goto out; - list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, - fib6_siblings) { + list_for_each_entry_rcu(sibling, &match->fib6_siblings, + fib6_siblings) { const struct fib6_nh *nh = sibling->fib6_nh; int nh_upper_bound; @@ -5186,14 +5186,18 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, * nexthop. Since sibling routes are always added at the end of * the list, find the first sibling of the last route appended */ + rcu_read_lock(); + if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { - rt = list_first_entry(&rt_last->fib6_siblings, - struct fib6_info, - fib6_siblings); + rt = list_first_or_null_rcu(&rt_last->fib6_siblings, + struct fib6_info, + fib6_siblings); } if (rt) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); + + rcu_read_unlock(); } static bool ip6_route_mpath_should_notify(const struct fib6_info *rt) @@ -5538,17 +5542,21 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i) nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size, &nexthop_len); } else { - struct fib6_info *sibling, *next_sibling; struct fib6_nh *nh = f6i->fib6_nh; + struct fib6_info *sibling; nexthop_len = 0; if (f6i->fib6_nsiblings) { rt6_nh_nlmsg_size(nh, &nexthop_len); - list_for_each_entry_safe(sibling, next_sibling, - &f6i->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &f6i->fib6_siblings, + fib6_siblings) { rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len); } + + rcu_read_unlock(); } nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws); } @@ -5712,7 +5720,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; } else if (rt->fib6_nsiblings) { - struct fib6_info *sibling, *next_sibling; + struct fib6_info *sibling; struct nlattr *mp; mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); @@ -5724,14 +5732,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, 0) < 0) goto nla_put_failure; - list_for_each_entry_safe(sibling, next_sibling, - &rt->fib6_siblings, fib6_siblings) { + rcu_read_lock(); + + list_for_each_entry_rcu(sibling, &rt->fib6_siblings, + fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6, 0) < 0) + AF_INET6, 0) < 0) { + rcu_read_unlock(); + goto nla_put_failure; + } } + rcu_read_unlock(); + nla_nest_end(skb, mp); } else if (rt->nh) { if (nla_put_u32(skb, RTA_NH_ID, rt->nh->id)) @@ -6168,7 +6183,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, err = -ENOBUFS; seq = info->nlh ? info->nlh->nlmsg_seq : 0; - skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); + skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC); if (!skb) goto errout; @@ -6181,7 +6196,7 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, goto errout; } rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + info->nlh, GFP_ATOMIC); return; errout: if (err < 0) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 91a48efb140bef..efaf0e0bc4592b 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -91,6 +91,7 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100644 index 00000000000000..a6b2b1f9c641c9 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh +source net_helper.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh From 7998e7efd1d557af118ac96f48ebc569b929b555 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Tue, 8 Oct 2024 00:41:38 -0700 Subject: [PATCH 13/45] RDMA/bnxt_re: Avoid CPU lockups due fifo occupancy check loop commit 8be3e5b0c96beeefe9d5486b96575d104d3e7d17 upstream. Driver waits indefinitely for the fifo occupancy to go below a threshold as soon as the pacing interrupt is received. This can cause soft lockup on one of the processors, if the rate of DB is very high. Add a loop count for FPGA and exit the __wait_for_fifo_occupancy_below_th if the loop is taking more time. Pacing will be continuing until the occupancy is below the threshold. This is ensured by the checks in bnxt_re_pacing_timer_exp and further scheduling the work for pacing based on the fifo occupancy. Fixes: 2ad4e6303a6d ("RDMA/bnxt_re: Implement doorbell pacing algorithm") Link: https://patch.msgid.link/r/1728373302-19530-7-git-send-email-selvin.xavier@broadcom.com Reviewed-by: Kalesh AP Reviewed-by: Chandramohan Akula Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe [ Add the declaration of variable pacing_data to make it work on 6.6.y ] Signed-off-by: Alva Lan Signed-off-by: Greg Kroah-Hartman --- drivers/infiniband/hw/bnxt_re/main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index c7e51cc2ea2687..082a383c4913ec 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -485,6 +485,8 @@ static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) { u32 read_val, fifo_occup; + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; + u32 retry_fifo_check = 1000; /* loop shouldn't run infintely as the occupancy usually goes * below pacing algo threshold as soon as pacing kicks in. @@ -500,6 +502,14 @@ static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th) break; + if (!retry_fifo_check--) { + dev_info_once(rdev_to_dev(rdev), + "%s: fifo_occup = 0x%xfifo_max_depth = 0x%x pacing_th = 0x%x\n", + __func__, fifo_occup, pacing_data->fifo_max_depth, + pacing_data->pacing_th); + break; + } + } } From 4dd57d1f0e9844311c635a7fb39abce4f2ac5a61 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Mon, 13 Jan 2025 19:31:28 +0100 Subject: [PATCH 14/45] gfs2: Truncate address space when flipping GFS2_DIF_JDATA flag commit 7c9d9223802fbed4dee1ae301661bf346964c9d2 upstream. Truncate an inode's address space when flipping the GFS2_DIF_JDATA flag: depending on that flag, the pages in the address space will either use buffer heads or iomap_folio_state structs, and we cannot mix the two. Reported-by: Kun Hu , Jiaji Qin Signed-off-by: Andreas Gruenbacher Signed-off-by: Greg Kroah-Hartman --- fs/gfs2/file.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 9296e0e282bcd8..2adaffa58e88b4 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -251,6 +251,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask) error = filemap_fdatawait(inode->i_mapping); if (error) goto out; + truncate_inode_pages(inode->i_mapping, 0); if (new_flags & GFS2_DIF_JDATA) gfs2_ordered_del_inode(ip); } From 3bd97ebf7e4fcdc3c5c0d95ad68c7e503274c8d9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:36 -0500 Subject: [PATCH 15/45] libfs: Re-arrange locking in offset_iterate_dir() [ Upstream commit 3f6d810665dfde0d33785420618ceb03fba0619d ] Liam and Matthew say that once the RCU read lock is released, xa_state is not safe to re-use for the next xas_find() call. But the RCU read lock must be released on each loop iteration so that dput(), which might_sleep(), can be called safely. Thus we are forced to walk the offset tree with fresh state for each directory entry. xa_find() can do this for us, though it might be a little less efficient than maintaining xa_state locally. We believe that in the current code base, inode->i_rwsem provides protection for the xa_state maintained in offset_iterate_dir(). However, there is no guarantee that will continue to be the case in the future. Since offset_iterate_dir() doesn't build xa_state locally any more, there's no longer a strong need for offset_find_next(). Clean up by rolling these two helpers together. Suggested-by: Liam R. Howlett Message-ID: <170785993027.11135.8830043889278631735.stgit@91.116.238.104.host.secureserver.net> Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170820142021.6328.15047865406275957018.stgit@91.116.238.104.host.secureserver.net Reviewed-by: Jan Kara Signed-off-by: Christian Brauner Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index dc0f7519045f11..430f7c95336c4d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -401,12 +401,13 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, offset, U32_MAX); } -static struct dentry *offset_find_next(struct xa_state *xas) +static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) { struct dentry *child, *found = NULL; + XA_STATE(xas, &octx->xa, offset); rcu_read_lock(); - child = xas_next_entry(xas, U32_MAX); + child = xas_next_entry(&xas, U32_MAX); if (!child) goto out; spin_lock(&child->d_lock); @@ -429,12 +430,11 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { - struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode); - XA_STATE(xas, &so_ctx->xa, ctx->pos); + struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; while (true) { - dentry = offset_find_next(&xas); + dentry = offset_find_next(octx, ctx->pos); if (!dentry) return ERR_PTR(-ENOENT); @@ -443,8 +443,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) break; } + ctx->pos = dentry2offset(dentry) + 1; dput(dentry); - ctx->pos = xas.xa_index + 1; } return NULL; } From fc90bbcc08da47a4635455b4e946717d5c26258b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:37 -0500 Subject: [PATCH 16/45] libfs: Define a minimum directory offset [ Upstream commit 7beea725a8ca412c6190090ce7c3a13b169592a1 ] This value is used in several places, so make it a symbolic constant. Reviewed-by: Jan Kara Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170820142741.6328.12428356024575347885.stgit@91.116.238.104.host.secureserver.net Signed-off-by: Christian Brauner Stable-dep-of: ecba88a3b32d ("libfs: Add simple_offset_empty()") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 430f7c95336c4d..c3dc58e776f972 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -239,6 +239,11 @@ const struct inode_operations simple_dir_inode_operations = { }; EXPORT_SYMBOL(simple_dir_inode_operations); +/* 0 is '.', 1 is '..', so always start with offset 2 or more */ +enum { + DIR_OFFSET_MIN = 2, +}; + static void offset_set(struct dentry *dentry, u32 offset) { dentry->d_fsdata = (void *)((uintptr_t)(offset)); @@ -260,9 +265,7 @@ void simple_offset_init(struct offset_ctx *octx) { xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1); lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock); - - /* 0 is '.', 1 is '..', so always start with offset 2 */ - octx->next_offset = 2; + octx->next_offset = DIR_OFFSET_MIN; } /** @@ -275,7 +278,7 @@ void simple_offset_init(struct offset_ctx *octx) */ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) { - static const struct xa_limit limit = XA_LIMIT(2, U32_MAX); + static const struct xa_limit limit = XA_LIMIT(DIR_OFFSET_MIN, U32_MAX); u32 offset; int ret; @@ -480,7 +483,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) return 0; /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == 2) + if (ctx->pos == DIR_OFFSET_MIN) file->private_data = NULL; else if (file->private_data == ERR_PTR(-ENOENT)) return 0; From 307f68e49ddafa89f8d0c3fdbdb396cb01ee8373 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:38 -0500 Subject: [PATCH 17/45] libfs: Add simple_offset_empty() [ Upstream commit ecba88a3b32d733d41e27973e25b2bc580f64281 ] For simple filesystems that use directory offset mapping, rely strictly on the directory offset map to tell when a directory has no children. After this patch is applied, the emptiness test holds only the RCU read lock when the directory being tested has no children. In addition, this adds another layer of confirmation that simple_offset_add/remove() are working as expected. Reviewed-by: Jan Kara Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/170820143463.6328.7872919188371286951.stgit@91.116.238.104.host.secureserver.net Signed-off-by: Christian Brauner Stable-dep-of: 5a1a25be995e ("libfs: Add simple_offset_rename() API") Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 32 ++++++++++++++++++++++++++++++++ include/linux/fs.h | 1 + mm/shmem.c | 4 ++-- 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index c3dc58e776f972..d7b901cb9af413 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -312,6 +312,38 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) offset_set(dentry, 0); } +/** + * simple_offset_empty - Check if a dentry can be unlinked + * @dentry: dentry to be tested + * + * Returns 0 if @dentry is a non-empty directory; otherwise returns 1. + */ +int simple_offset_empty(struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + struct offset_ctx *octx; + struct dentry *child; + unsigned long index; + int ret = 1; + + if (!inode || !S_ISDIR(inode->i_mode)) + return ret; + + index = DIR_OFFSET_MIN; + octx = inode->i_op->get_offset_ctx(inode); + xa_for_each(&octx->xa, index, child) { + spin_lock(&child->d_lock); + if (simple_positive(child)) { + spin_unlock(&child->d_lock); + ret = 0; + break; + } + spin_unlock(&child->d_lock); + } + + return ret; +} + /** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved diff --git a/include/linux/fs.h b/include/linux/fs.h index 6c3d86532e3f91..5104405ce3e648 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3197,6 +3197,7 @@ struct offset_ctx { void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); +int simple_offset_empty(struct dentry *dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, diff --git a/mm/shmem.c b/mm/shmem.c index db7dd45c918158..aaf679976f3b50 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3368,7 +3368,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) static int shmem_rmdir(struct inode *dir, struct dentry *dentry) { - if (!simple_empty(dentry)) + if (!simple_offset_empty(dentry)) return -ENOTEMPTY; drop_nlink(d_inode(dentry)); @@ -3425,7 +3425,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, return simple_offset_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); - if (!simple_empty(new_dentry)) + if (!simple_offset_empty(new_dentry)) return -ENOTEMPTY; if (flags & RENAME_WHITEOUT) { From 3e716f31ff8b681f35927c95e12fc3874ed7dea7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:39 -0500 Subject: [PATCH 18/45] libfs: Fix simple_offset_rename_exchange() [ Upstream commit 23cdd0eed3f1fff3af323092b0b88945a7950d8e ] User space expects the replacement (old) directory entry to have the same directory offset after the rename. Suggested-by: Christian Brauner Fixes: a2e459555c5f ("shmem: stable directory offsets") Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20240415152057.4605-2-cel@kernel.org Signed-off-by: Christian Brauner [ cel: adjusted to apply to origin/linux-6.6.y ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index d7b901cb9af413..2029cb6a0e157c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -294,6 +294,18 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) return 0; } +static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry, + long offset) +{ + void *ret; + + ret = xa_store(&octx->xa, offset, dentry, GFP_KERNEL); + if (xa_is_err(ret)) + return xa_err(ret); + offset_set(dentry, offset); + return 0; +} + /** * simple_offset_remove - Remove an entry to a directory's offset map * @octx: directory offset ctx to be updated @@ -351,6 +363,9 @@ int simple_offset_empty(struct dentry *dentry) * @new_dir: destination parent * @new_dentry: destination dentry * + * This API preserves the directory offset values. Caller provides + * appropriate serialization. + * * Returns zero on success. Otherwise a negative errno is returned and the * rename is rolled back. */ @@ -368,11 +383,11 @@ int simple_offset_rename_exchange(struct inode *old_dir, simple_offset_remove(old_ctx, old_dentry); simple_offset_remove(new_ctx, new_dentry); - ret = simple_offset_add(new_ctx, old_dentry); + ret = simple_offset_replace(new_ctx, old_dentry, new_index); if (ret) goto out_restore; - ret = simple_offset_add(old_ctx, new_dentry); + ret = simple_offset_replace(old_ctx, new_dentry, old_index); if (ret) { simple_offset_remove(new_ctx, old_dentry); goto out_restore; @@ -387,10 +402,8 @@ int simple_offset_rename_exchange(struct inode *old_dir, return 0; out_restore: - offset_set(old_dentry, old_index); - xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL); - offset_set(new_dentry, new_index); - xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL); + (void)simple_offset_replace(old_ctx, old_dentry, old_index); + (void)simple_offset_replace(new_ctx, new_dentry, new_index); return ret; } From 753828d6775e9b2cff8d2b865beae65817fcc5eb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:40 -0500 Subject: [PATCH 19/45] libfs: Add simple_offset_rename() API [ Upstream commit 5a1a25be995e1014abd01600479915683e356f5c ] I'm about to fix a tmpfs rename bug that requires the use of internal simple_offset helpers that are not available in mm/shmem.c Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20240415152057.4605-3-cel@kernel.org Signed-off-by: Christian Brauner Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 21 +++++++++++++++++++++ include/linux/fs.h | 2 ++ mm/shmem.c | 3 +-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 2029cb6a0e157c..15d8c3300dae3b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -356,6 +356,27 @@ int simple_offset_empty(struct dentry *dentry) return ret; } +/** + * simple_offset_rename - handle directory offsets for rename + * @old_dir: parent directory of source entry + * @old_dentry: dentry of source entry + * @new_dir: parent_directory of destination entry + * @new_dentry: dentry of destination + * + * Caller provides appropriate serialization. + * + * Returns zero on success, a negative errno value on failure. + */ +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); + struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); + + simple_offset_remove(old_ctx, old_dentry); + return simple_offset_add(new_ctx, old_dentry); +} + /** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved diff --git a/include/linux/fs.h b/include/linux/fs.h index 5104405ce3e648..e4d139fcaad011 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3198,6 +3198,8 @@ void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); int simple_offset_empty(struct dentry *dentry); +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, diff --git a/mm/shmem.c b/mm/shmem.c index aaf679976f3b50..ab2b0e87b051f8 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3434,8 +3434,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, return error; } - simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry); - error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry); + error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry); if (error) return error; From 2b6da3fa94cd19dce63f9821f4dd8bccbf8dc10b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:41 -0500 Subject: [PATCH 20/45] shmem: Fix shmem_rename2() [ Upstream commit ad191eb6d6942bb835a0b20b647f7c53c1d99ca4 ] When renaming onto an existing directory entry, user space expects the replacement entry to have the same directory offset as the original one. Link: https://gitlab.alpinelinux.org/alpine/aports/-/issues/15966 Fixes: a2e459555c5f ("shmem: stable directory offsets") Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20240415152057.4605-4-cel@kernel.org Signed-off-by: Christian Brauner Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index 15d8c3300dae3b..ab9fc182fd2261 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -365,6 +365,9 @@ int simple_offset_empty(struct dentry *dentry) * * Caller provides appropriate serialization. * + * User space expects the directory offset value of the replaced + * (new) directory entry to be unchanged after a rename. + * * Returns zero on success, a negative errno value on failure. */ int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -372,8 +375,14 @@ int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, { struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); + long new_offset = dentry2offset(new_dentry); simple_offset_remove(old_ctx, old_dentry); + + if (new_offset) { + offset_set(new_dentry, 0); + return simple_offset_replace(new_ctx, old_dentry, new_offset); + } return simple_offset_add(new_ctx, old_dentry); } From a01bb1c5cac90074d2ec3e71ffa6463419523023 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:42 -0500 Subject: [PATCH 21/45] libfs: Return ENOSPC when the directory offset range is exhausted [ Upstream commit 903dc9c43a155e0893280c7472d4a9a3a83d75a6 ] Testing shows that the EBUSY error return from mtree_alloc_cyclic() leaks into user space. The ERRORS section of "man creat(2)" says: > EBUSY O_EXCL was specified in flags and pathname refers > to a block device that is in use by the system > (e.g., it is mounted). ENOSPC is closer to what applications expect in this situation. Note that the normal range of simple directory offset values is 2..2^63, so hitting this error is going to be rare to impossible. Fixes: 6faddda69f62 ("libfs: Add directory operations for stable offsets") Cc: stable@vger.kernel.org # v6.9+ Reviewed-by: Jeff Layton Reviewed-by: Yang Erkun Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20241228175522.1854234-2-cel@kernel.org Signed-off-by: Christian Brauner [ cel: adjusted to apply to origin/linux-6.6.y ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index ab9fc182fd2261..200bcfc2ac347d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -287,8 +287,8 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit, &octx->next_offset, GFP_KERNEL); - if (ret < 0) - return ret; + if (unlikely(ret < 0)) + return ret == -EBUSY ? -ENOSPC : ret; offset_set(dentry, offset); return 0; From 6b1de53b1a0a559611217515f262ba8c1779fb30 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:43 -0500 Subject: [PATCH 22/45] Revert "libfs: Add simple_offset_empty()" [ Upstream commit d7bde4f27ceef3dc6d72010a20d4da23db835a32 ] simple_empty() and simple_offset_empty() perform the same task. The latter's use as a canary to find bugs has not found any new issues. A subsequent patch will remove the use of the mtree for iterating directory contents, so revert back to using a similar mechanism for determining whether a directory is indeed empty. Only one such mechanism is ever needed. Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20241228175522.1854234-3-cel@kernel.org Reviewed-by: Yang Erkun Signed-off-by: Christian Brauner [ cel: adjusted to apply to origin/linux-6.6.y ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 32 -------------------------------- include/linux/fs.h | 1 - mm/shmem.c | 4 ++-- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 200bcfc2ac347d..082bacf0b9e6d6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -324,38 +324,6 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry) offset_set(dentry, 0); } -/** - * simple_offset_empty - Check if a dentry can be unlinked - * @dentry: dentry to be tested - * - * Returns 0 if @dentry is a non-empty directory; otherwise returns 1. - */ -int simple_offset_empty(struct dentry *dentry) -{ - struct inode *inode = d_inode(dentry); - struct offset_ctx *octx; - struct dentry *child; - unsigned long index; - int ret = 1; - - if (!inode || !S_ISDIR(inode->i_mode)) - return ret; - - index = DIR_OFFSET_MIN; - octx = inode->i_op->get_offset_ctx(inode); - xa_for_each(&octx->xa, index, child) { - spin_lock(&child->d_lock); - if (simple_positive(child)) { - spin_unlock(&child->d_lock); - ret = 0; - break; - } - spin_unlock(&child->d_lock); - } - - return ret; -} - /** * simple_offset_rename - handle directory offsets for rename * @old_dir: parent directory of source entry diff --git a/include/linux/fs.h b/include/linux/fs.h index e4d139fcaad011..e47596d354ff75 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3197,7 +3197,6 @@ struct offset_ctx { void simple_offset_init(struct offset_ctx *octx); int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry); void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry); -int simple_offset_empty(struct dentry *dentry); int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); int simple_offset_rename_exchange(struct inode *old_dir, diff --git a/mm/shmem.c b/mm/shmem.c index ab2b0e87b051f8..283fb62084d454 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3368,7 +3368,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) static int shmem_rmdir(struct inode *dir, struct dentry *dentry) { - if (!simple_offset_empty(dentry)) + if (!simple_empty(dentry)) return -ENOTEMPTY; drop_nlink(d_inode(dentry)); @@ -3425,7 +3425,7 @@ static int shmem_rename2(struct mnt_idmap *idmap, return simple_offset_rename_exchange(old_dir, old_dentry, new_dir, new_dentry); - if (!simple_offset_empty(new_dentry)) + if (!simple_empty(new_dentry)) return -ENOTEMPTY; if (flags & RENAME_WHITEOUT) { From 0f03dd06e5d18ac46d9ee739740940f4c601f70f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:44 -0500 Subject: [PATCH 23/45] libfs: Replace simple_offset end-of-directory detection [ Upstream commit 68a3a65003145644efcbb651e91db249ccd96281 ] According to getdents(3), the d_off field in each returned directory entry points to the next entry in the directory. The d_off field in the last returned entry in the readdir buffer must contain a valid offset value, but if it points to an actual directory entry, then readdir/getdents can loop. This patch introduces a specific fixed offset value that is placed in the d_off field of the last entry in a directory. Some user space applications assume that the EOD offset value is larger than the offsets of real directory entries, so the largest valid offset value is reserved for this purpose. This new value is never allocated by simple_offset_add(). When ->iterate_dir() returns, getdents{64} inserts the ctx->pos value into the d_off field of the last valid entry in the readdir buffer. When it hits EOD, offset_readdir() sets ctx->pos to the EOD offset value so the last entry is updated to point to the EOD marker. When trying to read the entry at the EOD offset, offset_readdir() terminates immediately. It is worth noting that using a Maple tree for directory offset value allocation does not guarantee a 63-bit range of values -- on platforms where "long" is a 32-bit type, the directory offset value range is still 0..(2^31 - 1). For broad compatibility with 32-bit user space, the largest tmpfs directory cookie value is now S32_MAX. Fixes: 796432efab1e ("libfs: getdents() should return 0 after reaching EOD") Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20241228175522.1854234-5-cel@kernel.org Signed-off-by: Christian Brauner [ cel: adjusted to apply to origin/linux-6.6.y ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index 082bacf0b9e6d6..d546f3f0c036f7 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -239,9 +239,15 @@ const struct inode_operations simple_dir_inode_operations = { }; EXPORT_SYMBOL(simple_dir_inode_operations); -/* 0 is '.', 1 is '..', so always start with offset 2 or more */ +/* simple_offset_add() never assigns these to a dentry */ enum { - DIR_OFFSET_MIN = 2, + DIR_OFFSET_EOD = S32_MAX, +}; + +/* simple_offset_add() allocation range */ +enum { + DIR_OFFSET_MIN = 2, + DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1, }; static void offset_set(struct dentry *dentry, u32 offset) @@ -278,7 +284,8 @@ void simple_offset_init(struct offset_ctx *octx) */ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) { - static const struct xa_limit limit = XA_LIMIT(DIR_OFFSET_MIN, U32_MAX); + static const struct xa_limit limit = XA_LIMIT(DIR_OFFSET_MIN, + DIR_OFFSET_MAX); u32 offset; int ret; @@ -442,8 +449,6 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return -EINVAL; } - /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; return vfs_setpos(file, offset, U32_MAX); } @@ -453,7 +458,7 @@ static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) XA_STATE(xas, &octx->xa, offset); rcu_read_lock(); - child = xas_next_entry(&xas, U32_MAX); + child = xas_next_entry(&xas, DIR_OFFSET_MAX); if (!child) goto out; spin_lock(&child->d_lock); @@ -474,7 +479,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -482,7 +487,7 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return ERR_PTR(-ENOENT); + goto out_eod; if (!offset_dir_emit(ctx, dentry)) { dput(dentry); @@ -492,7 +497,10 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; + return; + +out_eod: + ctx->pos = DIR_OFFSET_EOD; } /** @@ -512,6 +520,8 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) * * On return, @ctx->pos contains an offset that will read the next entry * in this directory when offset_readdir() is called again with @ctx. + * Caller places this value in the d_off field of the last entry in the + * user's buffer. * * Return values: * %0 - Complete @@ -524,13 +534,8 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + if (ctx->pos != DIR_OFFSET_EOD) + offset_iterate_dir(d_inode(dir), ctx); return 0; } From 850e696f362729a163fb6af21a54cde565b71f75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 24 Jan 2025 14:19:45 -0500 Subject: [PATCH 24/45] libfs: Use d_children list to iterate simple_offset directories [ Upstream commit b9b588f22a0c049a14885399e27625635ae6ef91 ] The mtree mechanism has been effective at creating directory offsets that are stable over multiple opendir instances. However, it has not been able to handle the subtleties of renames that are concurrent with readdir. Instead of using the mtree to emit entries in the order of their offset values, use it only to map incoming ctx->pos to a starting entry. Then use the directory's d_children list, which is already maintained properly by the dcache, to find the next child to emit. One of the sneaky things about this is that when the mtree-allocated offset value wraps (which is very rare), looking up ctx->pos++ is not going to find the next entry; it will return NULL. Instead, by following the d_children list, the offset values can appear in any order but all of the entries in the directory will be visited eventually. Note also that the readdir() is guaranteed to reach the tail of this list. Entries are added only at the head of d_children, and readdir walks from its current position in that list towards its tail. Signed-off-by: Chuck Lever Link: https://lore.kernel.org/r/20241228175522.1854234-6-cel@kernel.org Signed-off-by: Christian Brauner [ cel: adjusted to apply to origin/linux-6.6.y ] Signed-off-by: Chuck Lever Signed-off-by: Greg Kroah-Hartman --- fs/libfs.c | 84 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 25 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index d546f3f0c036f7..f5566964aa7d13 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -241,12 +241,13 @@ EXPORT_SYMBOL(simple_dir_inode_operations); /* simple_offset_add() never assigns these to a dentry */ enum { + DIR_OFFSET_FIRST = 2, /* Find first real entry */ DIR_OFFSET_EOD = S32_MAX, }; /* simple_offset_add() allocation range */ enum { - DIR_OFFSET_MIN = 2, + DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1, DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1, }; @@ -452,51 +453,84 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) return vfs_setpos(file, offset, U32_MAX); } -static struct dentry *offset_find_next(struct offset_ctx *octx, loff_t offset) +static struct dentry *find_positive_dentry(struct dentry *parent, + struct dentry *dentry, + bool next) { + struct dentry *found = NULL; + + spin_lock(&parent->d_lock); + if (next) + dentry = list_next_entry(dentry, d_child); + else if (!dentry) + dentry = list_first_entry_or_null(&parent->d_subdirs, + struct dentry, d_child); + for (; dentry && !list_entry_is_head(dentry, &parent->d_subdirs, d_child); + dentry = list_next_entry(dentry, d_child)) { + if (!simple_positive(dentry)) + continue; + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + if (simple_positive(dentry)) + found = dget_dlock(dentry); + spin_unlock(&dentry->d_lock); + if (likely(found)) + break; + } + spin_unlock(&parent->d_lock); + return found; +} + +static noinline_for_stack struct dentry * +offset_dir_lookup(struct dentry *parent, loff_t offset) +{ + struct inode *inode = d_inode(parent); + struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *child, *found = NULL; + XA_STATE(xas, &octx->xa, offset); - rcu_read_lock(); - child = xas_next_entry(&xas, DIR_OFFSET_MAX); - if (!child) - goto out; - spin_lock(&child->d_lock); - if (simple_positive(child)) - found = dget_dlock(child); - spin_unlock(&child->d_lock); -out: - rcu_read_unlock(); + if (offset == DIR_OFFSET_FIRST) + found = find_positive_dentry(parent, NULL, false); + else { + rcu_read_lock(); + child = xas_next_entry(&xas, DIR_OFFSET_MAX); + found = find_positive_dentry(parent, child, false); + rcu_read_unlock(); + } return found; } static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) { - u32 offset = dentry2offset(dentry); struct inode *inode = d_inode(dentry); - return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset, - inode->i_ino, fs_umode_to_dtype(inode->i_mode)); + return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len, + inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct file *file, struct dir_context *ctx) { - struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); + struct dentry *dir = file->f_path.dentry; struct dentry *dentry; + dentry = offset_dir_lookup(dir, ctx->pos); + if (!dentry) + goto out_eod; while (true) { - dentry = offset_find_next(octx, ctx->pos); - if (!dentry) - goto out_eod; + struct dentry *next; - if (!offset_dir_emit(ctx, dentry)) { - dput(dentry); + ctx->pos = dentry2offset(dentry); + if (!offset_dir_emit(ctx, dentry)) break; - } - ctx->pos = dentry2offset(dentry) + 1; + next = find_positive_dentry(dir, dentry, true); dput(dentry); + + if (!next) + goto out_eod; + dentry = next; } + dput(dentry); return; out_eod: @@ -535,7 +569,7 @@ static int offset_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; if (ctx->pos != DIR_OFFSET_EOD) - offset_iterate_dir(d_inode(dir), ctx); + offset_iterate_dir(file, ctx); return 0; } From 5ddcc9e92d54548219985ce4de88618fb53e14ec Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 21 Jan 2025 15:25:36 -0300 Subject: [PATCH 25/45] smb: client: handle lack of EA support in smb2_query_path_info() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3681c74d342db75b0d641ba60de27bf73e16e66b upstream. If the server doesn't support both EAs and reparse point in a file, the SMB2_QUERY_INFO request will fail with either STATUS_NO_EAS_ON_FILE or STATUS_EAS_NOT_SUPPORT in the compound chain, so ignore it as long as reparse point isn't IO_REPARSE_TAG_LX_(CHR|BLK), which would require the EAs to know about major/minor numbers. Reported-by: Pali Rohár Signed-off-by: Paulo Alcantara (Red Hat) Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2inode.c | 92 +++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 23 deletions(-) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index e695df1dbb23b2..9ebd7a5ee23c21 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -176,27 +176,27 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, struct kvec *out_iov, int *out_buftype, struct dentry *dentry) { - struct reparse_data_buffer *rbuf; + struct smb2_query_info_rsp *qi_rsp = NULL; struct smb2_compound_vars *vars = NULL; - struct kvec *rsp_iov, *iov; - struct smb_rqst *rqst; - int rc; - __le16 *utf16_path = NULL; __u8 oplock = SMB2_OPLOCK_LEVEL_NONE; - struct cifs_fid fid; + struct cifs_open_info_data *idata; struct cifs_ses *ses = tcon->ses; + struct reparse_data_buffer *rbuf; struct TCP_Server_Info *server; - int num_rqst = 0, i; int resp_buftype[MAX_COMPOUND]; - struct smb2_query_info_rsp *qi_rsp = NULL; - struct cifs_open_info_data *idata; + int retries = 0, cur_sleep = 1; + __u8 delete_pending[8] = {1,}; + struct kvec *rsp_iov, *iov; struct inode *inode = NULL; - int flags = 0; - __u8 delete_pending[8] = {1, 0, 0, 0, 0, 0, 0, 0}; + __le16 *utf16_path = NULL; + struct smb_rqst *rqst; unsigned int size[2]; - void *data[2]; + struct cifs_fid fid; + int num_rqst = 0, i; unsigned int len; - int retries = 0, cur_sleep = 1; + int tmp_rc, rc; + int flags = 0; + void *data[2]; replay_again: /* reinitialize for possible replay */ @@ -637,7 +637,14 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, tcon->need_reconnect = true; } + tmp_rc = rc; for (i = 0; i < num_cmds; i++) { + char *buf = rsp_iov[i + i].iov_base; + + if (buf && resp_buftype[i + 1] != CIFS_NO_BUFFER) + rc = server->ops->map_error(buf, false); + else + rc = tmp_rc; switch (cmds[i]) { case SMB2_OP_QUERY_INFO: idata = in_iov[i].iov_base; @@ -803,6 +810,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, } } SMB2_close_free(&rqst[num_rqst]); + rc = tmp_rc; num_cmds += 2; if (out_iov && out_buftype) { @@ -858,22 +866,52 @@ static int parse_create_response(struct cifs_open_info_data *data, return rc; } +/* Check only if SMB2_OP_QUERY_WSL_EA command failed in the compound chain */ +static bool ea_unsupported(int *cmds, int num_cmds, + struct kvec *out_iov, int *out_buftype) +{ + int i; + + if (cmds[num_cmds - 1] != SMB2_OP_QUERY_WSL_EA) + return false; + + for (i = 1; i < num_cmds - 1; i++) { + struct smb2_hdr *hdr = out_iov[i].iov_base; + + if (out_buftype[i] == CIFS_NO_BUFFER || !hdr || + hdr->Status != STATUS_SUCCESS) + return false; + } + return true; +} + +static inline void free_rsp_iov(struct kvec *iovs, int *buftype, int count) +{ + int i; + + for (i = 0; i < count; i++) { + free_rsp_buf(buftype[i], iovs[i].iov_base); + memset(&iovs[i], 0, sizeof(*iovs)); + buftype[i] = CIFS_NO_BUFFER; + } +} + int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, struct cifs_open_info_data *data) { + struct kvec in_iov[3], out_iov[5] = {}; + struct cached_fid *cfid = NULL; struct cifs_open_parms oparms; - __u32 create_options = 0; struct cifsFileInfo *cfile; - struct cached_fid *cfid = NULL; + __u32 create_options = 0; + int out_buftype[5] = {}; struct smb2_hdr *hdr; - struct kvec in_iov[3], out_iov[3] = {}; - int out_buftype[3] = {}; + int num_cmds = 0; int cmds[3]; bool islink; - int i, num_cmds = 0; int rc, rc2; data->adjust_tz = false; @@ -943,14 +981,14 @@ int smb2_query_path_info(const unsigned int xid, if (rc || !data->reparse_point) goto out; - if (!tcon->posix_extensions) - cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; /* * Skip SMB2_OP_GET_REPARSE if symlink already parsed in create * response. */ if (data->reparse.tag != IO_REPARSE_TAG_SYMLINK) cmds[num_cmds++] = SMB2_OP_GET_REPARSE; + if (!tcon->posix_extensions) + cmds[num_cmds++] = SMB2_OP_QUERY_WSL_EA; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES | @@ -958,9 +996,18 @@ int smb2_query_path_info(const unsigned int xid, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); + free_rsp_iov(out_iov, out_buftype, ARRAY_SIZE(out_iov)); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, in_iov, cmds, num_cmds, - cfile, NULL, NULL, NULL); + cfile, out_iov, out_buftype, NULL); + if (rc && ea_unsupported(cmds, num_cmds, + out_iov, out_buftype)) { + if (data->reparse.tag != IO_REPARSE_TAG_LX_BLK && + data->reparse.tag != IO_REPARSE_TAG_LX_CHR) + rc = 0; + else + rc = -EOPNOTSUPP; + } break; case -EREMOTE: break; @@ -978,8 +1025,7 @@ int smb2_query_path_info(const unsigned int xid, } out: - for (i = 0; i < ARRAY_SIZE(out_buftype); i++) - free_rsp_buf(out_buftype[i], out_iov[i].iov_base); + free_rsp_iov(out_iov, out_buftype, ARRAY_SIZE(out_iov)); return rc; } From f4168299e553f17aa2ba4016e77a9c38da40eb1d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sat, 11 Jan 2025 09:57:39 -0500 Subject: [PATCH 26/45] net: sched: fix ets qdisc OOB Indexing commit d62b04fca4340a0d468d7853bd66e511935a18cb upstream. Haowei Yan found that ets_class_from_arg() can index an Out-Of-Bound class in ets_class_from_arg() when passed clid of 0. The overflow may cause local privilege escalation. [ 18.852298] ------------[ cut here ]------------ [ 18.853271] UBSAN: array-index-out-of-bounds in net/sched/sch_ets.c:93:20 [ 18.853743] index 18446744073709551615 is out of range for type 'ets_class [16]' [ 18.854254] CPU: 0 UID: 0 PID: 1275 Comm: poc Not tainted 6.12.6-dirty #17 [ 18.854821] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014 [ 18.856532] Call Trace: [ 18.857441] [ 18.858227] dump_stack_lvl+0xc2/0xf0 [ 18.859607] dump_stack+0x10/0x20 [ 18.860908] __ubsan_handle_out_of_bounds+0xa7/0xf0 [ 18.864022] ets_class_change+0x3d6/0x3f0 [ 18.864322] tc_ctl_tclass+0x251/0x910 [ 18.864587] ? lock_acquire+0x5e/0x140 [ 18.865113] ? __mutex_lock+0x9c/0xe70 [ 18.866009] ? __mutex_lock+0xa34/0xe70 [ 18.866401] rtnetlink_rcv_msg+0x170/0x6f0 [ 18.866806] ? __lock_acquire+0x578/0xc10 [ 18.867184] ? __pfx_rtnetlink_rcv_msg+0x10/0x10 [ 18.867503] netlink_rcv_skb+0x59/0x110 [ 18.867776] rtnetlink_rcv+0x15/0x30 [ 18.868159] netlink_unicast+0x1c3/0x2b0 [ 18.868440] netlink_sendmsg+0x239/0x4b0 [ 18.868721] ____sys_sendmsg+0x3e2/0x410 [ 18.869012] ___sys_sendmsg+0x88/0xe0 [ 18.869276] ? rseq_ip_fixup+0x198/0x260 [ 18.869563] ? rseq_update_cpu_node_id+0x10a/0x190 [ 18.869900] ? trace_hardirqs_off+0x5a/0xd0 [ 18.870196] ? syscall_exit_to_user_mode+0xcc/0x220 [ 18.870547] ? do_syscall_64+0x93/0x150 [ 18.870821] ? __memcg_slab_free_hook+0x69/0x290 [ 18.871157] __sys_sendmsg+0x69/0xd0 [ 18.871416] __x64_sys_sendmsg+0x1d/0x30 [ 18.871699] x64_sys_call+0x9e2/0x2670 [ 18.871979] do_syscall_64+0x87/0x150 [ 18.873280] ? do_syscall_64+0x93/0x150 [ 18.874742] ? lock_release+0x7b/0x160 [ 18.876157] ? do_user_addr_fault+0x5ce/0x8f0 [ 18.877833] ? irqentry_exit_to_user_mode+0xc2/0x210 [ 18.879608] ? irqentry_exit+0x77/0xb0 [ 18.879808] ? clear_bhb_loop+0x15/0x70 [ 18.880023] ? clear_bhb_loop+0x15/0x70 [ 18.880223] ? clear_bhb_loop+0x15/0x70 [ 18.880426] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 18.880683] RIP: 0033:0x44a957 [ 18.880851] Code: ff ff e8 fc 00 00 00 66 2e 0f 1f 84 00 00 00 00 00 66 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 8974 24 10 [ 18.881766] RSP: 002b:00007ffcdd00fad8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 18.882149] RAX: ffffffffffffffda RBX: 00007ffcdd010db8 RCX: 000000000044a957 [ 18.882507] RDX: 0000000000000000 RSI: 00007ffcdd00fb70 RDI: 0000000000000003 [ 18.885037] RBP: 00007ffcdd010bc0 R08: 000000000703c770 R09: 000000000703c7c0 [ 18.887203] R10: 0000000000000080 R11: 0000000000000246 R12: 0000000000000001 [ 18.888026] R13: 00007ffcdd010da8 R14: 00000000004ca7d0 R15: 0000000000000001 [ 18.888395] [ 18.888610] ---[ end trace ]--- Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc") Reported-by: Haowei Yan Suggested-by: Haowei Yan Signed-off-by: Jamal Hadi Salim Reviewed-by: Eric Dumazet Reviewed-by: Petr Machata Link: https://patch.msgid.link/20250111145740.74755-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_ets.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index b10efeaf0629d2..9fd70462b41d5a 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -91,6 +91,8 @@ ets_class_from_arg(struct Qdisc *sch, unsigned long arg) { struct ets_sched *q = qdisc_priv(sch); + if (arg == 0 || arg > q->nbands) + return NULL; return &q->classes[arg - 1]; } From a99bacb35c1416355eef957560e8fcac3a665549 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 3 Sep 2024 22:48:19 +0300 Subject: [PATCH 27/45] block: fix integer overflow in BLKSECDISCARD commit 697ba0b6ec4ae04afb67d3911799b5e2043b4455 upstream. I independently rediscovered commit 22d24a544b0d49bbcbd61c8c0eaf77d3c9297155 block: fix overflow in blk_ioctl_discard() but for secure erase. Same problem: uint64_t r[2] = {512, 18446744073709551104ULL}; ioctl(fd, BLKSECDISCARD, r); will enter near infinite loop inside blkdev_issue_secure_erase(): a.out: attempt to access beyond end of device loop0: rw=5, sector=3399043073, nr_sectors = 1024 limit=2048 bio_check_eod: 3286214 callbacks suppressed Signed-off-by: Alexey Dobriyan Link: https://lore.kernel.org/r/9e64057f-650a-46d1-b9f7-34af391536ef@p183 Signed-off-by: Jens Axboe Signed-off-by: Rajani Kantha Signed-off-by: Greg Kroah-Hartman --- block/ioctl.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/ioctl.c b/block/ioctl.c index 3786033342848d..231537f79a8cb4 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -115,7 +115,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, return -EINVAL; filemap_invalidate_lock(inode->i_mapping); - err = truncate_bdev_range(bdev, mode, start, start + len - 1); + err = truncate_bdev_range(bdev, mode, start, end - 1); if (err) goto fail; err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); @@ -127,7 +127,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, void __user *argp) { - uint64_t start, len; + uint64_t start, len, end; uint64_t range[2]; int err; @@ -142,11 +142,12 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, len = range[1]; if ((start & 511) || (len & 511)) return -EINVAL; - if (start + len > bdev_nr_bytes(bdev)) + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; filemap_invalidate_lock(bdev->bd_inode->i_mapping); - err = truncate_bdev_range(bdev, mode, start, start + len - 1); + err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); From 854d0d361e451a6b5f7bde2b86d634a8b6f80f3e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 12 Dec 2024 09:53:10 +0100 Subject: [PATCH 28/45] Revert "HID: multitouch: Add support for lenovo Y9000P Touchpad" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3d88ba86ba6f35a0467f25a88c38aa5639190d04 upstream. This reverts commit 251efae73bd46b097deec4f9986d926813aed744. Quoting Wang Yuli: "The 27C6:01E0 touchpad doesn't require the workaround and applying it would actually break functionality. The initial report came from a BBS forum, but we suspect the information provided by the forum user may be incorrect which could happen sometimes. [1] Further investigation showed that the Lenovo Y9000P 2024 doesn't even use a Goodix touchpad. [2] For the broader issue of 27c6:01e0 being unusable on some devices, it just need to address it with a libinput quirk. In conclusion, we should revert this commit, which is the best solution." Reported-by: Ulrich Müller Reported-by: WangYuli Link: https://lore.kernel.org/all/uikt4wwpw@gentoo.org/ Signed-off-by: Jiri Kosina Signed-off-by: Greg Kroah-Hartman --- drivers/hid/hid-ids.h | 1 - drivers/hid/hid-multitouch.c | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f16940f3d93d46..1174626904cb02 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -506,7 +506,6 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 -#define I2C_DEVICE_ID_GOODIX_01E0 0x01e0 #define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 #define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index bf9cad71125923..e62104e1a6038b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1447,8 +1447,7 @@ static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, { if (hdev->vendor == I2C_VENDOR_ID_GOODIX && (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || - hdev->product == I2C_DEVICE_ID_GOODIX_01E9 || - hdev->product == I2C_DEVICE_ID_GOODIX_01E0)) { + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { if (rdesc[607] == 0x15) { rdesc[607] = 0x25; dev_info( @@ -2073,10 +2072,7 @@ static const struct hid_device_id mt_devices[] = { I2C_DEVICE_ID_GOODIX_01E8) }, { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, - I2C_DEVICE_ID_GOODIX_01E9) }, - { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, - HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, - I2C_DEVICE_ID_GOODIX_01E0) }, + I2C_DEVICE_ID_GOODIX_01E8) }, /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, From 7d6405c13b0d8a8367cd8df63f118b619a3f0dd2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 21 Jan 2025 09:27:22 -0800 Subject: [PATCH 29/45] cachestat: fix page cache statistics permission checking commit 5f537664e705b0bf8b7e329861f20128534f6a83 upstream. When the 'cachestat()' system call was added in commit cf264e1329fb ("cachestat: implement cachestat syscall"), it was meant to be a much more convenient (and performant) version of mincore() that didn't need mapping things into the user virtual address space in order to work. But it ended up missing the "check for writability or ownership" fix for mincore(), done in commit 134fca9063ad ("mm/mincore.c: make mincore() more conservative"). This just adds equivalent logic to 'cachestat()', modified for the file context (rather than vma). Reported-by: Sudheendra Raghav Neela Fixes: cf264e1329fb ("cachestat: implement cachestat syscall") Tested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Nhat Pham Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- mm/filemap.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/mm/filemap.c b/mm/filemap.c index 6a3d62de1cca7b..056422e6a0be8f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4270,6 +4270,20 @@ static void filemap_cachestat(struct address_space *mapping, rcu_read_unlock(); } +/* + * See mincore: reveal pagecache information only for files + * that the calling process has write access to, or could (if + * tried) open for writing. + */ +static inline bool can_do_cachestat(struct file *f) +{ + if (f->f_mode & FMODE_WRITE) + return true; + if (inode_owner_or_capable(file_mnt_idmap(f), file_inode(f))) + return true; + return file_permission(f, MAY_WRITE) == 0; +} + /* * The cachestat(2) system call. * @@ -4329,6 +4343,11 @@ SYSCALL_DEFINE4(cachestat, unsigned int, fd, return -EOPNOTSUPP; } + if (!can_do_cachestat(f.file)) { + fdput(f); + return -EPERM; + } + if (flags != 0) { fdput(f); return -EINVAL; From c981c32c38af80737a2fedc16e270546d139ccdd Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 22 Jan 2025 10:38:30 -0700 Subject: [PATCH 30/45] vfio/platform: check the bounds of read/write syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit ce9ff21ea89d191e477a02ad7eabf4f996b80a69 upstream. count and offset are passed from user space and not checked, only offset is capped to 40 bits, which can be used to read/write out of bounds of the device. Fixes: 6e3f26456009 (“vfio/platform: read and write support for the device fd”) Cc: stable@vger.kernel.org Reported-by: Mostafa Saleh Reviewed-by: Eric Auger Reviewed-by: Mostafa Saleh Tested-by: Mostafa Saleh Signed-off-by: Alex Williamson Signed-off-by: Greg Kroah-Hartman --- drivers/vfio/platform/vfio_platform_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index e53757d1d0958a..3bf1043cd7957c 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -388,6 +388,11 @@ static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg, { unsigned int done = 0; + if (off >= reg->size) + return -EINVAL; + + count = min_t(size_t, count, reg->size - off); + if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); @@ -467,6 +472,11 @@ static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg, { unsigned int done = 0; + if (off >= reg->size) + return -EINVAL; + + count = min_t(size_t, count, reg->size - off); + if (!reg->ioaddr) { reg->ioaddr = ioremap(reg->addr, reg->size); From 6e35f560daebe40264c95e9a1ab03110d4997df6 Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Thu, 18 Jul 2024 10:43:56 +0100 Subject: [PATCH 31/45] ext4: fix access to uninitialised lock in fc replay path commit 23dfdb56581ad92a9967bcd720c8c23356af74c1 upstream. The following kernel trace can be triggered with fstest generic/629 when executed against a filesystem with fast-commit feature enabled: INFO: trying to register non-static key. The code is fine but needs lockdep annotation, or maybe you didn't initialize this object before use? turning off the locking correctness validator. CPU: 0 PID: 866 Comm: mount Not tainted 6.10.0+ #11 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.2-3-gd478f380-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x66/0x90 register_lock_class+0x759/0x7d0 __lock_acquire+0x85/0x2630 ? __find_get_block+0xb4/0x380 lock_acquire+0xd1/0x2d0 ? __ext4_journal_get_write_access+0xd5/0x160 _raw_spin_lock+0x33/0x40 ? __ext4_journal_get_write_access+0xd5/0x160 __ext4_journal_get_write_access+0xd5/0x160 ext4_reserve_inode_write+0x61/0xb0 __ext4_mark_inode_dirty+0x79/0x270 ? ext4_ext_replay_set_iblocks+0x2f8/0x450 ext4_ext_replay_set_iblocks+0x330/0x450 ext4_fc_replay+0x14c8/0x1540 ? jread+0x88/0x2e0 ? rcu_is_watching+0x11/0x40 do_one_pass+0x447/0xd00 jbd2_journal_recover+0x139/0x1b0 jbd2_journal_load+0x96/0x390 ext4_load_and_init_journal+0x253/0xd40 ext4_fill_super+0x2cc6/0x3180 ... In the replay path there's an attempt to lock sbi->s_bdev_wb_lock in function ext4_check_bdev_write_error(). Unfortunately, at this point this spinlock has not been initialized yet. Moving it's initialization to an earlier point in __ext4_fill_super() fixes this splat. Signed-off-by: Luis Henriques (SUSE) Link: https://patch.msgid.link/20240718094356.7863-1-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Bruno VERNAY Signed-off-by: Victor Giraud Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 71ced0ada9a2e5..f019ce64eba48e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5366,6 +5366,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); + spin_lock_init(&sbi->s_bdev_wb_lock); + ext4_fast_commit_init(sb); sb->s_root = NULL; @@ -5586,7 +5588,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * Save the original bdev mapping's wb_err value which could be * used to detect the metadata async write error. */ - spin_lock_init(&sbi->s_bdev_wb_lock); errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err, &sbi->s_bdev_wb_err); EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; From e0500e4373cd3d5eace1f1712444ab830b82c114 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 23 Oct 2024 15:30:09 +0300 Subject: [PATCH 32/45] ipv4: ip_tunnel: Fix suspicious RCU usage warning in ip_tunnel_find() commit 90e0569dd3d32f4f4d2ca691d3fa5a8a14a13c12 upstream. The per-netns IP tunnel hash table is protected by the RTNL mutex and ip_tunnel_find() is only called from the control path where the mutex is taken. Add a lockdep expression to hlist_for_each_entry_rcu() in ip_tunnel_find() in order to validate that the mutex is held and to silence the suspicious RCU usage warning [1]. [1] WARNING: suspicious RCU usage 6.12.0-rc3-custom-gd95d9a31aceb #139 Not tainted ----------------------------- net/ipv4/ip_tunnel.c:221 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by ip/362: #0: ffffffff86fc7cb0 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x377/0xf60 stack backtrace: CPU: 12 UID: 0 PID: 362 Comm: ip Not tainted 6.12.0-rc3-custom-gd95d9a31aceb #139 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: dump_stack_lvl+0xba/0x110 lockdep_rcu_suspicious.cold+0x4f/0xd6 ip_tunnel_find+0x435/0x4d0 ip_tunnel_newlink+0x517/0x7a0 ipgre_newlink+0x14c/0x170 __rtnl_newlink+0x1173/0x19c0 rtnl_newlink+0x6c/0xa0 rtnetlink_rcv_msg+0x3cc/0xf60 netlink_rcv_skb+0x171/0x450 netlink_unicast+0x539/0x7f0 netlink_sendmsg+0x8c1/0xd80 ____sys_sendmsg+0x8f9/0xc20 ___sys_sendmsg+0x197/0x1e0 __sys_sendmsg+0x122/0x1f0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Suggested-by: Eric Dumazet Signed-off-by: Ido Schimmel Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241023123009.749764-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski Signed-off-by: Alva Lan Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index dd1803bf9c5c63..b5d64cd3ab0a23 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -218,7 +218,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn, struct ip_tunnel *t = NULL; struct hlist_head *head = ip_bucket(itn, parms); - hlist_for_each_entry_rcu(t, head, hash_node) { + hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && link == READ_ONCE(t->parms.link) && From 088bde862f8d3d0fc52e40e66a0484a246837087 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 7 Jan 2025 17:28:40 +0000 Subject: [PATCH 33/45] scsi: storvsc: Ratelimit warning logs to prevent VM denial of service commit d2138eab8cde61e0e6f62d0713e45202e8457d6d upstream. If there's a persistent error in the hypervisor, the SCSI warning for failed I/O can flood the kernel log and max out CPU utilization, preventing troubleshooting from the VM side. Ratelimit the warning so it doesn't DoS the VM. Closes: https://github.com/microsoft/WSL/issues/9173 Signed-off-by: Easwar Hariharan Link: https://lore.kernel.org/r/20250107-eahariha-ratelimit-storvsc-v1-1-7fc193d1f2b0@linux.microsoft.com Reviewed-by: Michael Kelley Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/storvsc_drv.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index d0b55c1fa908a5..b3c588b102d900 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -171,6 +171,12 @@ do { \ dev_warn(&(dev)->device, fmt, ##__VA_ARGS__); \ } while (0) +#define storvsc_log_ratelimited(dev, level, fmt, ...) \ +do { \ + if (do_logging(level)) \ + dev_warn_ratelimited(&(dev)->device, fmt, ##__VA_ARGS__); \ +} while (0) + struct vmscsi_request { u16 length; u8 srb_status; @@ -1177,7 +1183,7 @@ static void storvsc_on_io_completion(struct storvsc_device *stor_device, int loglevel = (stor_pkt->vm_srb.cdb[0] == TEST_UNIT_READY) ? STORVSC_LOGGING_WARN : STORVSC_LOGGING_ERROR; - storvsc_log(device, loglevel, + storvsc_log_ratelimited(device, loglevel, "tag#%d cmd 0x%x status: scsi 0x%x srb 0x%x hv 0x%x\n", scsi_cmd_to_rq(request->cmd)->tag, stor_pkt->vm_srb.cdb[0], From 6377838560c03b36e1153a42ef727533def9b68f Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Mon, 13 Jan 2025 18:00:34 +0000 Subject: [PATCH 34/45] USB: serial: quatech2: fix null-ptr-deref in qt2_process_read_urb() commit 575a5adf48b06a2980c9eeffedf699ed5534fade upstream. This patch addresses a null-ptr-deref in qt2_process_read_urb() due to an incorrect bounds check in the following: if (newport > serial->num_ports) { dev_err(&port->dev, "%s - port change to invalid port: %i\n", __func__, newport); break; } The condition doesn't account for the valid range of the serial->port buffer, which is from 0 to serial->num_ports - 1. When newport is equal to serial->num_ports, the assignment of "port" in the following code is out-of-bounds and NULL: serial_priv->current_port = newport; port = serial->port[serial_priv->current_port]; The fix checks if newport is greater than or equal to serial->num_ports indicating it is out-of-bounds. Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=506479ebf12fe435d01a Fixes: f7a33e608d9a ("USB: serial: add quatech2 usb to serial driver") Cc: # 3.5 Signed-off-by: Qasim Ijaz Reviewed-by: Greg Kroah-Hartman Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/quatech2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 821f25e52ec24c..4cde2bd3885f13 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -503,7 +503,7 @@ static void qt2_process_read_urb(struct urb *urb) newport = *(ch + 3); - if (newport > serial->num_ports) { + if (newport >= serial->num_ports) { dev_err(&port->dev, "%s - port change to invalid port: %i\n", __func__, newport); From 3d8f4dc8c78ffd77a4106614977c1e51531690f7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 17 Jan 2025 09:17:12 +0100 Subject: [PATCH 35/45] Revert "usb: gadget: u_serial: Disable ep before setting port to null to fix the crash caused by port being null" commit 086fd062bc3883ae1ce4166cff5355db315ad879 upstream. This reverts commit 13014969cbf07f18d62ceea40bd8ca8ec9d36cec. It is reported to cause crashes on Tegra systems, so revert it for now. Link: https://lore.kernel.org/r/1037c1ad-9230-4181-b9c3-167dbaa47644@nvidia.com Reported-by: Jon Hunter Cc: stable Cc: Lianqin Hu Link: https://lore.kernel.org/r/2025011711-yippee-fever-a737@gregkh Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/u_serial.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index fe2737e55f8e89..729b0472bab098 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1398,10 +1398,6 @@ void gserial_disconnect(struct gserial *gser) /* REVISIT as above: how best to track this? */ port->port_line_coding = gser->port_line_coding; - /* disable endpoints, aborting down any active I/O */ - usb_ep_disable(gser->out); - usb_ep_disable(gser->in); - port->port_usb = NULL; gser->ioport = NULL; if (port->port.count > 0) { @@ -1413,6 +1409,10 @@ void gserial_disconnect(struct gserial *gser) spin_unlock(&port->port_lock); spin_unlock_irqrestore(&serial_port_lock, flags); + /* disable endpoints, aborting down any active I/O */ + usb_ep_disable(gser->out); + usb_ep_disable(gser->in); + /* finally, free any unused/unusable I/O buffers */ spin_lock_irqsave(&port->port_lock, flags); if (port->port.count == 0) From cca07b29f7af4f778ee3bc5cbd036bf97c820b4c Mon Sep 17 00:00:00 2001 From: Lianqin Hu Date: Wed, 15 Jan 2025 09:32:35 +0000 Subject: [PATCH 36/45] ALSA: usb-audio: Add delay quirk for USB Audio Device commit ad5b205f9e022b407d91f952faddd05718be2866 upstream. Audio control requests that sets sampling frequency sometimes fail on this card. Adding delay between control messages eliminates that problem. usb 1-1: New USB device found, idVendor=0d8c, idProduct=0014 usb 1-1: New USB device strings: Mfr=1, Product=2, SerialNumber=0 usb 1-1: Product: USB Audio Device usb 1-1: Manufacturer: C-Media Electronics Inc. Signed-off-by: Lianqin Hu Cc: Signed-off-by: Takashi Iwai Link: https://patch.msgid.link/TYUPR06MB6217E94D922B9BF422A73F32D2192@TYUPR06MB6217.apcprd06.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ec81b47c41c9ea..9cdd6cfd8219a7 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2139,6 +2139,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0c45, 0x6340, /* Sonix HD USB Camera */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x0d8c, 0x0014, /* USB Audio Device */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x0ecb, 0x205c, /* JBL Quantum610 Wireless */ QUIRK_FLAG_FIXED_RATE), DEVICE_FLG(0x0ecb, 0x2069, /* JBL Quantum810 Wireless */ From 80327feb234c336ea554166f0e02c9c3a9ee9eae Mon Sep 17 00:00:00 2001 From: Nicolas Nobelis Date: Sat, 16 Nov 2024 19:24:19 +0100 Subject: [PATCH 37/45] Input: xpad - add support for Nacon Pro Compact commit 1bba29603a2812e7b3dbb4ec1558ecb626ee933e upstream. Add Nacon Pro Compact to the list of supported devices. These are the ids of the "Colorlight" variant. The buttons, sticks and vibrations work. The decorative LEDs on the other hand do not (they stay turned off). Signed-off-by: Nicolas Nobelis Link: https://lore.kernel.org/r/20241116182419.33833-1-nicolas@nobelis.eu Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 0cfcad8348a6da..dca7e354dde273 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -382,6 +382,7 @@ static const struct xpad_device { { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, From dc8c9c171ef3285ac62589c6faef0bf4d5e6b29b Mon Sep 17 00:00:00 2001 From: Mark Pearson Date: Mon, 20 Jan 2025 20:24:08 -0800 Subject: [PATCH 38/45] Input: atkbd - map F23 key to support default copilot shortcut commit 907bc9268a5a9f823ffa751957a5c1dd59f83f42 upstream. Microsoft defined Meta+Shift+F23 as the Copilot shortcut instead of a dedicated keycode, and multiple vendors have their keyboards emit this sequence in response to users pressing a dedicated "Copilot" key. Unfortunately the default keymap table in atkbd does not map scancode 0x6e (F23) and so the key combination does not work even if userspace is ready to handle it. Because this behavior is common between multiple vendors and the scancode is currently unused map 0x6e to keycode 193 (KEY_F23) so that key sequence is generated properly. MS documentation for the scan code: https://learn.microsoft.com/en-us/windows/win32/inputdev/about-keyboard-input#scan-codes Confirmed on Lenovo, HP and Dell machines by Canonical. Tested on Lenovo T14s G6 AMD. Signed-off-by: Mark Pearson Link: https://lore.kernel.org/r/20250107034554.25843-1-mpearson-lenovo@squebb.ca Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/keyboard/atkbd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index c229bd6b3f7f2f..aad2d75c036781 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -89,7 +89,7 @@ static const unsigned short atkbd_set2_keycode[ATKBD_KEYMAP_SIZE] = { 0, 46, 45, 32, 18, 5, 4, 95, 0, 57, 47, 33, 20, 19, 6,183, 0, 49, 48, 35, 34, 21, 7,184, 0, 0, 50, 36, 22, 8, 9,185, 0, 51, 37, 23, 24, 11, 10, 0, 0, 52, 53, 38, 39, 25, 12, 0, - 0, 89, 40, 0, 26, 13, 0, 0, 58, 54, 28, 27, 0, 43, 0, 85, + 0, 89, 40, 0, 26, 13, 0,193, 58, 54, 28, 27, 0, 43, 0, 85, 0, 86, 91, 90, 92, 0, 14, 94, 0, 79,124, 75, 71,121, 0, 0, 82, 83, 80, 76, 77, 72, 1, 69, 87, 78, 81, 74, 55, 73, 70, 99, From 7ea7e327a19988d6dcf97b60492691456499825b Mon Sep 17 00:00:00 2001 From: Nilton Perim Neto Date: Fri, 17 Jan 2025 09:34:18 -0800 Subject: [PATCH 39/45] Input: xpad - add unofficial Xbox 360 wireless receiver clone commit e4940fe6322c851659c17852b671c6e7b1aa9f56 upstream. Although it mimics the Microsoft's VendorID, it is in fact a clone. Taking into account that the original Microsoft Receiver is not being manufactured anymore, this drive can solve dpad issues encontered by those who still use the original 360 Wireless controller but are using a receiver clone. Signed-off-by: Nilton Perim Neto Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250107192830.414709-12-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index dca7e354dde273..a0c6c71587afdc 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -150,6 +150,7 @@ static const struct xpad_device { { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, + { 0x045e, 0x02a9, "Xbox 360 Wireless Receiver (Unofficial)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, From 723aa536831c5169ca9edafd8921911bf7e1b316 Mon Sep 17 00:00:00 2001 From: "Pierre-Loup A. Griffais" Date: Thu, 16 Jan 2025 10:29:53 -0800 Subject: [PATCH 40/45] Input: xpad - add QH Electronics VID/PID commit 92600f3295ff571890c981d886c6544030cc05f3 upstream. Add support for QH Electronics Xbox 360-compatible controller Signed-off-by: Pierre-Loup A. Griffais Signed-off-by: Vicki Pfau Link: https://lore.kernel.org/r/20250116012518.3476735-1-vi@endrift.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index a0c6c71587afdc..f0c37d5a008628 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -306,6 +306,7 @@ static const struct xpad_device { { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, + { 0x1a86, 0xe310, "QH Electronics Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, @@ -515,6 +516,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ + XPAD_XBOX360_VENDOR(0x1a86), /* QH Electronics */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ From c009f186558274f81d74f76c9a66c4923f91bfbd Mon Sep 17 00:00:00 2001 From: Leonardo Brondani Schenkel Date: Fri, 17 Jan 2025 16:46:11 -0800 Subject: [PATCH 41/45] Input: xpad - improve name of 8BitDo controller 2dc8:3106 commit 66372fa9936088bf29c4f47907efeff03c51a2c8 upstream. 8BitDo Pro 2 Wired Controller shares the same USB identifier (2dc8:3106) as a different device, so amend name to reflect that and reduce confusion as the user might think the controller was misdetected. Because Pro 2 Wired will not work in XTYPE_XBOXONE mode (button presses won't register), tagging it as XTYPE_XBOX360 remains appropriate. Signed-off-by: Leonardo Brondani Schenkel Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250107192830.414709-2-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index f0c37d5a008628..138093516b55ef 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -374,7 +374,7 @@ static const struct xpad_device { { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, - { 0x2dc8, 0x3106, "8BitDo Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, + { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, From 7c477b26d39ed72ff3e996613d8191350f423f42 Mon Sep 17 00:00:00 2001 From: Matheos Mattsson Date: Fri, 17 Jan 2025 17:00:48 -0800 Subject: [PATCH 42/45] Input: xpad - add support for Nacon Evol-X Xbox One Controller commit 3a6e5ed2372bcb2a3c554fda32419efd91ff9b0c upstream. Add Nacon Evol-X Xbox One to the list of supported devices. Signed-off-by: Matheos Mattsson Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250107192830.414709-9-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 138093516b55ef..a2715c36ab0a88 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -385,6 +385,7 @@ static const struct xpad_device { { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, + { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, @@ -532,6 +533,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ + XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ { } From 35b144b393db1477fc2281e3082a3ae87d23f3ec Mon Sep 17 00:00:00 2001 From: Jack Greiner Date: Fri, 17 Jan 2025 16:51:58 -0800 Subject: [PATCH 43/45] Input: xpad - add support for wooting two he (arm) commit 222f3390c15c4452a9f7e26f5b7d9138e75d00d5 upstream. Add Wooting Two HE (ARM) to the list of supported devices. Signed-off-by: Jack Greiner Signed-off-by: Pavel Rojtberg Link: https://lore.kernel.org/r/20250107192830.414709-3-rojtberg@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov Signed-off-by: Greg Kroah-Hartman --- drivers/input/joystick/xpad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index a2715c36ab0a88..198a44c87e8411 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -381,6 +381,7 @@ static const struct xpad_device { { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, + { 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, From 431fb709db434565b5e7cee82a11bd681a794fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ADra=20Canal?= Date: Wed, 22 Jan 2025 22:24:03 -0300 Subject: [PATCH 44/45] drm/v3d: Assign job pointer to NULL before signaling the fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6e64d6b3a3c39655de56682ec83e894978d23412 upstream. In commit e4b5ccd392b9 ("drm/v3d: Ensure job pointer is set to NULL after job completion"), we introduced a change to assign the job pointer to NULL after completing a job, indicating job completion. However, this approach created a race condition between the DRM scheduler workqueue and the IRQ execution thread. As soon as the fence is signaled in the IRQ execution thread, a new job starts to be executed. This results in a race condition where the IRQ execution thread sets the job pointer to NULL simultaneously as the `run_job()` function assigns a new job to the pointer. This race condition can lead to a NULL pointer dereference if the IRQ execution thread sets the job pointer to NULL after `run_job()` assigns it to the new job. When the new job completes and the GPU emits an interrupt, `v3d_irq()` is triggered, potentially causing a crash. [ 466.310099] Unable to handle kernel NULL pointer dereference at virtual address 00000000000000c0 [ 466.318928] Mem abort info: [ 466.321723] ESR = 0x0000000096000005 [ 466.325479] EC = 0x25: DABT (current EL), IL = 32 bits [ 466.330807] SET = 0, FnV = 0 [ 466.333864] EA = 0, S1PTW = 0 [ 466.337010] FSC = 0x05: level 1 translation fault [ 466.341900] Data abort info: [ 466.344783] ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000 [ 466.350285] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 466.355350] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 466.360677] user pgtable: 4k pages, 39-bit VAs, pgdp=0000000089772000 [ 466.367140] [00000000000000c0] pgd=0000000000000000, p4d=0000000000000000, pud=0000000000000000 [ 466.375875] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP [ 466.382163] Modules linked in: rfcomm snd_seq_dummy snd_hrtimer snd_seq snd_seq_device algif_hash algif_skcipher af_alg bnep binfmt_misc vc4 snd_soc_hdmi_codec drm_display_helper cec brcmfmac_wcc spidev rpivid_hevc(C) drm_client_lib brcmfmac hci_uart drm_dma_helper pisp_be btbcm brcmutil snd_soc_core aes_ce_blk v4l2_mem2mem bluetooth aes_ce_cipher snd_compress videobuf2_dma_contig ghash_ce cfg80211 gf128mul snd_pcm_dmaengine videobuf2_memops ecdh_generic sha2_ce ecc videobuf2_v4l2 snd_pcm v3d sha256_arm64 rfkill videodev snd_timer sha1_ce libaes gpu_sched snd videobuf2_common sha1_generic drm_shmem_helper mc rp1_pio drm_kms_helper raspberrypi_hwmon spi_bcm2835 gpio_keys i2c_brcmstb rp1 raspberrypi_gpiomem rp1_mailbox rp1_adc nvmem_rmem uio_pdrv_genirq uio i2c_dev drm ledtrig_pattern drm_panel_orientation_quirks backlight fuse dm_mod ip_tables x_tables ipv6 [ 466.458429] CPU: 0 UID: 1000 PID: 2008 Comm: chromium Tainted: G C 6.13.0-v8+ #18 [ 466.467336] Tainted: [C]=CRAP [ 466.470306] Hardware name: Raspberry Pi 5 Model B Rev 1.0 (DT) [ 466.476157] pstate: 404000c9 (nZcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 466.483143] pc : v3d_irq+0x118/0x2e0 [v3d] [ 466.487258] lr : __handle_irq_event_percpu+0x60/0x228 [ 466.492327] sp : ffffffc080003ea0 [ 466.495646] x29: ffffffc080003ea0 x28: ffffff80c0c94200 x27: 0000000000000000 [ 466.502807] x26: ffffffd08dd81d7b x25: ffffff80c0c94200 x24: ffffff8003bdc200 [ 466.509969] x23: 0000000000000001 x22: 00000000000000a7 x21: 0000000000000000 [ 466.517130] x20: ffffff8041bb0000 x19: 0000000000000001 x18: 0000000000000000 [ 466.524291] x17: ffffffafadfb0000 x16: ffffffc080000000 x15: 0000000000000000 [ 466.531452] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 466.538613] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffffd08c527eb0 [ 466.545777] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 [ 466.552941] x5 : ffffffd08c4100d0 x4 : ffffffafadfb0000 x3 : ffffffc080003f70 [ 466.560102] x2 : ffffffc0829e8058 x1 : 0000000000000001 x0 : 0000000000000000 [ 466.567263] Call trace: [ 466.569711] v3d_irq+0x118/0x2e0 [v3d] (P) [ 466.573826] __handle_irq_event_percpu+0x60/0x228 [ 466.578546] handle_irq_event+0x54/0xb8 [ 466.582391] handle_fasteoi_irq+0xac/0x240 [ 466.586498] generic_handle_domain_irq+0x34/0x58 [ 466.591128] gic_handle_irq+0x48/0xd8 [ 466.594798] call_on_irq_stack+0x24/0x58 [ 466.598730] do_interrupt_handler+0x88/0x98 [ 466.602923] el0_interrupt+0x44/0xc0 [ 466.606508] __el0_irq_handler_common+0x18/0x28 [ 466.611050] el0t_64_irq_handler+0x10/0x20 [ 466.615156] el0t_64_irq+0x198/0x1a0 [ 466.618740] Code: 52800035 3607faf3 f9442e80 52800021 (f9406018) [ 466.624853] ---[ end trace 0000000000000000 ]--- [ 466.629483] Kernel panic - not syncing: Oops: Fatal exception in interrupt [ 466.636384] SMP: stopping secondary CPUs [ 466.640320] Kernel Offset: 0x100c400000 from 0xffffffc080000000 [ 466.646259] PHYS_OFFSET: 0x0 [ 466.649141] CPU features: 0x100,00000170,00901250,0200720b [ 466.654644] Memory Limit: none [ 466.657706] ---[ end Kernel panic - not syncing: Oops: Fatal exception in interrupt ]--- Fix the crash by assigning the job pointer to NULL before signaling the fence. This ensures that the job pointer is cleared before any new job starts execution, preventing the race condition and the NULL pointer dereference crash. Cc: stable@vger.kernel.org Fixes: e4b5ccd392b9 ("drm/v3d: Ensure job pointer is set to NULL after job completion") Signed-off-by: Maíra Canal Reviewed-by: Jose Maria Casanova Crespo Reviewed-by: Iago Toral Quiroga Tested-by: Phil Elwell Link: https://patchwork.freedesktop.org/patch/msgid/20250123012403.20447-1-mcanal@igalia.com Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/v3d/v3d_irq.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c index 76806039691a2c..b2d59a16869728 100644 --- a/drivers/gpu/drm/v3d/v3d_irq.c +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -102,8 +102,10 @@ v3d_irq(int irq, void *arg) to_v3d_fence(v3d->bin_job->base.irq_fence); trace_v3d_bcl_irq(&v3d->drm, fence->seqno); - dma_fence_signal(&fence->base); + v3d->bin_job = NULL; + dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -112,8 +114,10 @@ v3d_irq(int irq, void *arg) to_v3d_fence(v3d->render_job->base.irq_fence); trace_v3d_rcl_irq(&v3d->drm, fence->seqno); - dma_fence_signal(&fence->base); + v3d->render_job = NULL; + dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -122,8 +126,10 @@ v3d_irq(int irq, void *arg) to_v3d_fence(v3d->csd_job->base.irq_fence); trace_v3d_csd_irq(&v3d->drm, fence->seqno); - dma_fence_signal(&fence->base); + v3d->csd_job = NULL; + dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } @@ -159,8 +165,10 @@ v3d_hub_irq(int irq, void *arg) to_v3d_fence(v3d->tfu_job->base.irq_fence); trace_v3d_tfu_irq(&v3d->drm, fence->seqno); - dma_fence_signal(&fence->base); + v3d->tfu_job = NULL; + dma_fence_signal(&fence->base); + status = IRQ_HANDLED; } From d51b7d37f14e76db7a1a13046ed87198c0407fcb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 1 Feb 2025 18:37:57 +0100 Subject: [PATCH 45/45] Linux 6.6.75 Link: https://lore.kernel.org/r/20250130133458.903274626@linuxfoundation.org Tested-by: Mark Brown Tested-by: Florian Fainelli Tested-by: Jon Hunter Tested-by: Ron Economos Tested-by: Linux Kernel Functional Testing Tested-by: kernelci.org bot Tested-by: Hardik Garg Tested-by: Peter Schneider Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b8e5c65910862e..b8041104f248d3 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 74 +SUBLEVEL = 75 EXTRAVERSION = NAME = Pinguïn Aangedreven