From dde33a9d0b80aae0c69594d1f462515d7ff1cb3d Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Sep 2024 11:16:07 -0700 Subject: [PATCH 001/268] sch/netem: fix use after free in netem_dequeue commit 3b3a2a9c6349e25a025d2330f479bc33a6ccb54a upstream. If netem_dequeue() enqueues packet to inner qdisc and that qdisc returns __NET_XMIT_STOLEN. The packet is dropped but qdisc_tree_reduce_backlog() is not called to update the parent's q.qlen, leading to the similar use-after-free as Commit e04991a48dbaf382 ("netem: fix return value if duplicate enqueue fails") Commands to trigger KASAN UaF: ip link add type dummy ip link set lo up ip link set dummy0 up tc qdisc add dev lo parent root handle 1: drr tc filter add dev lo parent 1: basic classid 1:1 tc class add dev lo classid 1:1 drr tc qdisc add dev lo parent 1:1 handle 2: netem tc qdisc add dev lo parent 2: handle 3: drr tc filter add dev lo parent 3: basic classid 3:1 action mirred egress redirect dev dummy0 tc class add dev lo classid 3:1 drr ping -c1 -W0.01 localhost # Trigger bug tc class del dev lo classid 1:1 tc class add dev lo classid 1:1 drr ping -c1 -W0.01 localhost # UaF Fixes: 50612537e9ab ("netem: fix classful handling") Reported-by: Budimir Markovic Signed-off-by: Stephen Hemminger Link: https://patch.msgid.link/20240901182438.4992-1-stephen@networkplumber.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_netem.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 0224a92492453f..d36eeb7b050293 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -742,11 +742,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) err = qdisc_enqueue(skb, q->qdisc, &to_free); kfree_skb_list(to_free); - if (err != NET_XMIT_SUCCESS && - net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - pkt_len); + if (err != NET_XMIT_SUCCESS) { + if (net_xmit_drop_count(err)) + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } From b0804c286ccfcf5f5c004d5bf8a54c0508b5e86b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jens=20Emil=20Schulz=20=C3=98stergaard?= Date: Thu, 29 Aug 2024 11:52:54 +0200 Subject: [PATCH 002/268] net: microchip: vcap: Fix use-after-free error in kunit test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit a3c1e45156ad39f225cd7ddae0f81230a3b1e657 upstream. This is a clear use-after-free error. We remove it, and rely on checking the return code of vcap_del_rule. Reported-by: Dan Carpenter Closes: https://lore.kernel.org/kernel-janitors/7bffefc6-219a-4f71-baa0-ad4526e5c198@kili.mountain/ Fixes: c956b9b318d9 ("net: microchip: sparx5: Adding KUNIT tests of key/action values in VCAP API") Signed-off-by: Jens Emil Schulz Østergaard Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/microchip/vcap/vcap_api_kunit.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c index fe4e166de8a045..79276bc3d49519 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_kunit.c @@ -1442,18 +1442,8 @@ static void vcap_api_encode_rule_test(struct kunit *test) vcap_enable_lookups(&test_vctrl, &test_netdev, 0, 0, rule->cookie, false); - vcap_free_rule(rule); - - /* Check that the rule has been freed: tricky to access since this - * memory should not be accessible anymore - */ - KUNIT_EXPECT_PTR_NE(test, NULL, rule); - ret = list_empty(&rule->keyfields); - KUNIT_EXPECT_EQ(test, true, ret); - ret = list_empty(&rule->actionfields); - KUNIT_EXPECT_EQ(test, true, ret); - - vcap_del_rule(&test_vctrl, &test_netdev, id); + ret = vcap_del_rule(&test_vctrl, &test_netdev, id); + KUNIT_EXPECT_EQ(test, 0, ret); } static void vcap_api_set_rule_counter_test(struct kunit *test) From 5d13afd021eb43868fe03cef6da34ad08831ad6d Mon Sep 17 00:00:00 2001 From: robelin Date: Fri, 23 Aug 2024 14:43:41 +0000 Subject: [PATCH 003/268] ASoC: dapm: Fix UAF for snd_soc_pcm_runtime object commit b4a90b543d9f62d3ac34ec1ab97fc5334b048565 upstream. When using kernel with the following extra config, - CONFIG_KASAN=y - CONFIG_KASAN_GENERIC=y - CONFIG_KASAN_INLINE=y - CONFIG_KASAN_VMALLOC=y - CONFIG_FRAME_WARN=4096 kernel detects that snd_pcm_suspend_all() access a freed 'snd_soc_pcm_runtime' object when the system is suspended, which leads to a use-after-free bug: [ 52.047746] BUG: KASAN: use-after-free in snd_pcm_suspend_all+0x1a8/0x270 [ 52.047765] Read of size 1 at addr ffff0000b9434d50 by task systemd-sleep/2330 [ 52.047785] Call trace: [ 52.047787] dump_backtrace+0x0/0x3c0 [ 52.047794] show_stack+0x34/0x50 [ 52.047797] dump_stack_lvl+0x68/0x8c [ 52.047802] print_address_description.constprop.0+0x74/0x2c0 [ 52.047809] kasan_report+0x210/0x230 [ 52.047815] __asan_report_load1_noabort+0x3c/0x50 [ 52.047820] snd_pcm_suspend_all+0x1a8/0x270 [ 52.047824] snd_soc_suspend+0x19c/0x4e0 The snd_pcm_sync_stop() has a NULL check on 'substream->runtime' before making any access. So we need to always set 'substream->runtime' to NULL everytime we kfree() it. Fixes: a72706ed8208 ("ASoC: codec2codec: remove ephemeral variables") Signed-off-by: robelin Signed-off-by: Sameer Pujar Link: https://patch.msgid.link/20240823144342.4123814-2-spujar@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- sound/soc/soc-dapm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 85e3bbf7e5f0e1..7729f8f4d5e610 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4018,6 +4018,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, case SND_SOC_DAPM_POST_PMD: kfree(substream->runtime); + substream->runtime = NULL; break; default: From 939375737b5a0b1bf9b1e75129054e11bc9ca65e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 23 Jul 2024 16:20:55 -0700 Subject: [PATCH 004/268] KVM: x86: Acquire kvm->srcu when handling KVM_SET_VCPU_EVENTS commit 4bcdd831d9d01e0fb64faea50732b59b2ee88da1 upstream. Grab kvm->srcu when processing KVM_SET_VCPU_EVENTS, as KVM will forcibly leave nested VMX/SVM if SMM mode is being toggled, and leaving nested VMX reads guest memory. 
Note, kvm_vcpu_ioctl_x86_set_vcpu_events() can also be called from KVM_RUN via sync_regs(), which already holds SRCU. I.e. trying to precisely use kvm_vcpu_srcu_read_lock() around the problematic SMM code would cause problems. Acquiring SRCU isn't all that expensive, so for simplicity, grab it unconditionally for KVM_SET_VCPU_EVENTS. ============================= WARNING: suspicious RCU usage 6.10.0-rc7-332d2c1d713e-next-vm #552 Not tainted ----------------------------- include/linux/kvm_host.h:1027 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by repro/1071: #0: ffff88811e424430 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x7d/0x970 [kvm] stack backtrace: CPU: 15 PID: 1071 Comm: repro Not tainted 6.10.0-rc7-332d2c1d713e-next-vm #552 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 Call Trace: dump_stack_lvl+0x7f/0x90 lockdep_rcu_suspicious+0x13f/0x1a0 kvm_vcpu_gfn_to_memslot+0x168/0x190 [kvm] kvm_vcpu_read_guest+0x3e/0x90 [kvm] nested_vmx_load_msr+0x6b/0x1d0 [kvm_intel] load_vmcs12_host_state+0x432/0xb40 [kvm_intel] vmx_leave_nested+0x30/0x40 [kvm_intel] kvm_vcpu_ioctl_x86_set_vcpu_events+0x15d/0x2b0 [kvm] kvm_arch_vcpu_ioctl+0x1107/0x1750 [kvm] ? mark_held_locks+0x49/0x70 ? kvm_vcpu_ioctl+0x7d/0x970 [kvm] ? kvm_vcpu_ioctl+0x497/0x970 [kvm] kvm_vcpu_ioctl+0x497/0x970 [kvm] ? lock_acquire+0xba/0x2d0 ? find_held_lock+0x2b/0x80 ? do_user_addr_fault+0x40c/0x6f0 ? lock_release+0xb7/0x270 __x64_sys_ioctl+0x82/0xb0 do_syscall_64+0x6c/0x170 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7ff11eb1b539 Fixes: f7e570780efc ("KVM: x86: Forcibly leave nested virt when SMM state is toggled") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240723232055.3643811-1-seanjc@google.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c7e7ab1593d5b4..50cc822e129007 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5829,7 +5829,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&events, argp, sizeof(struct kvm_vcpu_events))) break; + kvm_vcpu_srcu_read_lock(vcpu); r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); + kvm_vcpu_srcu_read_unlock(vcpu); break; } case KVM_GET_DEBUGREGS: { From c98bb4f15e7f008ac88aa6850a53637abb26798a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Fri, 2 Aug 2024 18:16:08 +0300 Subject: [PATCH 005/268] KVM: SVM: fix emulation of msr reads/writes of MSR_FS_BASE and MSR_GS_BASE commit dad1613e0533b380318281c1519e1a3477c2d0d2 upstream. If these msrs are read by the emulator (e.g due to 'force emulation' prefix), SVM code currently fails to extract the corresponding segment bases, and return them to the emulator. Fix that. 
Cc: stable@vger.kernel.org Signed-off-by: Maxim Levitsky Link: https://lore.kernel.org/r/20240802151608.72896-3-mlevitsk@redhat.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e3c2acc1adc735..c19b9b96143d82 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2869,6 +2869,12 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CSTAR: msr_info->data = svm->vmcb01.ptr->save.cstar; break; + case MSR_GS_BASE: + msr_info->data = svm->vmcb01.ptr->save.gs.base; + break; + case MSR_FS_BASE: + msr_info->data = svm->vmcb01.ptr->save.fs.base; + break; case MSR_KERNEL_GS_BASE: msr_info->data = svm->vmcb01.ptr->save.kernel_gs_base; break; @@ -3090,6 +3096,12 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_CSTAR: svm->vmcb01.ptr->save.cstar = data; break; + case MSR_GS_BASE: + svm->vmcb01.ptr->save.gs.base = data; + break; + case MSR_FS_BASE: + svm->vmcb01.ptr->save.fs.base = data; + break; case MSR_KERNEL_GS_BASE: svm->vmcb01.ptr->save.kernel_gs_base = data; break; From 6c7c519c4dc05174c09eec7f92862dbd70c17341 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 8 Aug 2024 06:29:36 +0000 Subject: [PATCH 006/268] KVM: SVM: Don't advertise Bus Lock Detect to guest if SVM support is missing commit 54950bfe2b69cdc06ef753872b5225e54eb73506 upstream. If host supports Bus Lock Detect, KVM advertises it to guests even if SVM support is absent. Additionally, guest wouldn't be able to use it despite guest CPUID bit being set. Fix it by unconditionally clearing the feature bit in KVM cpu capability. Reported-by: Jim Mattson Closes: https://lore.kernel.org/r/CALMp9eRet6+v8Y1Q-i6mqPm4hUow_kJNhmVHfOV8tMfuSS=tVg@mail.gmail.com Fixes: 76ea438b4afc ("KVM: X86: Expose bus lock debug exception to guest") Cc: stable@vger.kernel.org Signed-off-by: Ravi Bangoria Reviewed-by: Jim Mattson Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/20240808062937.1149-4-ravi.bangoria@amd.com Signed-off-by: Sean Christopherson Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm/svm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c19b9b96143d82..413f1f2aadd1a3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -5178,6 +5178,9 @@ static __init void svm_set_cpu_caps(void) /* CPUID 0x8000001F (SME/SEV features) */ sev_set_cpu_caps(); + + /* Don't advertise Bus Lock Detect to guest if SVM support is absent */ + kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT); } static __init int svm_hardware_setup(void) From 638e61b00208302b6f96871e5278d94c512a69cb Mon Sep 17 00:00:00 2001 From: Christoffer Sandberg Date: Tue, 27 Aug 2024 12:25:40 +0200 Subject: [PATCH 007/268] ALSA: hda/conexant: Add pincfg quirk to enable top speakers on Sirius devices commit 4178d78cd7a86510ba68d203f26fc01113c7f126 upstream. The Sirius notebooks have two sets of speakers 0x17 (sides) and 0x1d (top center). The side speakers are active by default but the top speakers aren't. This patch provides a pincfg quirk to activate the top speakers. 
Signed-off-by: Christoffer Sandberg Signed-off-by: Werner Sembach Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20240827102540.9480-1-wse@tuxedocomputers.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_conexant.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 8396d1d93668c3..63bd0e384bae2c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -311,6 +311,7 @@ enum { CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, CXT_PINCFG_SWS_JS201D, + CXT_PINCFG_TOP_SPEAKER, }; /* for hda_fixup_thinkpad_acpi() */ @@ -978,6 +979,13 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_PINS, .v.pins = cxt_pincfg_sws_js201d, }, + [CXT_PINCFG_TOP_SPEAKER] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1d, 0x82170111 }, + { } + }, + }, }; static const struct snd_pci_quirk cxt5045_fixups[] = { @@ -1074,6 +1082,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), SND_PCI_QUIRK(0x1c06, 0x2011, "Lemote A1004", CXT_PINCFG_LEMOTE_A1004), SND_PCI_QUIRK(0x1c06, 0x2012, "Lemote A1205", CXT_PINCFG_LEMOTE_A1205), + SND_PCI_QUIRK(0x2782, 0x12c3, "Sirius Gen1", CXT_PINCFG_TOP_SPEAKER), + SND_PCI_QUIRK(0x2782, 0x12c5, "Sirius Gen2", CXT_PINCFG_TOP_SPEAKER), {} }; @@ -1093,6 +1103,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, { .id = CXT_PINCFG_LENOVO_NOTEBOOK, .name = "lenovo-20149" }, { .id = CXT_PINCFG_SWS_JS201D, .name = "sws-js201d" }, + { .id = CXT_PINCFG_TOP_SPEAKER, .name = "sirius-top-speaker" }, {} }; From 421c2701a9fe5313091bd2ac25379c0cfcd74f61 Mon Sep 17 00:00:00 2001 From: Terry Cheong Date: Fri, 30 Aug 2024 04:11:53 +0800 Subject: [PATCH 008/268] ALSA: hda/realtek: add patch for internal mic in Lenovo V145 commit ef27e89e7f3015be2b3c124833fbd6d2e4686561 upstream. Lenovo V145 is having phase inverted dmic but simply applying inverted dmic fixups does not work. Chaining up verb fixes for ALC283 enables inverting dmic fixup to work properly. 
Signed-off-by: Terry Cheong Cc: Link: https://patch.msgid.link/20240830-lenovo-v145-fixes-v3-1-f7b7265068fa@chromium.org Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 5736516275a347..6e3c5da2455059 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7366,6 +7366,7 @@ enum { ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, + ALC236_FIXUP_LENOVO_INV_DMIC, ALC298_FIXUP_SAMSUNG_AMP, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, @@ -8922,6 +8923,12 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led_micmute_vref, }, + [ALC236_FIXUP_LENOVO_INV_DMIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc_fixup_inv_dmic, + .chained = true, + .chain_id = ALC283_FIXUP_INT_MIC, + }, [ALC298_FIXUP_SAMSUNG_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc298_fixup_samsung_amp, @@ -10298,6 +10305,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x38f9, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x38fa, "Thinkbook 16P Gen5", ALC287_FIXUP_CS35L41_I2C_2), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), + SND_PCI_QUIRK(0x17aa, 0x3913, "Lenovo 145", ALC236_FIXUP_LENOVO_INV_DMIC), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K), @@ -10546,6 +10554,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, + {.id = ALC236_FIXUP_LENOVO_INV_DMIC, .name = "alc236-fixup-lenovo-inv-mic"}, {} }; #define ALC225_STANDARD_PINS \ From c1f23443da3f347f1eca224762da043c920ae20c Mon Sep 17 00:00:00 2001 From: Maximilien Perreault Date: Tue, 3 Sep 2024 20:10:13 -0700 Subject: [PATCH 009/268] ALSA: hda/realtek: Support mute LED on HP Laptop 14-dq2xxx commit 47a9e8dbb8d4713a9aac7cc6ce3c82dcc94217d8 upstream. The mute LED on this HP laptop uses ALC236 and requires a quirk to function. This patch enables the existing quirk for the device. 
Signed-off-by: Maximilien Perreault Cc: Link: https://patch.msgid.link/20240904031013.21220-1-maximilienperreault@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6e3c5da2455059..6661fed2c2bbf1 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9873,6 +9873,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f6, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x87fd, "HP Laptop 14-dq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x87fe, "HP Laptop 15s-fq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2), SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), From d84ab6661e8d09092de9b034b016515ef9b66085 Mon Sep 17 00:00:00 2001 From: "Nysal Jan K.A." Date: Thu, 29 Aug 2024 07:58:27 +0530 Subject: [PATCH 010/268] powerpc/qspinlock: Fix deadlock in MCS queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 734ad0af3609464f8f93e00b6c0de1e112f44559 upstream. If an interrupt occurs in queued_spin_lock_slowpath() after we increment qnodesp->count and before node->lock is initialized, another CPU might see stale lock values in get_tail_qnode(). If the stale lock value happens to match the lock on that CPU, then we write to the "next" pointer of the wrong qnode. This causes a deadlock as the former CPU, once it becomes the head of the MCS queue, will spin indefinitely until it's "next" pointer is set by its successor in the queue. Running stress-ng on a 16 core (16EC/16VP) shared LPAR, results in occasional lockups similar to the following: $ stress-ng --all 128 --vm-bytes 80% --aggressive \ --maximize --oomable --verify --syslog \ --metrics --times --timeout 5m watchdog: CPU 15 Hard LOCKUP ...... NIP [c0000000000b78f4] queued_spin_lock_slowpath+0x1184/0x1490 LR [c000000001037c5c] _raw_spin_lock+0x6c/0x90 Call Trace: 0xc000002cfffa3bf0 (unreliable) _raw_spin_lock+0x6c/0x90 raw_spin_rq_lock_nested.part.135+0x4c/0xd0 sched_ttwu_pending+0x60/0x1f0 __flush_smp_call_function_queue+0x1dc/0x670 smp_ipi_demux_relaxed+0xa4/0x100 xive_muxed_ipi_action+0x20/0x40 __handle_irq_event_percpu+0x80/0x240 handle_irq_event_percpu+0x2c/0x80 handle_percpu_irq+0x84/0xd0 generic_handle_irq+0x54/0x80 __do_irq+0xac/0x210 __do_IRQ+0x74/0xd0 0x0 do_IRQ+0x8c/0x170 hardware_interrupt_common_virt+0x29c/0x2a0 --- interrupt: 500 at queued_spin_lock_slowpath+0x4b8/0x1490 ...... NIP [c0000000000b6c28] queued_spin_lock_slowpath+0x4b8/0x1490 LR [c000000001037c5c] _raw_spin_lock+0x6c/0x90 --- interrupt: 500 0xc0000029c1a41d00 (unreliable) _raw_spin_lock+0x6c/0x90 futex_wake+0x100/0x260 do_futex+0x21c/0x2a0 sys_futex+0x98/0x270 system_call_exception+0x14c/0x2f0 system_call_vectored_common+0x15c/0x2ec The following code flow illustrates how the deadlock occurs. For the sake of brevity, assume that both locks (A and B) are contended and we call the queued_spin_lock_slowpath() function. 
CPU0 CPU1 ---- ---- spin_lock_irqsave(A) | spin_unlock_irqrestore(A) | spin_lock(B) | | | ▼ | id = qnodesp->count++; | (Note that nodes[0].lock == A) | | | ▼ | Interrupt | (happens before "nodes[0].lock = B") | | | ▼ | spin_lock_irqsave(A) | | | ▼ | id = qnodesp->count++ | nodes[1].lock = A | | | ▼ | Tail of MCS queue | | spin_lock_irqsave(A) ▼ | Head of MCS queue ▼ | CPU0 is previous tail ▼ | Spin indefinitely ▼ (until "nodes[1].next != NULL") prev = get_tail_qnode(A, CPU0) | ▼ prev == &qnodes[CPU0].nodes[0] (as qnodes[CPU0].nodes[0].lock == A) | ▼ WRITE_ONCE(prev->next, node) | ▼ Spin indefinitely (until nodes[0].locked == 1) Thanks to Saket Kumar Bhaskar for help with recreating the issue Fixes: 84990b169557 ("powerpc/qspinlock: add mcs queueing for contended waiters") Cc: stable@vger.kernel.org # v6.2+ Reported-by: Geetika Moolchandani Reported-by: Vaishnavi Bhat Reported-by: Jijo Varghese Signed-off-by: Nysal Jan K.A. Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20240829022830.1164355-1-nysal@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/lib/qspinlock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c index 6dd2f46bd3ef64..8830267789c9c0 100644 --- a/arch/powerpc/lib/qspinlock.c +++ b/arch/powerpc/lib/qspinlock.c @@ -715,7 +715,15 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b } release: - qnodesp->count--; /* release the node */ + /* + * Clear the lock before releasing the node, as another CPU might see stale + * values if an interrupt occurs after we increment qnodesp->count + * but before node->lock is initialized. The barrier ensures that + * there are no further stores to the node after it has been released. + */ + node->lock = NULL; + barrier(); + qnodesp->count--; } void queued_spin_lock_slowpath(struct qspinlock *lock) From 5a72d1edb0843e4c927a4096f81e631031c25c28 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 3 Sep 2024 10:53:24 -0300 Subject: [PATCH 011/268] smb: client: fix double put of @cfile in smb2_set_path_size() commit f9c169b51b6ce20394594ef674d6b10efba31220 upstream. If smb2_compound_op() is called with a valid @cfile and returned -EINVAL, we need to call cifs_get_writable_path() before retrying it as the reference of @cfile was already dropped by previous call. This fixes the following KASAN splat when running fstests generic/013 against Windows Server 2022: CIFS: Attempting to mount //w22-fs0/scratch run fstests generic/013 at 2024-09-02 19:48:59 ================================================================== BUG: KASAN: slab-use-after-free in detach_if_pending+0xab/0x200 Write of size 8 at addr ffff88811f1a3730 by task kworker/3:2/176 CPU: 3 UID: 0 PID: 176 Comm: kworker/3:2 Not tainted 6.11.0-rc6 #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 Workqueue: cifsoplockd cifs_oplock_break [cifs] Call Trace: dump_stack_lvl+0x5d/0x80 ? detach_if_pending+0xab/0x200 print_report+0x156/0x4d9 ? detach_if_pending+0xab/0x200 ? __virt_addr_valid+0x145/0x300 ? __phys_addr+0x46/0x90 ? detach_if_pending+0xab/0x200 kasan_report+0xda/0x110 ? detach_if_pending+0xab/0x200 detach_if_pending+0xab/0x200 timer_delete+0x96/0xe0 ? __pfx_timer_delete+0x10/0x10 ? rcu_is_watching+0x20/0x50 try_to_grab_pending+0x46/0x3b0 __cancel_work+0x89/0x1b0 ? __pfx___cancel_work+0x10/0x10 ? kasan_save_track+0x14/0x30 cifs_close_deferred_file+0x110/0x2c0 [cifs] ? 
__pfx_cifs_close_deferred_file+0x10/0x10 [cifs] ? __pfx_down_read+0x10/0x10 cifs_oplock_break+0x4c1/0xa50 [cifs] ? __pfx_cifs_oplock_break+0x10/0x10 [cifs] ? lock_is_held_type+0x85/0xf0 ? mark_held_locks+0x1a/0x90 process_one_work+0x4c6/0x9f0 ? find_held_lock+0x8a/0xa0 ? __pfx_process_one_work+0x10/0x10 ? lock_acquired+0x220/0x550 ? __list_add_valid_or_report+0x37/0x100 worker_thread+0x2e4/0x570 ? __kthread_parkme+0xd1/0xf0 ? __pfx_worker_thread+0x10/0x10 kthread+0x17f/0x1c0 ? kthread+0xda/0x1c0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Allocated by task 1118: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 __kasan_kmalloc+0xaa/0xb0 cifs_new_fileinfo+0xc8/0x9d0 [cifs] cifs_atomic_open+0x467/0x770 [cifs] lookup_open.isra.0+0x665/0x8b0 path_openat+0x4c3/0x1380 do_filp_open+0x167/0x270 do_sys_openat2+0x129/0x160 __x64_sys_creat+0xad/0xe0 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Freed by task 83: kasan_save_stack+0x30/0x50 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x70 poison_slab_object+0xe9/0x160 __kasan_slab_free+0x32/0x50 kfree+0xf2/0x300 process_one_work+0x4c6/0x9f0 worker_thread+0x2e4/0x570 kthread+0x17f/0x1c0 ret_from_fork+0x31/0x60 ret_from_fork_asm+0x1a/0x30 Last potentially related work creation: kasan_save_stack+0x30/0x50 __kasan_record_aux_stack+0xad/0xc0 insert_work+0x29/0xe0 __queue_work+0x5ea/0x760 queue_work_on+0x6d/0x90 _cifsFileInfo_put+0x3f6/0x770 [cifs] smb2_compound_op+0x911/0x3940 [cifs] smb2_set_path_size+0x228/0x270 [cifs] cifs_set_file_size+0x197/0x460 [cifs] cifs_setattr+0xd9c/0x14b0 [cifs] notify_change+0x4e3/0x740 do_truncate+0xfa/0x180 vfs_truncate+0x195/0x200 __x64_sys_truncate+0x109/0x150 do_syscall_64+0xbb/0x1d0 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 71f15c90e785 ("smb: client: retry compound request without reusing lease") Cc: stable@vger.kernel.org Signed-off-by: Paulo Alcantara (Red Hat) Cc: David Howells Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/client/smb2inode.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 15cbfec4c28c78..fc82d5ebf923cf 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1149,6 +1149,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon, cfile, NULL, NULL, dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); + cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, &(int){SMB2_OP_SET_EOF}, 1, From 41bc256da7e47b679df87c7fc7a5b393052b9cce Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Tue, 27 Aug 2024 21:44:41 +0900 Subject: [PATCH 012/268] ksmbd: unset the binding mark of a reused connection commit 78c5a6f1f630172b19af4912e755e1da93ef0ab5 upstream. Steve French reported null pointer dereference error from sha256 lib. cifs.ko can send session setup requests on reused connection. If reused connection is used for binding session, conn->binding can still remain true and generate_preauth_hash() will not set sess->Preauth_HashValue and it will be NULL. It is used as a material to create an encryption key in ksmbd_gen_smb311_encryptionkey. ->Preauth_HashValue cause null pointer dereference error from crypto_shash_update(). 
BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 8 PID: 429254 Comm: kworker/8:39 Hardware name: LENOVO 20MAS08500/20MAS08500, BIOS N2CET69W (1.52 ) Workqueue: ksmbd-io handle_ksmbd_work [ksmbd] RIP: 0010:lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] ? show_regs+0x6d/0x80 ? __die+0x24/0x80 ? page_fault_oops+0x99/0x1b0 ? do_user_addr_fault+0x2ee/0x6b0 ? exc_page_fault+0x83/0x1b0 ? asm_exc_page_fault+0x27/0x30 ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] ? lib_sha256_base_do_update.isra.0+0x11e/0x1d0 [sha256_ssse3] ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] ? __pfx_sha256_transform_rorx+0x10/0x10 [sha256_ssse3] _sha256_update+0x77/0xa0 [sha256_ssse3] sha256_avx2_update+0x15/0x30 [sha256_ssse3] crypto_shash_update+0x1e/0x40 hmac_update+0x12/0x20 crypto_shash_update+0x1e/0x40 generate_key+0x234/0x380 [ksmbd] generate_smb3encryptionkey+0x40/0x1c0 [ksmbd] ksmbd_gen_smb311_encryptionkey+0x72/0xa0 [ksmbd] ntlm_authenticate.isra.0+0x423/0x5d0 [ksmbd] smb2_sess_setup+0x952/0xaa0 [ksmbd] __process_request+0xa3/0x1d0 [ksmbd] __handle_ksmbd_work+0x1c4/0x2f0 [ksmbd] handle_ksmbd_work+0x2d/0xa0 [ksmbd] process_one_work+0x16c/0x350 worker_thread+0x306/0x440 ? __pfx_worker_thread+0x10/0x10 kthread+0xef/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x44/0x70 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1b/0x30 Fixes: f5a544e3bab7 ("ksmbd: add support for SMB3 multichannel") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/smb2pdu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index da57adc2bd6898..c55924f5164e20 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -1687,6 +1687,8 @@ int smb2_sess_setup(struct ksmbd_work *work) rc = ksmbd_session_register(conn, sess); if (rc) goto out_err; + + conn->binding = false; } else if (conn->dialect >= SMB30_PROT_ID && (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL) && req->Flags & SMB2_SESSION_REQ_FLAG_BINDING) { @@ -1765,6 +1767,8 @@ int smb2_sess_setup(struct ksmbd_work *work) sess = NULL; goto out_err; } + + conn->binding = false; } work->sess = sess; From f75881f54c05305cb04e4db3739b59b721d68876 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2024 22:22:35 +0300 Subject: [PATCH 013/268] ksmbd: Unlock on in ksmbd_tcp_set_interfaces() commit 844436e045ac2ab7895d8b281cb784a24de1d14d upstream. Unlock before returning an error code if this allocation fails. 
Fixes: 0626e6641f6b ("cifsd: add server handler for central processing and tranport layers") Cc: stable@vger.kernel.org # v5.15+ Signed-off-by: Dan Carpenter Acked-by: Namjae Jeon Signed-off-by: Steve French Signed-off-by: Greg Kroah-Hartman --- fs/smb/server/transport_tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 6633fa78e9b96b..2ce7f75059cb35 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -624,8 +624,10 @@ int ksmbd_tcp_set_interfaces(char *ifc_list, int ifc_list_sz) for_each_netdev(&init_net, netdev) { if (netif_is_bridge_port(netdev)) continue; - if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) + if (!alloc_iface(kstrdup(netdev->name, GFP_KERNEL))) { + rtnl_unlock(); return -ENOMEM; + } } rtnl_unlock(); bind_additional_ifaces = 1; From c0fbc9593b18b9d7ad0ad500f2ff55bbbf8ab2a2 Mon Sep 17 00:00:00 2001 From: Zheng Qixing Date: Thu, 22 Aug 2024 11:30:50 +0800 Subject: [PATCH 014/268] ata: libata: Fix memory leak for error path in ata_host_alloc() commit 284b75a3d83c7631586d98f6dede1d90f128f0db upstream. In ata_host_alloc(), if devres_alloc() fails to allocate the device host resource data pointer, the already allocated ata_host structure is not freed before returning from the function. This results in a potential memory leak. Call kfree(host) before jumping to the error handling path to ensure that the ata_host structure is properly freed if devres_alloc() fails. Fixes: 2623c7a5f279 ("libata: add refcounting to ata_host") Cc: stable@vger.kernel.org Signed-off-by: Zheng Qixing Reviewed-by: Yu Kuai Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 373d23af1d9acf..4ed90d46a017a8 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5593,8 +5593,10 @@ struct ata_host *ata_host_alloc(struct device *dev, int max_ports) } dr = devres_alloc(ata_devres_release, 0, GFP_KERNEL); - if (!dr) + if (!dr) { + kfree(host); goto err_out; + } devres_add(dev, dr); dev_set_drvdata(dev, host); From ef00818c50cf55a3a56bd9a9fae867c92dfb84e7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 26 Aug 2024 15:53:04 +0300 Subject: [PATCH 015/268] x86/tdx: Fix data leak in mmio_read() commit b6fb565a2d15277896583d471b21bc14a0c99661 upstream. The mmio_read() function makes a TDVMCALL to retrieve MMIO data for an address from the VMM. Sean noticed that mmio_read() unintentionally exposes the value of an initialized variable (val) on the stack to the VMM. This variable is only needed as an output value. It did not need to be passed to the VMM in the first place. Do not send the original value of *val to the VMM. [ dhansen: clarify what 'val' is used for. ] Fixes: 31d58c4e557d ("x86/tdx: Handle in-kernel MMIO") Reported-by: Sean Christopherson Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Dave Hansen Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20240826125304.1566719-1-kirill.shutemov%40linux.intel.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/coco/tdx/tdx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index f3c75809fed26d..006041fbb65f83 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -362,7 +362,6 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val) .r12 = size, .r13 = EPT_READ, .r14 = addr, - .r15 = *val, }; if (__tdx_hypercall_ret(&args)) From 0eaf812aa1506704f3b78be87036860e5d0fe81d Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 19 Aug 2024 11:30:04 -0700 Subject: [PATCH 016/268] perf/x86/intel: Limit the period on Haswell commit 25dfc9e357af8aed1ca79b318a73f2c59c1f0b2b upstream. Running the ltp test cve-2015-3290 concurrently reports the following warnings. perfevents: irq loop stuck! WARNING: CPU: 31 PID: 32438 at arch/x86/events/intel/core.c:3174 intel_pmu_handle_irq+0x285/0x370 Call Trace: ? __warn+0xa4/0x220 ? intel_pmu_handle_irq+0x285/0x370 ? __report_bug+0x123/0x130 ? intel_pmu_handle_irq+0x285/0x370 ? __report_bug+0x123/0x130 ? intel_pmu_handle_irq+0x285/0x370 ? report_bug+0x3e/0xa0 ? handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x50 ? asm_exc_invalid_op+0x1a/0x20 ? irq_work_claim+0x1e/0x40 ? intel_pmu_handle_irq+0x285/0x370 perf_event_nmi_handler+0x3d/0x60 nmi_handle+0x104/0x330 Thanks to Thomas Gleixner's analysis, the issue is caused by the low initial period (1) of the frequency estimation algorithm, which triggers the defects of the HW, specifically erratum HSW11 and HSW143. (For the details, please refer https://lore.kernel.org/lkml/87plq9l5d2.ffs@tglx/) The HSW11 requires a period larger than 100 for the INST_RETIRED.ALL event, but the initial period in the freq mode is 1. The erratum is the same as the BDM11, which has been supported in the kernel. A minimum period of 128 is enforced as well on HSW. HSW143 is regarding that the fixed counter 1 may overcount 32 with the Hyper-Threading is enabled. However, based on the test, the hardware has more issues than it tells. Besides the fixed counter 1, the message 'interrupt took too long' can be observed on any counter which was armed with a period < 32 and two events expired in the same NMI. A minimum period of 32 is enforced for the rest of the events. The recommended workaround code of the HSW143 is not implemented. Because it only addresses the issue for the fixed counter. It brings extra overhead through extra MSR writing. No related overcounting issue has been reported so far. 
Fixes: 3a632cb229bf ("perf/x86/intel: Add simple Haswell PMU support") Reported-by: Li Huafei Suggested-by: Thomas Gleixner Signed-off-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240819183004.3132920-1-kan.liang@linux.intel.com Closes: https://lore.kernel.org/lkml/20240729223328.327835-1-lihuafei1@huawei.com/ Signed-off-by: Greg Kroah-Hartman --- arch/x86/events/intel/core.c | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index bc4fcf0d94056c..688550e336ce17 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4465,6 +4465,25 @@ static u8 adl_get_hybrid_cpu_type(void) return hybrid_big; } +static inline bool erratum_hsw11(struct perf_event *event) +{ + return (event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01); +} + +/* + * The HSW11 requires a period larger than 100 which is the same as the BDM11. + * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. + * + * The message 'interrupt took too long' can be observed on any counter which + * was armed with a period < 32 and two events expired in the same NMI. + * A minimum period of 32 is enforced for the rest of the events. + */ +static void hsw_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, erratum_hsw11(event) ? 128 : 32); +} + /* * Broadwell: * @@ -4482,8 +4501,7 @@ static u8 adl_get_hybrid_cpu_type(void) */ static void bdw_limit_period(struct perf_event *event, s64 *left) { - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (erratum_hsw11(event)) { if (*left < 128) *left = 128; *left &= ~0x3fULL; @@ -6392,6 +6410,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = hsw_limit_period; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; From 2f4d7b702668cb5b53125d7689d1b77368497952 Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 20 Aug 2024 17:28:43 +0800 Subject: [PATCH 017/268] irqchip/gic-v2m: Fix refcount leak in gicv2m_of_init() commit c5af2c90ba5629f0424a8d315f75fb8d91713c3c upstream. gicv2m_of_init() fails to perform an of_node_put() when of_address_to_resource() fails, leading to a refcount leak. Address this by moving the error handling path outside of the loop and making it common to all failure modes. 
Fixes: 4266ab1a8ff5 ("irqchip/gic-v2m: Refactor to prepare for ACPI support") Signed-off-by: Ma Ke Signed-off-by: Thomas Gleixner Reviewed-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240820092843.1219933-1-make24@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman --- drivers/irqchip/irq-gic-v2m.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index f2ff4387870d69..d83c2c85962c37 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -438,12 +438,12 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, ret = gicv2m_init_one(&child->fwnode, spi_start, nr_spis, &res, 0); - if (ret) { - of_node_put(child); + if (ret) break; - } } + if (ret && child) + of_node_put(child); if (!ret) ret = gicv2m_allocate_domains(parent); if (ret) From 0b46b4ac929f28ec59de2bea60759d096802bd2a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 14 Aug 2024 00:29:36 +0200 Subject: [PATCH 018/268] x86/kaslr: Expose and use the end of the physical memory address space commit ea72ce5da22806d5713f3ffb39a6d5ae73841f93 upstream. iounmap() on x86 occasionally fails to unmap because the provided valid ioremap address is not below high_memory. It turned out that this happens due to KASLR. KASLR uses the full address space between PAGE_OFFSET and vaddr_end to randomize the starting points of the direct map, vmalloc and vmemmap regions. It thereby limits the size of the direct map by using the installed memory size plus an extra configurable margin for hot-plug memory. This limitation is done to gain more randomization space because otherwise only the holes between the direct map, vmalloc, vmemmap and vaddr_end would be usable for randomizing. The limited direct map size is not exposed to the rest of the kernel, so the memory hot-plug and resource management related code paths still operate under the assumption that the available address space can be determined with MAX_PHYSMEM_BITS. request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 downwards. That means the first allocation happens past the end of the direct map and if unlucky this address is in the vmalloc space, which causes high_memory to become greater than VMALLOC_START and consequently causes iounmap() to fail for valid ioremap addresses. MAX_PHYSMEM_BITS cannot be changed for that because the randomization does not align with address bit boundaries and there are other places which actually require to know the maximum number of address bits. All remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found to be correct. Cure this by exposing the end of the direct map via PHYSMEM_END and use that for the memory hot-plug and resource management related places instead of relying on MAX_PHYSMEM_BITS. In the KASLR case PHYSMEM_END maps to a variable which is initialized by the KASLR initialization and otherwise it is based on MAX_PHYSMEM_BITS as before. To prevent future hickups add a check into add_pages() to catch callers trying to add memory above PHYSMEM_END. 
Fixes: 0483e1fa6e09 ("x86/mm: Implement ASLR for kernel memory regions") Reported-by: Max Ramanouski Reported-by: Alistair Popple Signed-off-by: Thomas Gleixner Tested-By: Max Ramanouski Tested-by: Alistair Popple Reviewed-by: Dan Williams Reviewed-by: Alistair Popple Reviewed-by: Kees Cook Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/page_64.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 4 ++++ arch/x86/mm/init_64.c | 4 ++++ arch/x86/mm/kaslr.c | 32 ++++++++++++++++++++----- include/linux/mm.h | 4 ++++ kernel/resource.c | 6 ++--- mm/memory_hotplug.c | 2 +- mm/sparse.c | 2 +- 8 files changed, 43 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index cc6b8e087192e4..9dab85aba7afd9 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -17,6 +17,7 @@ extern unsigned long phys_base; extern unsigned long page_offset_base; extern unsigned long vmalloc_base; extern unsigned long vmemmap_base; +extern unsigned long physmem_end; static __always_inline unsigned long __phys_addr_nodebug(unsigned long x) { diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 38b54b992f32e3..35c416f061552b 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d; # define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ +#ifdef CONFIG_RANDOMIZE_MEMORY +# define PHYSMEM_END physmem_end +#endif + /* * End of the region for which vmalloc page tables are pre-allocated. * For non-KMSAN builds, this is the same as VMALLOC_END. diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 19d209b412d7ac..aa69353da49f24 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -950,8 +950,12 @@ static void update_end_of_memory_vars(u64 start, u64 size) int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params) { + unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1; int ret; + if (WARN_ON_ONCE(end > PHYSMEM_END)) + return -ERANGE; + ret = __add_pages(nid, start_pfn, nr_pages, params); WARN_ON_ONCE(ret); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 37db264866b648..230f1dee4f0954 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -47,13 +47,24 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE; */ static __initdata struct kaslr_memory_region { unsigned long *base; + unsigned long *end; unsigned long size_tb; } kaslr_regions[] = { - { &page_offset_base, 0 }, - { &vmalloc_base, 0 }, - { &vmemmap_base, 0 }, + { + .base = &page_offset_base, + .end = &physmem_end, + }, + { + .base = &vmalloc_base, + }, + { + .base = &vmemmap_base, + }, }; +/* The end of the possible address space for physical memory */ +unsigned long physmem_end __ro_after_init; + /* Get size in bytes used by the memory region */ static inline unsigned long get_padding(struct kaslr_memory_region *region) { @@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void) BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); + /* Preset the end of the possible address space for physical memory */ + physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1); if (!kaslr_memory_enabled()) return; @@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void) vaddr += entropy; *kaslr_regions[i].base = 
vaddr; + /* Calculate the end of the region */ + vaddr += get_padding(&kaslr_regions[i]); /* - * Jump the region and add a minimum padding based on - * randomization alignment. + * KASLR trims the maximum possible size of the + * direct-map. Update the physmem_end boundary. + * No rounding required as the region starts + * PUD aligned and size is in units of TB. */ - vaddr += get_padding(&kaslr_regions[i]); + if (kaslr_regions[i].end) + *kaslr_regions[i].end = __pa_nodebug(vaddr - 1); + + /* Add a minimum padding based on randomization alignment. */ vaddr = round_up(vaddr + 1, PUD_SIZE); remain_entropy -= entropy; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d617d0d696751..830b925c2d0054 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -95,6 +95,10 @@ extern const int mmap_rnd_compat_bits_max; extern int mmap_rnd_compat_bits __read_mostly; #endif +#ifndef PHYSMEM_END +# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1) +#endif + #include #include diff --git a/kernel/resource.c b/kernel/resource.c index e3f5680a564cf6..ce127a829ead70 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1778,8 +1778,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size, if (flags & GFR_DESCENDING) { resource_size_t end; - end = min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + end = min_t(resource_size_t, base->end, PHYSMEM_END); return end - size + 1; } @@ -1796,8 +1795,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr, * @size did not wrap 0. */ return addr > addr - size && - addr <= min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + addr <= min_t(resource_size_t, base->end, PHYSMEM_END); } static resource_size_t gfr_next(resource_size_t addr, resource_size_t size, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f36525a595a93c..9beed7c71a8e91 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1610,7 +1610,7 @@ struct range __weak arch_get_mappable_range(void) struct range mhp_get_pluggable_range(bool need_mapping) { - const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1; + const u64 max_phys = PHYSMEM_END; struct range mhp_range; if (need_mapping) { diff --git a/mm/sparse.c b/mm/sparse.c index 338cf946dee8de..0706113c4c8433 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section) static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn, unsigned long *end_pfn) { - unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT); + unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT; /* * Sanity checks - do not allow an architecture to pass From 85f03ca98e07cd0786738b56ae73740bce0ac27f Mon Sep 17 00:00:00 2001 From: Roland Xu Date: Thu, 15 Aug 2024 10:58:13 +0800 Subject: [PATCH 019/268] rtmutex: Drop rt_mutex::wait_lock before scheduling commit d33d26036a0274b472299d7dcdaa5fb34329f91b upstream. rt_mutex_handle_deadlock() is called with rt_mutex::wait_lock held. In the good case it returns with the lock held and in the deadlock case it emits a warning and goes into an endless scheduling loop with the lock held, which triggers the 'scheduling in atomic' warning. Unlock rt_mutex::wait_lock in the dead lock case before issuing the warning and dropping into the schedule for ever loop. 
[ tglx: Moved unlock before the WARN(), removed the pointless comment, massaged changelog, added Fixes tag ] Fixes: 3d5c9340d194 ("rtmutex: Handle deadlock detection smarter") Signed-off-by: Roland Xu Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/ME0P300MB063599BEF0743B8FA339C2CECC802@ME0P300MB0635.AUSP300.PROD.OUTLOOK.COM Signed-off-by: Greg Kroah-Hartman --- kernel/locking/rtmutex.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 21db0df0eb0007..bf3a28ee7d8f47 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1624,6 +1624,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, } static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + struct rt_mutex_base *lock, struct rt_mutex_waiter *w) { /* @@ -1636,10 +1637,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, if (build_ww_mutex() && w->ww_ctx) return; - /* - * Yell loudly and stop the task right here. - */ + raw_spin_unlock_irq(&lock->wait_lock); + WARN(1, "rtmutex deadlock detected\n"); + while (1) { set_current_state(TASK_INTERRUPTIBLE); schedule(); @@ -1693,7 +1694,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, } else { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); } /* From 77ee2eaee4d9329915dbd421ed1640d17ed500ce Mon Sep 17 00:00:00 2001 From: Georg Gottleuber Date: Tue, 27 Aug 2024 12:41:33 +0200 Subject: [PATCH 020/268] nvme-pci: Add sleep quirk for Samsung 990 Evo commit 61aa894e7a2fda4ee026523b01d07e83ce2abb72 upstream. On some TUXEDO platforms, a Samsung 990 Evo NVMe leads to a high power consumption in s2idle sleep (2-3 watts). This patch applies 'Force No Simple Suspend' quirk to achieve a sleep with a lower power consumption, typically around 0.5 watts. Signed-off-by: Georg Gottleuber Signed-off-by: Werner Sembach Cc: Signed-off-by: Keith Busch Signed-off-by: Greg Kroah-Hartman --- drivers/nvme/host/pci.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2c3f55877a1134..89e0798af780d0 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2929,6 +2929,17 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) dmi_match(DMI_BOARD_NAME, "NS5x_7xPU") || dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1")) return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; + } else if (pdev->vendor == 0x144d && pdev->device == 0xa80d) { + /* + * Exclude Samsung 990 Evo from NVME_QUIRK_SIMPLE_SUSPEND + * because of high power consumption (> 2 Watt) in s2idle + * sleep. Only some boards with Intel CPU are affected. + */ + if (dmi_match(DMI_BOARD_NAME, "GMxPXxx") || + dmi_match(DMI_BOARD_NAME, "PH4PG31") || + dmi_match(DMI_BOARD_NAME, "PH4PRX1_PH6PRX1") || + dmi_match(DMI_BOARD_NAME, "PH6PG01_PH6PG71")) + return NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND; } /* From d6344cc86f314ef9ab2001e89d2602359ffe1b70 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Mon, 1 Apr 2024 14:45:36 -0700 Subject: [PATCH 021/268] rust: types: Make Opaque::get const commit be2ca1e03965ffb214b6cbda0ffd84daeeb5f214 upstream. To support a potential usage: static foo: Opaque = ..; // Or defined in an extern block. ... 
fn bar() { let ptr = foo.get(); } `Opaque::get` need to be `const`, otherwise compiler will complain because calls on statics are limited to const functions. Also `Opaque::get` should be naturally `const` since it's a composition of two `const` functions: `UnsafeCell::get` and `ptr::cast`. Signed-off-by: Boqun Feng Reviewed-by: Alice Ryhl Reviewed-by: Wedson Almeida Filho Reviewed-by: Benno Lossin Link: https://lore.kernel.org/r/20240401214543.1242286-1-boqun.feng@gmail.com Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/kernel/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index fdb778e65d79d3..e23e7827d756d7 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -248,7 +248,7 @@ impl Opaque { } /// Returns a raw pointer to the opaque data. - pub fn get(&self) -> *mut T { + pub const fn get(&self) -> *mut T { UnsafeCell::get(&self.value).cast::() } From f9275893b07f23bb2c490a7a725709b56382e155 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 28 Aug 2024 11:01:29 -0700 Subject: [PATCH 022/268] rust: macros: provide correct provenance when constructing THIS_MODULE commit a5a3c952e82c1ada12bf8c55b73af26f1a454bd2 upstream. Currently while defining `THIS_MODULE` symbol in `module!()`, the pointer used to construct `ThisModule` is derived from an immutable reference of `__this_module`, which means the pointer doesn't have the provenance for writing, and that means any write to that pointer is UB regardless of data races or not. However, the usage of `THIS_MODULE` includes passing this pointer to functions that may write to it (probably in unsafe code), and this will create soundness issues. One way to fix this is using `addr_of_mut!()` but that requires the unstable feature "const_mut_refs". So instead of `addr_of_mut()!`, an extern static `Opaque` is used here: since `Opaque` is transparent to `T`, an extern static `Opaque` will just wrap the C symbol (defined in a C compile unit) in an `Opaque`, which provides a pointer with writable provenance via `Opaque::get()`. This fix the potential UBs because of pointer provenance unmatched. Reported-by: Alice Ryhl Signed-off-by: Boqun Feng Reviewed-by: Alice Ryhl Reviewed-by: Trevor Gross Reviewed-by: Benno Lossin Reviewed-by: Gary Guo Closes: https://rust-for-linux.zulipchat.com/#narrow/stream/x/topic/x/near/465412664 Fixes: 1fbde52bde73 ("rust: add `macros` crate") Cc: stable@vger.kernel.org # 6.6.x: be2ca1e03965: ("rust: types: Make Opaque::get const") Link: https://lore.kernel.org/r/20240828180129.4046355-1-boqun.feng@gmail.com [ Fixed two typos, reworded title. - Miguel ] Signed-off-by: Miguel Ojeda Signed-off-by: Greg Kroah-Hartman --- rust/macros/module.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rust/macros/module.rs b/rust/macros/module.rs index acd0393b509574..7dee348ef0cc82 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -203,7 +203,11 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { // freed until the module is unloaded. 
#[cfg(MODULE)] static THIS_MODULE: kernel::ThisModule = unsafe {{ - kernel::ThisModule::from_ptr(&kernel::bindings::__this_module as *const _ as *mut _) + extern \"C\" {{ + static __this_module: kernel::types::Opaque; + }} + + kernel::ThisModule::from_ptr(__this_module.get()) }}; #[cfg(not(MODULE))] static THIS_MODULE: kernel::ThisModule = unsafe {{ From c4252955e1fb619fea56575f6e94b01d143ef66b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 14:37:22 -0400 Subject: [PATCH 023/268] Revert "Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE" commit 532f8bcd1c2c4e8112f62e1922fd1703bc0ffce0 upstream. This reverts commit 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 which breaks compatibility with commands like: bluetoothd[46328]: @ MGMT Command: Load.. (0x0013) plen 74 {0x0001} [hci0] Keys: 2 BR/EDR Address: C0:DC:DA:A5:E5:47 (Samsung Electronics Co.,Ltd) Key type: Authenticated key from P-256 (0x03) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 6ed96089bd9765be2f2c971b0b95f624 LE Address: D7:2A:DE:1E:73:A2 (Static) Key type: Unauthenticated key from P-256 (0x02) Central: 0x00 Encryption size: 16 Diversifier[2]: 0000 Randomizer[8]: 0000000000000000 Key[16]: 87dd2546ededda380ffcdc0a8faa4597 @ MGMT Event: Command Status (0x0002) plen 3 {0x0001} [hci0] Load Long Term Keys (0x0013) Status: Invalid Parameters (0x0d) Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 5 ----- net/bluetooth/mgmt.c | 25 +++++++------------------ net/bluetooth/smp.c | 7 ------- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f89d6d43ba8f1e..070c794e6a4251 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -188,7 +188,6 @@ struct blocked_key { struct smp_csrk { bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 type; u8 val[16]; }; @@ -198,7 +197,6 @@ struct smp_ltk { struct rcu_head rcu; bdaddr_t bdaddr; u8 bdaddr_type; - u8 link_type; u8 authenticated; u8 type; u8 enc_size; @@ -213,7 +211,6 @@ struct smp_irk { bdaddr_t rpa; bdaddr_t bdaddr; u8 addr_type; - u8 link_type; u8 val[16]; }; @@ -221,8 +218,6 @@ struct link_key { struct list_head list; struct rcu_head rcu; bdaddr_t bdaddr; - u8 bdaddr_type; - u8 link_type; u8 type; u8 val[HCI_LINK_KEY_SIZE]; u8 pin_len; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3533ac679e42cd..0f6151580c31ce 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2827,8 +2827,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, for (i = 0; i < key_count; i++) { struct mgmt_link_key_info *key = &cp->keys[i]; - /* Considering SMP over BREDR/LE, there is no need to check addr_type */ - if (key->type > 0x08) + if (key->addr.type != BDADDR_BREDR || key->type > 0x08) return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); @@ -7065,7 +7064,6 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, for (i = 0; i < irk_count; i++) { struct mgmt_irk_info *irk = &cp->irks[i]; - u8 addr_type = le_addr_type(irk->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, @@ -7075,12 +7073,8 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, 
void *cp_data, continue; } - /* When using SMP over BR/EDR, the addr type should be set to BREDR */ - if (irk->addr.type == BDADDR_BREDR) - addr_type = BDADDR_BREDR; - hci_add_irk(hdev, &irk->addr.bdaddr, - addr_type, irk->val, + le_addr_type(irk->addr.type), irk->val, BDADDR_ANY); } @@ -7161,7 +7155,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, for (i = 0; i < key_count; i++) { struct mgmt_ltk_info *key = &cp->keys[i]; u8 type, authenticated; - u8 addr_type = le_addr_type(key->addr.type); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, @@ -7196,12 +7189,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } - /* When using SMP over BR/EDR, the addr type should be set to BREDR */ - if (key->addr.type == BDADDR_BREDR) - addr_type = BDADDR_BREDR; - hci_add_ltk(hdev, &key->addr.bdaddr, - addr_type, type, authenticated, + le_addr_type(key->addr.type), type, authenticated, key->val, key->enc_size, key->ediv, key->rand); } @@ -9450,7 +9439,7 @@ void mgmt_new_link_key(struct hci_dev *hdev, struct link_key *key, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); + ev.key.addr.type = BDADDR_BREDR; ev.key.type = key->type; memcpy(ev.key.val, key->val, HCI_LINK_KEY_SIZE); ev.key.pin_len = key->pin_len; @@ -9501,7 +9490,7 @@ void mgmt_new_ltk(struct hci_dev *hdev, struct smp_ltk *key, bool persistent) ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &key->bdaddr); - ev.key.addr.type = link_to_bdaddr(key->link_type, key->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(LE_LINK, key->bdaddr_type); ev.key.type = mgmt_ltk_type(key); ev.key.enc_size = key->enc_size; ev.key.ediv = key->ediv; @@ -9530,7 +9519,7 @@ void mgmt_new_irk(struct hci_dev *hdev, struct smp_irk *irk, bool persistent) bacpy(&ev.rpa, &irk->rpa); bacpy(&ev.irk.addr.bdaddr, &irk->bdaddr); - ev.irk.addr.type = link_to_bdaddr(irk->link_type, irk->addr_type); + ev.irk.addr.type = link_to_bdaddr(LE_LINK, irk->addr_type); memcpy(ev.irk.val, irk->val, sizeof(irk->val)); mgmt_event(MGMT_EV_NEW_IRK, hdev, &ev, sizeof(ev), NULL); @@ -9559,7 +9548,7 @@ void mgmt_new_csrk(struct hci_dev *hdev, struct smp_csrk *csrk, ev.store_hint = persistent; bacpy(&ev.key.addr.bdaddr, &csrk->bdaddr); - ev.key.addr.type = link_to_bdaddr(csrk->link_type, csrk->bdaddr_type); + ev.key.addr.type = link_to_bdaddr(LE_LINK, csrk->bdaddr_type); ev.key.type = csrk->type; memcpy(ev.key.val, csrk->val, sizeof(csrk->val)); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index fa3986cfd52661..56f7f041c9a604 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -1061,7 +1061,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->remote_irk) { - smp->remote_irk->link_type = hcon->type; mgmt_new_irk(hdev, smp->remote_irk, persistent); /* Now that user space can be considered to know the @@ -1081,28 +1080,24 @@ static void smp_notify_keys(struct l2cap_conn *conn) } if (smp->csrk) { - smp->csrk->link_type = hcon->type; smp->csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->csrk, persistent); } if (smp->responder_csrk) { - smp->responder_csrk->link_type = hcon->type; smp->responder_csrk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_csrk->bdaddr, &hcon->dst); mgmt_new_csrk(hdev, smp->responder_csrk, persistent); } if (smp->ltk) { - smp->ltk->link_type = hcon->type; smp->ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->ltk->bdaddr, 
&hcon->dst); mgmt_new_ltk(hdev, smp->ltk, persistent); } if (smp->responder_ltk) { - smp->responder_ltk->link_type = hcon->type; smp->responder_ltk->bdaddr_type = hcon->dst_type; bacpy(&smp->responder_ltk->bdaddr, &hcon->dst); mgmt_new_ltk(hdev, smp->responder_ltk, persistent); @@ -1122,8 +1117,6 @@ static void smp_notify_keys(struct l2cap_conn *conn) key = hci_add_link_key(hdev, smp->conn->hcon, &hcon->dst, smp->link_key, type, 0, &persistent); if (key) { - key->link_type = hcon->type; - key->bdaddr_type = hcon->dst_type; mgmt_new_link_key(hdev, key, persistent); /* Don't keep debug keys around if the relevant From 84996e92a184bb29308a75d7570c300397d53dc8 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 27 Aug 2024 15:01:34 -0400 Subject: [PATCH 024/268] Bluetooth: MGMT: Ignore keys being loaded with invalid type commit 1e9683c9b6ca88cc9340cdca85edd6134c8cffe3 upstream. Due to 59b047bc98084f8af2c41483e4d68a5adf2fa7f7 there could be keys stored with the wrong address type so this attempt to detect it and ignore them instead of just failing to load all keys. Cc: stable@vger.kernel.org Link: https://github.com/bluez/bluez/issues/875 Fixes: 59b047bc9808 ("Bluetooth: MGMT/SMP: Fix address type when using SMP over BREDR/LE") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/mgmt.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 0f6151580c31ce..bad365f3d7bf2c 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2824,15 +2824,6 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys, key_count); - for (i = 0; i < key_count; i++) { - struct mgmt_link_key_info *key = &cp->keys[i]; - - if (key->addr.type != BDADDR_BREDR || key->type > 0x08) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LINK_KEYS, - MGMT_STATUS_INVALID_PARAMS); - } - hci_dev_lock(hdev); hci_link_keys_clear(hdev); @@ -2857,6 +2848,19 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, continue; } + if (key->addr.type != BDADDR_BREDR) { + bt_dev_warn(hdev, + "Invalid link address type %u for %pMR", + key->addr.type, &key->addr.bdaddr); + continue; + } + + if (key->type > 0x08) { + bt_dev_warn(hdev, "Invalid link key type %u for %pMR", + key->type, &key->addr.bdaddr); + continue; + } + /* Always ignore debug keys and require a new pairing if * the user wants to use them. */ @@ -7139,15 +7143,6 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, bt_dev_dbg(hdev, "key_count %u", key_count); - for (i = 0; i < key_count; i++) { - struct mgmt_ltk_info *key = &cp->keys[i]; - - if (!ltk_is_valid(key)) - return mgmt_cmd_status(sk, hdev->id, - MGMT_OP_LOAD_LONG_TERM_KEYS, - MGMT_STATUS_INVALID_PARAMS); - } - hci_dev_lock(hdev); hci_smp_ltks_clear(hdev); @@ -7164,6 +7159,12 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, continue; } + if (!ltk_is_valid(key)) { + bt_dev_warn(hdev, "Invalid LTK for %pMR", + &key->addr.bdaddr); + continue; + } + switch (key->type) { case MGMT_LTK_UNAUTHENTICATED: authenticated = 0x00; From 115a755bb38db5a1175be44e6a9a93a0a8233885 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Wed, 21 Aug 2024 08:06:31 +0900 Subject: [PATCH 025/268] mmc: core: apply SD quirks earlier during probe commit 469e5e4713989fdd5e3e502b922e7be0da2464b9 upstream. 
Applying MMC_QUIRK_BROKEN_SD_CACHE is broken, as the card's SD quirks are referenced in sd_parse_ext_reg_perf() prior to the quirks being initialized in mmc_blk_probe(). To fix this problem, let's split out an SD-specific list of quirks and apply in mmc_sd_init_card() instead. In this way, sd_read_ext_regs() to has the available information for not assigning the SD_EXT_PERF_CACHE as one of the (un)supported features, which in turn allows mmc_sd_init_card() to properly skip execution of sd_enable_cache(). Fixes: c467c8f08185 ("mmc: Add MMC_QUIRK_BROKEN_SD_CACHE for Kingston Canvas Go Plus from 11/2019") Signed-off-by: Jonathan Bell Co-developed-by: Keita Aihara Signed-off-by: Keita Aihara Reviewed-by: Dragan Simic Reviewed-by: Avri Altman Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240820230631.GA436523@sony.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/core/quirks.h | 22 +++++++++++++--------- drivers/mmc/core/sd.c | 4 ++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index cca71867bc4ad6..92905fc46436dd 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -15,6 +15,19 @@ #include "card.h" +static const struct mmc_fixup __maybe_unused mmc_sd_fixups[] = { + /* + * Kingston Canvas Go! Plus microSD cards never finish SD cache flush. + * This has so far only been observed on cards from 11/2019, while new + * cards from 2023/05 do not exhibit this behavior. + */ + _FIXUP_EXT("SD64G", CID_MANFID_KINGSTON_SD, 0x5449, 2019, 11, + 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, + MMC_QUIRK_BROKEN_SD_CACHE, EXT_CSD_REV_ANY), + + END_FIXUP +}; + static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { #define INAND_CMD38_ARG_EXT_CSD 113 #define INAND_CMD38_ARG_ERASE 0x00 @@ -53,15 +66,6 @@ static const struct mmc_fixup __maybe_unused mmc_blk_fixups[] = { MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc, MMC_QUIRK_BLK_NO_CMD23), - /* - * Kingston Canvas Go! Plus microSD cards never finish SD cache flush. - * This has so far only been observed on cards from 11/2019, while new - * cards from 2023/05 do not exhibit this behavior. - */ - _FIXUP_EXT("SD64G", CID_MANFID_KINGSTON_SD, 0x5449, 2019, 11, - 0, -1ull, SDIO_ANY_ID, SDIO_ANY_ID, add_quirk_sd, - MMC_QUIRK_BROKEN_SD_CACHE, EXT_CSD_REV_ANY), - /* * Some SD cards lockup while using CMD23 multiblock transfers. */ diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index c3e554344c99f9..240469a881a272 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -26,6 +26,7 @@ #include "host.h" #include "bus.h" #include "mmc_ops.h" +#include "quirks.h" #include "sd.h" #include "sd_ops.h" @@ -1475,6 +1476,9 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, goto free_card; } + /* Apply quirks prior to card setup */ + mmc_fixup_device(card, mmc_sd_fixups); + err = mmc_sd_setup_card(host, card, oldcard != NULL); if (err) goto free_card; From 5b4bf3948875064a9adcda4b52b59e0520a8c576 Mon Sep 17 00:00:00 2001 From: Sam Protsenko Date: Wed, 6 Mar 2024 17:20:52 -0600 Subject: [PATCH 026/268] mmc: dw_mmc: Fix IDMAC operation with pages bigger than 4K commit 8396c793ffdf28bb8aee7cfe0891080f8cab7890 upstream. 
Commit 616f87661792 ("mmc: pass queue_limits to blk_mq_alloc_disk") [1] revealed the long living issue in dw_mmc.c driver, existing since the time when it was first introduced in commit f95f3850f7a9 ("mmc: dw_mmc: Add Synopsys DesignWare mmc host driver."), also making kernel boot broken on platforms using dw_mmc driver with 16K or 64K pages enabled, with this message in dmesg: mmcblk: probe of mmc0:0001 failed with error -22 That's happening because mmc_blk_probe() fails when it calls blk_validate_limits() consequently, which returns the error due to failed max_segment_size check in this code: /* * The maximum segment size has an odd historic 64k default that * drivers probably should override. Just like the I/O size we * require drivers to at least handle a full page per segment. */ ... if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE)) return -EINVAL; In case when IDMAC (Internal DMA Controller) is used, dw_mmc.c always sets .max_seg_size to 4 KiB: mmc->max_seg_size = 0x1000; The comment in the code above explains why it's incorrect. Arnd suggested setting .max_seg_size to .max_req_size to fix it, which is also what some other drivers are doing: $ grep -rl 'max_seg_size.*=.*max_req_size' drivers/mmc/host/ | \ wc -l 18 This change is not only fixing the boot with 16K/64K pages, but also leads to a better MMC performance. The linear write performance was tested on E850-96 board (eMMC only), before commit [1] (where it's possible to boot with 16K/64K pages without this fix, to be able to do a comparison). It was tested with this command: # dd if=/dev/zero of=somefile bs=1M count=500 oflag=sync Test results are as follows: - 4K pages, .max_seg_size = 4 KiB: 94.2 MB/s - 4K pages, .max_seg_size = .max_req_size = 512 KiB: 96.9 MB/s - 16K pages, .max_seg_size = 4 KiB: 126 MB/s - 16K pages, .max_seg_size = .max_req_size = 2 MiB: 128 MB/s - 64K pages, .max_seg_size = 4 KiB: 138 MB/s - 64K pages, .max_seg_size = .max_req_size = 8 MiB: 138 MB/s Unfortunately, SD card controller is not enabled in E850-96 yet, so it wasn't possible for me to run the test on some cheap SD cards to check this patch's impact on those. But it's possible that this change might also reduce the writes count, thus improving SD/eMMC longevity. All credit for the analysis and the suggested solution goes to Arnd. 
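To make the probe failure concrete, here is a minimal user-space sketch of the limit check quoted above; the 16K page size and 8 MiB request size are illustrative values only, not taken from the driver.

#include <stdio.h>

int main(void)
{
	unsigned int page_size = 16 * 1024;          /* 16K-page kernel            */
	unsigned int old_seg   = 0x1000;             /* old hard-coded 4 KiB limit */
	unsigned int new_seg   = 8 * 1024 * 1024;    /* max_seg_size = max_req_size */

	/* mirrors the blk_validate_limits() condition quoted above */
	printf("old limit rejected: %s\n", old_seg < page_size ? "yes (-EINVAL)" : "no");
	printf("new limit rejected: %s\n", new_seg < page_size ? "yes (-EINVAL)" : "no");
	return 0;
}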
[1] https://lore.kernel.org/all/20240215070300.2200308-18-hch@lst.de/ Fixes: f95f3850f7a9 ("mmc: dw_mmc: Add Synopsys DesignWare mmc host driver.") Suggested-by: Arnd Bergmann Reported-by: Linux Kernel Functional Testing Closes: https://lore.kernel.org/all/CA+G9fYtddf2Fd3be+YShHP6CmSDNcn0ptW8qg+stUKW+Cn0rjQ@mail.gmail.com/ Signed-off-by: Sam Protsenko Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240306232052.21317-1-semen.protsenko@linaro.org Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/dw_mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 02bee7afab37ef..2f0bc79ef856a8 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2952,8 +2952,8 @@ static int dw_mci_init_slot(struct dw_mci *host) if (host->use_dma == TRANS_MODE_IDMAC) { mmc->max_segs = host->ring_size; mmc->max_blk_size = 65535; - mmc->max_seg_size = 0x1000; - mmc->max_req_size = mmc->max_seg_size * host->ring_size; + mmc->max_req_size = DW_MCI_DESC_DATA_LENGTH * host->ring_size; + mmc->max_seg_size = mmc->max_req_size; mmc->max_blk_count = mmc->max_req_size / 512; } else if (host->use_dma == TRANS_MODE_EDMAC) { mmc->max_segs = 64; From 4c6520627bbeb66c62644835dcda0bd8f1432bc2 Mon Sep 17 00:00:00 2001 From: Liao Chen Date: Mon, 26 Aug 2024 12:48:51 +0000 Subject: [PATCH 027/268] mmc: sdhci-of-aspeed: fix module autoloading commit 6e540da4c1db7b840e347c4dfe48359b18b7e376 upstream. Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from of_device_id table. Signed-off-by: Liao Chen Acked-by: Andrew Jeffery Fixes: bb7b8ec62dfb ("mmc: sdhci-of-aspeed: Add support for the ASPEED SD controller") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20240826124851.379759-1-liaochen4@huawei.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-aspeed.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-of-aspeed.c b/drivers/mmc/host/sdhci-of-aspeed.c index 42d54532cabe6c..8379a0620c8fed 100644 --- a/drivers/mmc/host/sdhci-of-aspeed.c +++ b/drivers/mmc/host/sdhci-of-aspeed.c @@ -510,6 +510,7 @@ static const struct of_device_id aspeed_sdhci_of_match[] = { { .compatible = "aspeed,ast2600-sdhci", .data = &ast2600_sdhci_pdata, }, { } }; +MODULE_DEVICE_TABLE(of, aspeed_sdhci_of_match); static struct platform_driver aspeed_sdhci_driver = { .driver = { From a7fa220ebb4151dbfc5799ef308f6318ee6f91ab Mon Sep 17 00:00:00 2001 From: Seunghwan Baek Date: Thu, 29 Aug 2024 15:18:22 +0900 Subject: [PATCH 028/268] mmc: cqhci: Fix checking of CQHCI_HALT state commit aea62c744a9ae2a8247c54ec42138405216414da upstream. To check if mmc cqe is in halt state, need to check set/clear of CQHCI_HALT bit. At this time, we need to check with &, not &&. 
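A minimal user-space sketch of the operator mix-up; the CQHCI_HALT value is assumed here purely for illustration.

#include <stdio.h>

#define CQHCI_HALT 0x00000001u   /* assumed bit value, illustration only */

int main(void)
{
	unsigned int ctl = 0x100;    /* register value with the HALT bit clear */

	/* buggy: "ctl is non-zero && CQHCI_HALT is non-zero" -> 1 regardless of the bit */
	printf("ctl && CQHCI_HALT = %d\n", ctl && CQHCI_HALT);
	/* fixed: tests the actual bit -> 0 */
	printf("ctl & CQHCI_HALT  = %u\n", ctl & CQHCI_HALT);
	return 0;
}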
Fixes: a4080225f51d ("mmc: cqhci: support for command queue enabled host") Cc: stable@vger.kernel.org Signed-off-by: Seunghwan Baek Reviewed-by: Ritesh Harjani Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20240829061823.3718-2-sh8267.baek@samsung.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/cqhci-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 41e94cd1410980..fe7a4eac9595c0 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -612,7 +612,7 @@ static int cqhci_request(struct mmc_host *mmc, struct mmc_request *mrq) cqhci_writel(cq_host, 0, CQHCI_CTL); mmc->cqe_on = true; pr_debug("%s: cqhci: CQE on\n", mmc_hostname(mmc)); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) { + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) { pr_err("%s: cqhci: CQE failed to exit halt state\n", mmc_hostname(mmc)); } From ad6451ab31e198c793c03d5c743aa97a4dc84325 Mon Sep 17 00:00:00 2001 From: Joanne Koong Date: Mon, 26 Aug 2024 14:19:04 -0700 Subject: [PATCH 029/268] fuse: update stats for pages in dropped aux writeback list commit f7790d67785302b3116bbbfda62a5a44524601a3 upstream. In the case where the aux writeback list is dropped (e.g. the pages have been truncated or the connection is broken), the stats for its pages and backing device info need to be updated as well. Fixes: e2653bd53a98 ("fuse: fix leaked aux requests") Signed-off-by: Joanne Koong Reviewed-by: Josef Bacik Cc: # v5.1 Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/file.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index cc9651a01351c2..ceb9f7d2303882 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1735,10 +1735,16 @@ __acquires(fi->lock) fuse_writepage_finish(fm, wpa); spin_unlock(&fi->lock); - /* After fuse_writepage_finish() aux request list is private */ + /* After rb_erase() aux request list is private */ for (aux = wpa->next; aux; aux = next) { + struct backing_dev_info *bdi = inode_to_bdi(aux->inode); + next = aux->next; aux->next = NULL; + + dec_wb_stat(&bdi->wb, WB_WRITEBACK); + dec_node_page_state(aux->ia.ap.pages[0], NR_WRITEBACK_TEMP); + wb_writeout_inc(&bdi->wb); fuse_writepage_free(aux); } From bfd55cd4295ad5317496e9dfbed099821726c15f Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Mon, 19 Aug 2024 19:52:30 +0200 Subject: [PATCH 030/268] fuse: use unsigned type for getxattr/listxattr size truncation commit b18915248a15eae7d901262f108d6ff0ffb4ffc1 upstream. The existing code uses min_t(ssize_t, outarg.size, XATTR_LIST_MAX) when parsing the FUSE daemon's response to a zero-length getxattr/listxattr request. On 32-bit kernels, where ssize_t and outarg.size are the same size, this is wrong: The min_t() will pass through any size values that are negative when interpreted as signed. fuse_listxattr() will then return this userspace-supplied negative value, which callers will treat as an error value. This kind of bug pattern can lead to fairly bad security bugs because of how error codes are used in the Linux kernel. If a caller were to convert the numeric error into an error pointer, like so: struct foo *func(...) { int len = fuse_getxattr(..., NULL, 0); if (len < 0) return ERR_PTR(len); ... 
} then it would end up returning this userspace-supplied negative value cast to a pointer - but the caller of this function wouldn't recognize it as an error pointer (IS_ERR_VALUE() only detects values in the narrow range in which legitimate errno values are), and so it would just be treated as a kernel pointer. I think there is at least one theoretical codepath where this could happen, but that path would involve virtio-fs with submounts plus some weird SELinux configuration, so I think it's probably not a concern in practice. Cc: stable@vger.kernel.org # v4.9 Fixes: 63401ccdb2ca ("fuse: limit xattr returned size") Signed-off-by: Jann Horn Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index 49c01559580f4e..690b9aadceaa88 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -81,7 +81,7 @@ ssize_t fuse_getxattr(struct inode *inode, const char *name, void *value, } ret = fuse_simple_request(fm, &args); if (!ret && !size) - ret = min_t(ssize_t, outarg.size, XATTR_SIZE_MAX); + ret = min_t(size_t, outarg.size, XATTR_SIZE_MAX); if (ret == -ENOSYS) { fm->fc->no_getxattr = 1; ret = -EOPNOTSUPP; @@ -143,7 +143,7 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) } ret = fuse_simple_request(fm, &args); if (!ret && !size) - ret = min_t(ssize_t, outarg.size, XATTR_LIST_MAX); + ret = min_t(size_t, outarg.size, XATTR_LIST_MAX); if (ret > 0 && size) ret = fuse_verify_xattr_list(list, ret); if (ret == -ENOSYS) { From f36df5cc866f24db38bab229419b85d1f5f322c6 Mon Sep 17 00:00:00 2001 From: yangyun Date: Fri, 23 Aug 2024 16:51:46 +0800 Subject: [PATCH 031/268] fuse: fix memory leak in fuse_create_open commit 3002240d16494d798add0575e8ba1f284258ab34 upstream. The memory of struct fuse_file is allocated but not freed when get_create_ext return error. Fixes: 3e2b6fdbdc9a ("fuse: send security context of inode on file") Cc: stable@vger.kernel.org # v5.17 Signed-off-by: yangyun Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c1703d2c4bf9aa..95f9913a353731 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -668,7 +668,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, err = get_create_ext(&args, dir, entry, mode); if (err) - goto out_put_forget_req; + goto out_free_ff; err = fuse_simple_request(fm, &args); free_ext_value(&args); From 72f4fc5fb2912e73f198f071e6fc590bd904b2ce Mon Sep 17 00:00:00 2001 From: Xingyu Wu Date: Mon, 26 Aug 2024 16:04:29 +0800 Subject: [PATCH 032/268] clk: starfive: jh7110-sys: Add notifier for PLL0 clock commit 538d5477b25289ac5d46ca37b9e5b4d685cbe019 upstream. Add notifier function for PLL0 clock. In the function, the cpu_root clock should be operated by saving its current parent and setting a new safe parent (osc clock) before setting the PLL0 clock rate. After setting PLL0 rate, it should be switched back to the original parent clock. 
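The general shape of such a rate-change notifier, sketched with hypothetical names; the handler added below additionally remembers the original parent in its private data so it can be restored afterwards.

#include <linux/clk.h>
#include <linux/notifier.h>

static int foo_pll_notifier_cb(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	int ret = 0;

	if (action == PRE_RATE_CHANGE) {
		/* park consumers on a safe parent (e.g. the oscillator) */
	} else if (action == POST_RATE_CHANGE) {
		/* switch consumers back to the parent saved earlier */
	}

	return notifier_from_errno(ret);
}

/* attached once at probe time with clk_notifier_register(pll_clk, &nb) */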
Fixes: e2c510d6d630 ("riscv: dts: starfive: Add cpu scaling for JH7110 SoC") Cc: stable@vger.kernel.org Reviewed-by: Emil Renner Berthing Signed-off-by: Xingyu Wu Link: https://lore.kernel.org/r/20240826080430.179788-2-xingyu.wu@starfivetech.com Reviewed-by: Hal Feng Tested-by: Michael Jeanson Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- .../clk/starfive/clk-starfive-jh7110-sys.c | 31 ++++++++++++++++++- drivers/clk/starfive/clk-starfive-jh71x0.h | 2 ++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/clk/starfive/clk-starfive-jh7110-sys.c b/drivers/clk/starfive/clk-starfive-jh7110-sys.c index 3884eff9fe9315..58ef7c6d69cce9 100644 --- a/drivers/clk/starfive/clk-starfive-jh7110-sys.c +++ b/drivers/clk/starfive/clk-starfive-jh7110-sys.c @@ -385,6 +385,32 @@ int jh7110_reset_controller_register(struct jh71x0_clk_priv *priv, } EXPORT_SYMBOL_GPL(jh7110_reset_controller_register); +/* + * This clock notifier is called when the rate of PLL0 clock is to be changed. + * The cpu_root clock should save the curent parent clock and switch its parent + * clock to osc before PLL0 rate will be changed. Then switch its parent clock + * back after the PLL0 rate is completed. + */ +static int jh7110_pll0_clk_notifier_cb(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct jh71x0_clk_priv *priv = container_of(nb, struct jh71x0_clk_priv, pll_clk_nb); + struct clk *cpu_root = priv->reg[JH7110_SYSCLK_CPU_ROOT].hw.clk; + int ret = 0; + + if (action == PRE_RATE_CHANGE) { + struct clk *osc = clk_get(priv->dev, "osc"); + + priv->original_clk = clk_get_parent(cpu_root); + ret = clk_set_parent(cpu_root, osc); + clk_put(osc); + } else if (action == POST_RATE_CHANGE) { + ret = clk_set_parent(cpu_root, priv->original_clk); + } + + return notifier_from_errno(ret); +} + static int __init jh7110_syscrg_probe(struct platform_device *pdev) { struct jh71x0_clk_priv *priv; @@ -413,7 +439,10 @@ static int __init jh7110_syscrg_probe(struct platform_device *pdev) if (IS_ERR(priv->pll[0])) return PTR_ERR(priv->pll[0]); } else { - clk_put(pllclk); + priv->pll_clk_nb.notifier_call = jh7110_pll0_clk_notifier_cb; + ret = clk_notifier_register(pllclk, &priv->pll_clk_nb); + if (ret) + return ret; priv->pll[0] = NULL; } diff --git a/drivers/clk/starfive/clk-starfive-jh71x0.h b/drivers/clk/starfive/clk-starfive-jh71x0.h index 34bb11c72eb73b..ebc55b9ef83705 100644 --- a/drivers/clk/starfive/clk-starfive-jh71x0.h +++ b/drivers/clk/starfive/clk-starfive-jh71x0.h @@ -114,6 +114,8 @@ struct jh71x0_clk_priv { spinlock_t rmw_lock; struct device *dev; void __iomem *base; + struct clk *original_clk; + struct notifier_block pll_clk_nb; struct clk_hw *pll[3]; struct jh71x0_clk reg[]; }; From 229493828da0ef2d35b1a0d8125c2313936064e4 Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Wed, 31 Jul 2024 11:59:09 +0530 Subject: [PATCH 033/268] clk: qcom: clk-alpha-pll: Fix the pll post div mask commit 2c4553e6c485a96b5d86989eb9654bf20e51e6dd upstream. The PLL_POST_DIV_MASK should be 0 to (width - 1) bits. Fix it. 
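A small user-space sketch of the off-by-one; GENMASK() is re-implemented here only for illustration, matching the kernel's inclusive-high-bit semantics.

#include <stdio.h>

#define GENMASK(h, l) \
	(((~0UL) << (l)) & (~0UL >> (8 * sizeof(unsigned long) - 1 - (h))))

int main(void)
{
	unsigned int width = 2;	/* a 2-bit post-divider field */

	printf("GENMASK(width, 0)     = %#lx\n", GENMASK(width, 0));     /* 0x7: three bits, too wide */
	printf("GENMASK(width - 1, 0) = %#lx\n", GENMASK(width - 1, 0)); /* 0x3: the intended 2-bit mask */
	return 0;
}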
Fixes: 1c3541145cbf ("clk: qcom: support for 2 bit PLL post divider") Cc: stable@vger.kernel.org Reviewed-by: Konrad Dybcio Signed-off-by: Satya Priya Kakitapalli Link: https://lore.kernel.org/r/20240731062916.2680823-2-quic_skakitap@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/clk-alpha-pll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 85aa089650eaa7..2e007b54dc7234 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -40,7 +40,7 @@ #define PLL_USER_CTL(p) ((p)->offset + (p)->regs[PLL_OFF_USER_CTL]) # define PLL_POST_DIV_SHIFT 8 -# define PLL_POST_DIV_MASK(p) GENMASK((p)->width, 0) +# define PLL_POST_DIV_MASK(p) GENMASK((p)->width - 1, 0) # define PLL_ALPHA_EN BIT(24) # define PLL_ALPHA_MODE BIT(25) # define PLL_VCO_SHIFT 20 From f58f2332893270b60b957713c2b4651024b31e1d Mon Sep 17 00:00:00 2001 From: Satya Priya Kakitapalli Date: Wed, 31 Jul 2024 11:59:10 +0530 Subject: [PATCH 034/268] clk: qcom: clk-alpha-pll: Fix the trion pll postdiv set rate API commit 4ad1ed6ef27cab94888bb3c740c14042d5c0dff2 upstream. Correct the pll postdiv shift used in clk_trion_pll_postdiv_set_rate API. The shift value is not same for different types of plls and should be taken from the pll's .post_div_shift member. Fixes: 548a909597d5 ("clk: qcom: clk-alpha-pll: Add support for Trion PLLs") Cc: stable@vger.kernel.org Signed-off-by: Satya Priya Kakitapalli Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240731062916.2680823-3-quic_skakitap@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- drivers/clk/qcom/clk-alpha-pll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/qcom/clk-alpha-pll.c b/drivers/clk/qcom/clk-alpha-pll.c index 2e007b54dc7234..1701cce74df796 100644 --- a/drivers/clk/qcom/clk-alpha-pll.c +++ b/drivers/clk/qcom/clk-alpha-pll.c @@ -1478,8 +1478,8 @@ clk_trion_pll_postdiv_set_rate(struct clk_hw *hw, unsigned long rate, } return regmap_update_bits(regmap, PLL_USER_CTL(pll), - PLL_POST_DIV_MASK(pll) << PLL_POST_DIV_SHIFT, - val << PLL_POST_DIV_SHIFT); + PLL_POST_DIV_MASK(pll) << pll->post_div_shift, + val << pll->post_div_shift); } const struct clk_ops clk_alpha_pll_postdiv_trion_ops = { From 8fecde9c3f9a4b97b68bb97c9f47e5b662586ba7 Mon Sep 17 00:00:00 2001 From: Simon Arlott Date: Thu, 22 Aug 2024 08:25:07 +0100 Subject: [PATCH 035/268] can: mcp251x: fix deadlock if an interrupt occurs during mcp251x_open commit 7dd9c26bd6cf679bcfdef01a8659791aa6487a29 upstream. The mcp251x_hw_wake() function is called with the mpc_lock mutex held and disables the interrupt handler so that no interrupts can be processed while waking the device. If an interrupt has already occurred then waiting for the interrupt handler to complete will deadlock because it will be trying to acquire the same mutex. CPU0 CPU1 ---- ---- mcp251x_open() mutex_lock(&priv->mcp_lock) request_threaded_irq() mcp251x_can_ist() mutex_lock(&priv->mcp_lock) mcp251x_hw_wake() disable_irq() <-- deadlock Use disable_irq_nosync() instead because the interrupt handler does everything while holding the mutex so it doesn't matter if it's still running. 
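The rule generalises beyond this driver; a sketch, with hypothetical names, of the safe shape when a mutex is shared with the threaded handler:

#include <linux/interrupt.h>
#include <linux/mutex.h>

struct foo_priv {
	struct mutex lock;	/* also taken by the threaded IRQ handler */
	int irq;
};

static void foo_wake(struct foo_priv *priv)
{
	mutex_lock(&priv->lock);
	/*
	 * disable_irq() would wait for a running handler that may itself be
	 * blocked on priv->lock; the _nosync variant only masks the line
	 * and returns immediately, so no deadlock is possible.
	 */
	disable_irq_nosync(priv->irq);
	/* ... wake the hardware ... */
	enable_irq(priv->irq);
	mutex_unlock(&priv->lock);
}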
Fixes: 8ce8c0abcba3 ("can: mcp251x: only reset hardware as required") Signed-off-by: Simon Arlott Reviewed-by: Przemek Kitszel Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/4fc08687-1d80-43fe-9f0d-8ef8475e75f6@0882a8b5-c6c3-11e9-b005-00805fc181fe.uuid.home.arpa Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/spi/mcp251x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/spi/mcp251x.c b/drivers/net/can/spi/mcp251x.c index 79c4bab5f7246e..8c56f85e87c1af 100644 --- a/drivers/net/can/spi/mcp251x.c +++ b/drivers/net/can/spi/mcp251x.c @@ -753,7 +753,7 @@ static int mcp251x_hw_wake(struct spi_device *spi) int ret; /* Force wakeup interrupt to wake device, but don't execute IST */ - disable_irq(spi->irq); + disable_irq_nosync(spi->irq); mcp251x_write_2regs(spi, CANINTE, CANINTE_WAKIE, CANINTF_WAKIF); /* Wait for oscillator startup timer after wake up */ From c318a4bb36f2d2127505606e51aa9285e684fe6e Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 5 Aug 2024 17:07:50 +0200 Subject: [PATCH 036/268] kexec_file: fix elfcorehdr digest exclusion when CONFIG_CRASH_HOTPLUG=y commit 6dacd79d28842ff01f18b4900d897741aac5999e upstream. Fix the condition to exclude the elfcorehdr segment from the SHA digest calculation. The j iterator is an index into the output sha_regions[] array, not into the input image->segment[] array. Once it reaches image->elfcorehdr_index, all subsequent segments are excluded. Besides, if the purgatory segment precedes the elfcorehdr segment, the elfcorehdr may be wrongly included in the calculation. Link: https://lkml.kernel.org/r/20240805150750.170739-1-petr.tesarik@suse.com Fixes: f7cc804a9fd4 ("kexec: exclude elfcorehdr from the segment digest") Signed-off-by: Petr Tesarik Acked-by: Baoquan He Cc: Eric Biederman Cc: Hari Bathini Cc: Sourabh Jain Cc: Eric DeVolder Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- kernel/kexec_file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f9a419cd22d4c7..830344627e9f20 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -728,7 +728,7 @@ static int kexec_calculate_store_digests(struct kimage *image) #ifdef CONFIG_CRASH_HOTPLUG /* Exclude elfcorehdr segment to allow future changes via hotplug */ - if (j == image->elfcorehdr_index) + if (i == image->elfcorehdr_index) continue; #endif From 1b2770e27d6d952f491bb362b657e5b2713c3efd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Aug 2024 18:16:06 +0100 Subject: [PATCH 037/268] mm: vmalloc: ensure vmap_block is initialised before adding to queue commit 3e3de7947c751509027d26b679ecd243bc9db255 upstream. Commit 8c61291fd850 ("mm: fix incorrect vbq reference in purge_fragmented_block") extended the 'vmap_block' structure to contain a 'cpu' field which is set at allocation time to the id of the initialising CPU. When a new 'vmap_block' is being instantiated by new_vmap_block(), the partially initialised structure is added to the local 'vmap_block_queue' xarray before the 'cpu' field has been initialised. If another CPU is concurrently walking the xarray (e.g. via vm_unmap_aliases()), then it may perform an out-of-bounds access to the remote queue thanks to an uninitialised index. 
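The fix below is the classic initialise-then-publish ordering; a sketch with hypothetical names, using xa_insert() as the publish point:

#include <linux/xarray.h>
#include <linux/gfp.h>
#include <linux/smp.h>

struct blk {
	int cpu;	/* read locklessly by other CPUs once published */
};

static int publish_blk(struct xarray *xa, unsigned long idx, struct blk *b)
{
	/* set every field a reader may use *before* the object becomes visible */
	b->cpu = raw_smp_processor_id();
	return xa_insert(xa, idx, b, GFP_KERNEL);
}

With the field set before the insert, a reader walking the structure can no longer observe an uninitialised index.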
This has been observed as UBSAN errors in Android: | Internal error: UBSAN: array index out of bounds: 00000000f2005512 [#1] PREEMPT SMP | | Call trace: | purge_fragmented_block+0x204/0x21c | _vm_unmap_aliases+0x170/0x378 | vm_unmap_aliases+0x1c/0x28 | change_memory_common+0x1dc/0x26c | set_memory_ro+0x18/0x24 | module_enable_ro+0x98/0x238 | do_init_module+0x1b0/0x310 Move the initialisation of 'vb->cpu' in new_vmap_block() ahead of the addition to the xarray. Link: https://lkml.kernel.org/r/20240812171606.17486-1-will@kernel.org Fixes: 8c61291fd850 ("mm: fix incorrect vbq reference in purge_fragmented_block") Signed-off-by: Will Deacon Reviewed-by: Baoquan He Reviewed-by: Uladzislau Rezki (Sony) Cc: Zhaoyang Huang Cc: Hailong.Liu Cc: Christoph Hellwig Cc: Lorenzo Stoakes Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 732ff66d1b5135..0148be0814af73 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2066,6 +2066,7 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) vb->dirty_max = 0; bitmap_set(vb->used_map, 0, (1UL << order)); INIT_LIST_HEAD(&vb->free_list); + vb->cpu = raw_smp_processor_id(); xa = addr_to_vb_xa(va->va_start); vb_idx = addr_to_vb_idx(va->va_start); @@ -2082,7 +2083,6 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask) * integrity together with list_for_each_rcu from read * side. */ - vb->cpu = raw_smp_processor_id(); vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu); spin_lock(&vbq->lock); list_add_tail_rcu(&vb->free_list, &vbq->free); From d034bff62faea1a2219e0d2f3d17263265f24087 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 27 Aug 2024 10:11:16 -0700 Subject: [PATCH 038/268] spi: rockchip: Resolve unbalanced runtime PM / system PM handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit be721b451affbecc4ba4eaac3b71cdbdcade1b1b upstream. Commit e882575efc77 ("spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops") stopped respecting runtime PM status and simply disabled clocks unconditionally when suspending the system. This causes problems when the device is already runtime suspended when we go to sleep -- in which case we double-disable clocks and produce a WARNing. Switch back to pm_runtime_force_{suspend,resume}(), because that still seems like the right thing to do, and the aforementioned commit makes no explanation why it stopped using it. Also, refactor some of the resume() error handling, because it's not actually a good idea to re-disable clocks on failure. 
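For reference, the resulting system-sleep shape with the rollback paths trimmed (names hypothetical); deferring the clock handling to runtime PM means an already runtime-suspended controller is not disabled a second time:

#include <linux/pm_runtime.h>
#include <linux/spi/spi.h>

static int foo_spi_suspend(struct device *dev)
{
	struct spi_controller *ctlr = dev_get_drvdata(dev);
	int ret;

	ret = spi_controller_suspend(ctlr);
	if (ret)
		return ret;

	/* respects runtime-PM state: clocks are only gated if still on */
	return pm_runtime_force_suspend(dev);
}

static int foo_spi_resume(struct device *dev)
{
	struct spi_controller *ctlr = dev_get_drvdata(dev);
	int ret;

	ret = pm_runtime_force_resume(dev);
	if (ret)
		return ret;

	return spi_controller_resume(ctlr);
}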
Fixes: e882575efc77 ("spi: rockchip: Suspend and resume the bus during NOIRQ_SYSTEM_SLEEP_PM ops") Cc: stable@vger.kernel.org Reported-by: Ondřej Jirman Closes: https://lore.kernel.org/lkml/20220621154218.sau54jeij4bunf56@core/ Signed-off-by: Brian Norris Link: https://patch.msgid.link/20240827171126.1115748-1-briannorris@chromium.org Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-rockchip.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 5b010094dace50..1f374cf4d6f65c 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -974,14 +974,16 @@ static int rockchip_spi_suspend(struct device *dev) { int ret; struct spi_controller *ctlr = dev_get_drvdata(dev); - struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); ret = spi_controller_suspend(ctlr); if (ret < 0) return ret; - clk_disable_unprepare(rs->spiclk); - clk_disable_unprepare(rs->apb_pclk); + ret = pm_runtime_force_suspend(dev); + if (ret < 0) { + spi_controller_resume(ctlr); + return ret; + } pinctrl_pm_select_sleep_state(dev); @@ -992,25 +994,14 @@ static int rockchip_spi_resume(struct device *dev) { int ret; struct spi_controller *ctlr = dev_get_drvdata(dev); - struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); pinctrl_pm_select_default_state(dev); - ret = clk_prepare_enable(rs->apb_pclk); + ret = pm_runtime_force_resume(dev); if (ret < 0) return ret; - ret = clk_prepare_enable(rs->spiclk); - if (ret < 0) - clk_disable_unprepare(rs->apb_pclk); - - ret = spi_controller_resume(ctlr); - if (ret < 0) { - clk_disable_unprepare(rs->spiclk); - clk_disable_unprepare(rs->apb_pclk); - } - - return 0; + return spi_controller_resume(ctlr); } #endif /* CONFIG_PM_SLEEP */ From 7a5f01828edf152c144d27cf63de446fdf2dc222 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Sep 2024 10:34:28 -0400 Subject: [PATCH 039/268] tracing/osnoise: Use a cpumask to know what threads are kthreads commit 177e1cc2f41235c145041eed03ef5bab18f32328 upstream. The start_kthread() and stop_thread() code was not always called with the interface_lock held. 
This means that the kthread variable could be unexpectedly changed causing the kthread_stop() to be called on it when it should not have been, leading to: while true; do rtla timerlat top -u -q & PID=$!; sleep 5; kill -INT $PID; sleep 0.001; kill -TERM $PID; wait $PID; done Causing the following OOPS: Oops: general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] PREEMPT SMP KASAN PTI KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017] CPU: 5 UID: 0 PID: 885 Comm: timerlatu/5 Not tainted 6.11.0-rc4-test-00002-gbc754cc76d1b-dirty #125 a533010b71dab205ad2f507188ce8c82203b0254 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 RIP: 0010:hrtimer_active+0x58/0x300 Code: 48 c1 ee 03 41 54 48 01 d1 48 01 d6 55 53 48 83 ec 20 80 39 00 0f 85 30 02 00 00 49 8b 6f 30 4c 8d 75 10 4c 89 f0 48 c1 e8 03 <0f> b6 3c 10 4c 89 f0 83 e0 07 83 c0 03 40 38 f8 7c 09 40 84 ff 0f RSP: 0018:ffff88811d97f940 EFLAGS: 00010202 RAX: 0000000000000002 RBX: ffff88823c6b5b28 RCX: ffffed10478d6b6b RDX: dffffc0000000000 RSI: ffffed10478d6b6c RDI: ffff88823c6b5b28 RBP: 0000000000000000 R08: ffff88823c6b5b58 R09: ffff88823c6b5b60 R10: ffff88811d97f957 R11: 0000000000000010 R12: 00000000000a801d R13: ffff88810d8b35d8 R14: 0000000000000010 R15: ffff88823c6b5b28 FS: 0000000000000000(0000) GS:ffff88823c680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000561858ad7258 CR3: 000000007729e001 CR4: 0000000000170ef0 Call Trace: ? die_addr+0x40/0xa0 ? exc_general_protection+0x154/0x230 ? asm_exc_general_protection+0x26/0x30 ? hrtimer_active+0x58/0x300 ? __pfx_mutex_lock+0x10/0x10 ? __pfx_locks_remove_file+0x10/0x10 hrtimer_cancel+0x15/0x40 timerlat_fd_release+0x8e/0x1f0 ? security_file_release+0x43/0x80 __fput+0x372/0xb10 task_work_run+0x11e/0x1f0 ? _raw_spin_lock+0x85/0xe0 ? __pfx_task_work_run+0x10/0x10 ? poison_slab_object+0x109/0x170 ? do_exit+0x7a0/0x24b0 do_exit+0x7bd/0x24b0 ? __pfx_migrate_enable+0x10/0x10 ? __pfx_do_exit+0x10/0x10 ? __pfx_read_tsc+0x10/0x10 ? ktime_get+0x64/0x140 ? _raw_spin_lock_irq+0x86/0xe0 do_group_exit+0xb0/0x220 get_signal+0x17ba/0x1b50 ? vfs_read+0x179/0xa40 ? timerlat_fd_read+0x30b/0x9d0 ? __pfx_get_signal+0x10/0x10 ? __pfx_timerlat_fd_read+0x10/0x10 arch_do_signal_or_restart+0x8c/0x570 ? __pfx_arch_do_signal_or_restart+0x10/0x10 ? vfs_read+0x179/0xa40 ? ksys_read+0xfe/0x1d0 ? __pfx_ksys_read+0x10/0x10 syscall_exit_to_user_mode+0xbc/0x130 do_syscall_64+0x74/0x110 ? __pfx___rseq_handle_notify_resume+0x10/0x10 ? __pfx_ksys_read+0x10/0x10 ? fpregs_restore_userregs+0xdb/0x1e0 ? fpregs_restore_userregs+0xdb/0x1e0 ? syscall_exit_to_user_mode+0x116/0x130 ? do_syscall_64+0x74/0x110 ? do_syscall_64+0x74/0x110 ? do_syscall_64+0x74/0x110 entry_SYSCALL_64_after_hwframe+0x71/0x79 RIP: 0033:0x7ff0070eca9c Code: Unable to access opcode bytes at 0x7ff0070eca72. 
RSP: 002b:00007ff006dff8c0 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: 0000000000000000 RBX: 0000000000000005 RCX: 00007ff0070eca9c RDX: 0000000000000400 RSI: 00007ff006dff9a0 RDI: 0000000000000003 RBP: 00007ff006dffde0 R08: 0000000000000000 R09: 00007ff000000ba0 R10: 00007ff007004b08 R11: 0000000000000246 R12: 0000000000000003 R13: 00007ff006dff9a0 R14: 0000000000000007 R15: 0000000000000008 Modules linked in: snd_hda_intel snd_intel_dspcfg snd_intel_sdw_acpi snd_hda_codec snd_hwdep snd_hda_core ---[ end trace 0000000000000000 ]--- This is because it would mistakenly call kthread_stop() on a user space thread making it "exit" before it actually exits. Since kthreads are created based on global behavior, use a cpumask to know when kthreads are running and that they need to be shutdown before proceeding to do new work. Link: https://lore.kernel.org/all/20240820130001.124768-1-tglozar@redhat.com/ This was debugged by using the persistent ring buffer: Link: https://lore.kernel.org/all/20240823013902.135036960@goodmis.org/ Note, locking was originally used to fix this, but that proved to cause too many deadlocks to work around: https://lore.kernel.org/linux-trace-kernel/20240823102816.5e55753b@gandalf.local.home/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: "Luis Claudio R. Goncalves" Link: https://lore.kernel.org/20240904103428.08efdf4c@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Reported-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_osnoise.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index a8e28f9b9271cf..49f10c7f7fd01d 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1612,6 +1612,7 @@ static int run_osnoise(void) static struct cpumask osnoise_cpumask; static struct cpumask save_cpumask; +static struct cpumask kthread_cpumask; /* * osnoise_sleep - sleep until the next period @@ -1675,6 +1676,7 @@ static inline int osnoise_migration_pending(void) */ mutex_lock(&interface_lock); this_cpu_osn_var()->kthread = NULL; + cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); mutex_unlock(&interface_lock); return 1; @@ -1947,9 +1949,10 @@ static void stop_kthread(unsigned int cpu) kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; if (kthread) { - if (test_bit(OSN_WORKLOAD, &osnoise_options)) { + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && + !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { kthread_stop(kthread); - } else { + } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { /* * This is a user thread waiting on the timerlat_fd. 
We need * to close all users, and the best way to guarantee this is @@ -2021,6 +2024,7 @@ static int start_kthread(unsigned int cpu) } per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; + cpumask_set_cpu(cpu, &kthread_cpumask); return 0; } @@ -2048,8 +2052,16 @@ static int start_per_cpu_kthreads(void) */ cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { + struct task_struct *kthread; + + kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; + if (!WARN_ON(!kthread)) + kthread_stop(kthread); + } per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + } for_each_cpu(cpu, current_mask) { retval = start_kthread(cpu); From 8c72f0b2c45f21cb8b00fc37f79f632d7e46c2ed Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Sep 2024 08:53:30 -0400 Subject: [PATCH 040/268] tracing/timerlat: Only clear timer if a kthread exists commit e6a53481da292d970d1edf0d8831121d1c5e2f0d upstream. The timerlat tracer can use user space threads to check for osnoise and timer latency. If the program using this is killed via a SIGTERM, the threads are shutdown one at a time and another tracing instance can start up resetting the threads before they are fully closed. That causes the hrtimer assigned to the kthread to be shutdown and freed twice when the dying thread finally closes the file descriptors, causing a use-after-free bug. Only cancel the hrtimer if the associated thread is still around. Also add the interface_lock around the resetting of the tlat_var->kthread. Note, this is just a quick fix that can be backported to stable. A real fix is to have a better synchronization between the shutdown of old threads and the starting of new ones. Link: https://lore.kernel.org/all/20240820130001.124768-1-tglozar@redhat.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: "Luis Claudio R. Goncalves" Link: https://lore.kernel.org/20240905085330.45985730@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Reported-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_osnoise.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 49f10c7f7fd01d..32993e380c2ff9 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -252,6 +252,11 @@ static inline struct timerlat_variables *this_cpu_tmr_var(void) return this_cpu_ptr(&per_cpu_timerlat_var); } +/* + * Protect the interface. + */ +static struct mutex interface_lock; + /* * tlat_var_reset - Reset the values of the given timerlat_variables */ @@ -259,14 +264,20 @@ static inline void tlat_var_reset(void) { struct timerlat_variables *tlat_var; int cpu; + + /* Synchronize with the timerlat interfaces */ + mutex_lock(&interface_lock); /* * So far, all the values are initialized as 0, so * zeroing the structure is perfect. */ for_each_cpu(cpu, cpu_online_mask) { tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); } + mutex_unlock(&interface_lock); } #else /* CONFIG_TIMERLAT_TRACER */ #define tlat_var_reset() do {} while (0) @@ -331,11 +342,6 @@ struct timerlat_sample { }; #endif -/* - * Protect the interface. - */ -static struct mutex interface_lock; - /* * Tracer data. 
*/ @@ -2591,7 +2597,8 @@ static int timerlat_fd_release(struct inode *inode, struct file *file) osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); - hrtimer_cancel(&tlat_var->timer); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); osn_var->sampling = 0; From 993ecb4ec1f383e862e9d255514bbc9e3953eae7 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Tue, 27 Aug 2024 20:46:54 +0800 Subject: [PATCH 041/268] tracing: Avoid possible softlockup in tracing_iter_reset() commit 49aa8a1f4d6800721c7971ed383078257f12e8f9 upstream. In __tracing_open(), when max latency tracers took place on the cpu, the time start of its buffer would be updated, then event entries with timestamps being earlier than start of the buffer would be skipped (see tracing_iter_reset()). Softlockup will occur if the kernel is non-preemptible and too many entries were skipped in the loop that reset every cpu buffer, so add cond_resched() to avoid it. Cc: stable@vger.kernel.org Fixes: 2f26ebd549b9a ("tracing: use timestamp to determine start of latency traces") Link: https://lore.kernel.org/20240827124654.3817443-1-zhengyejian@huaweicloud.com Suggested-by: Steven Rostedt Signed-off-by: Zheng Yejian Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index fd398af792b4a5..be878005e34499 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -4156,6 +4156,8 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) break; entries++; ring_buffer_iter_advance(buf_iter); + /* This could be a big loop */ + cond_resched(); } per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; From b4fdabffae14cca2c80d99bd81f3f27239ac7f5e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 5 Sep 2024 11:33:59 -0400 Subject: [PATCH 042/268] tracing/timerlat: Add interface_lock around clearing of kthread in stop_kthread() commit 5bfbcd1ee57b607fd29e4645c7f350dd385dd9ad upstream. The timerlat interface will get and put the task that is part of the "kthread" field of the osn_var to keep it around until all references are released. But here's a race in the "stop_kthread()" code that will call put_task_struct() on the kthread if it is not a kernel thread. This can race with the releasing of the references to that task struct and the put_task_struct() can be called twice when it should have been called just once. Take the interface_lock() in stop_kthread() to synchronize this change. But to do so, the function stop_per_cpu_kthreads() needs to change the loop from for_each_online_cpu() to for_each_possible_cpu() and remove the cpu_read_lock(), as the interface_lock can not be taken while the cpu locks are held. The only side effect of this change is that it may do some extra work, as the per_cpu variables of the offline CPUs would not be set anyway, and would simply be skipped in the loop. Remove unneeded "return;" in stop_kthread(). Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Tomas Glozar Cc: John Kacur Cc: "Luis Claudio R. 
Goncalves" Link: https://lore.kernel.org/20240905113359.2b934242@gandalf.local.home Fixes: e88ed227f639e ("tracing/timerlat: Add user-space interface") Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_osnoise.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 32993e380c2ff9..5b06f67879f5fa 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1953,8 +1953,12 @@ static void stop_kthread(unsigned int cpu) { struct task_struct *kthread; + mutex_lock(&interface_lock); kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; if (kthread) { + per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + mutex_unlock(&interface_lock); + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { kthread_stop(kthread); @@ -1967,8 +1971,8 @@ static void stop_kthread(unsigned int cpu) kill_pid(kthread->thread_pid, SIGKILL, 1); put_task_struct(kthread); } - per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; } else { + mutex_unlock(&interface_lock); /* if no workload, just return */ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { /* @@ -1976,7 +1980,6 @@ static void stop_kthread(unsigned int cpu) */ per_cpu(per_cpu_osnoise_var, cpu).sampling = false; barrier(); - return; } } } @@ -1991,12 +1994,8 @@ static void stop_per_cpu_kthreads(void) { int cpu; - cpus_read_lock(); - - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) stop_kthread(cpu); - - cpus_read_unlock(); } /* From 4a594acc12d5954cdc71d4450a386748bf3d136a Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 13 Aug 2024 22:25:22 +0200 Subject: [PATCH 043/268] userfaultfd: don't BUG_ON() if khugepaged yanks our page table commit 4828d207dc5161dc7ddf9a4f6dcfd80c7dd7d20a upstream. Since khugepaged was changed to allow retracting page tables in file mappings without holding the mmap lock, these BUG_ON()s are wrong - get rid of them. We could also remove the preceding "if (unlikely(...))" block, but then we could reach pte_offset_map_lock() with transhuge pages not just for file mappings but also for anonymous mappings - which would probably be fine but I think is not necessarily expected. Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-2-5efa61078a41@google.com Fixes: 1d65b771bc08 ("mm/khugepaged: retract_page_tables() without mmap or vma lock") Signed-off-by: Jann Horn Reviewed-by: Qi Zheng Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Pavel Emelyanov Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e76faba1027971..b5b5cb1f922c8b 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -717,9 +717,10 @@ static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm, err = -EFAULT; break; } - - BUG_ON(pmd_none(*dst_pmd)); - BUG_ON(pmd_trans_huge(*dst_pmd)); + /* + * For shmem mappings, khugepaged is allowed to remove page + * tables under us; pte_offset_map_lock() will deal with that. + */ err = mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, src_addr, flags, &folio); From 3c6b4bcf37845c9359aed926324bed66bdd2448d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 13 Aug 2024 22:25:21 +0200 Subject: [PATCH 044/268] userfaultfd: fix checks for huge PMDs commit 71c186efc1b2cf1aeabfeff3b9bd5ac4c5ac14d8 upstream. 
Patch series "userfaultfd: fix races around pmd_trans_huge() check", v2. The pmd_trans_huge() code in mfill_atomic() is wrong in three different ways depending on kernel version: 1. The pmd_trans_huge() check is racy and can lead to a BUG_ON() (if you hit the right two race windows) - I've tested this in a kernel build with some extra mdelay() calls. See the commit message for a description of the race scenario. On older kernels (before 6.5), I think the same bug can even theoretically lead to accessing transhuge page contents as a page table if you hit the right 5 narrow race windows (I haven't tested this case). 2. As pointed out by Qi Zheng, pmd_trans_huge() is not sufficient for detecting PMDs that don't point to page tables. On older kernels (before 6.5), you'd just have to win a single fairly wide race to hit this. I've tested this on 6.1 stable by racing migration (with a mdelay() patched into try_to_migrate()) against UFFDIO_ZEROPAGE - on my x86 VM, that causes a kernel oops in ptlock_ptr(). 3. On newer kernels (>=6.5), for shmem mappings, khugepaged is allowed to yank page tables out from under us (though I haven't tested that), so I think the BUG_ON() checks in mfill_atomic() are just wrong. I decided to write two separate fixes for these (one fix for bugs 1+2, one fix for bug 3), so that the first fix can be backported to kernels affected by bugs 1+2. This patch (of 2): This fixes two issues. I discovered that the following race can occur: mfill_atomic other thread ============ ============ pmdp_get_lockless() [reads none pmd] __pte_alloc [no-op] BUG_ON(pmd_none(*dst_pmd)) I have experimentally verified this in a kernel with extra mdelay() calls; the BUG_ON(pmd_none(*dst_pmd)) triggers. On kernels newer than commit 0d940a9b270b ("mm/pgtable: allow pte_offset_map[_lock]() to fail"), this can't lead to anything worse than a BUG_ON(), since the page table access helpers are actually designed to deal with page tables concurrently disappearing; but on older kernels (<=6.4), I think we could probably theoretically race past the two BUG_ON() checks and end up treating a hugepage as a page table. The second issue is that, as Qi Zheng pointed out, there are other types of huge PMDs that pmd_trans_huge() can't catch: devmap PMDs and swap PMDs (in particular, migration PMDs). On <=6.4, this is worse than the first issue: If mfill_atomic() runs on a PMD that contains a migration entry (which just requires winning a single, fairly wide race), it will pass the PMD to pte_offset_map_lock(), which assumes that the PMD points to a page table. Breakage follows: First, the kernel tries to take the PTE lock (which will crash or maybe worse if there is no "struct page" for the address bits in the migration entry PMD - I think at least on X86 there usually is no corresponding "struct page" thanks to the PTE inversion mitigation, amd64 looks different). If that didn't crash, the kernel would next try to write a PTE into what it wrongly thinks is a page table. As part of fixing these issues, get rid of the check for pmd_trans_huge() before __pte_alloc() - that's redundant, we're going to have to check for that after the __pte_alloc() anyway. Backport note: pmdp_get_lockless() is pmd_read_atomic() in older kernels. 
Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-0-5efa61078a41@google.com Link: https://lkml.kernel.org/r/20240813-uffd-thp-flip-fix-v2-1-5efa61078a41@google.com Fixes: c1a4de99fada ("userfaultfd: mcopy_atomic|mfill_zeropage: UFFDIO_COPY|UFFDIO_ZEROPAGE preparation") Signed-off-by: Jann Horn Acked-by: David Hildenbrand Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Jann Horn Cc: Pavel Emelyanov Cc: Qi Zheng Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/userfaultfd.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index b5b5cb1f922c8b..92fe2a76f4b512 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -699,21 +699,23 @@ static __always_inline ssize_t mfill_atomic(struct mm_struct *dst_mm, } dst_pmdval = pmdp_get_lockless(dst_pmd); - /* - * If the dst_pmd is mapped as THP don't - * override it and just be strict. - */ - if (unlikely(pmd_trans_huge(dst_pmdval))) { - err = -EEXIST; - break; - } if (unlikely(pmd_none(dst_pmdval)) && unlikely(__pte_alloc(dst_mm, dst_pmd))) { err = -ENOMEM; break; } - /* If an huge pmd materialized from under us fail */ - if (unlikely(pmd_trans_huge(*dst_pmd))) { + dst_pmdval = pmdp_get_lockless(dst_pmd); + /* + * If the dst_pmd is THP don't override it and just be strict. + * (This includes the case where the PMD used to be THP and + * changed back to none after __pte_alloc().) + */ + if (unlikely(!pmd_present(dst_pmdval) || pmd_trans_huge(dst_pmdval) || + pmd_devmap(dst_pmdval))) { + err = -EEXIST; + break; + } + if (unlikely(pmd_bad(dst_pmdval))) { err = -EFAULT; break; } From e0d724932ad12e3528f4ce97fc0f6078d0cce4bc Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Mon, 26 Aug 2024 19:20:56 +0800 Subject: [PATCH 045/268] fscache: delete fscache_cookie_lru_timer when fscache exits to avoid UAF commit 72a6e22c604c95ddb3b10b5d3bb85b6ff4dbc34f upstream. The fscache_cookie_lru_timer is initialized when the fscache module is inserted, but is not deleted when the fscache module is removed. If timer_reduce() is called before removing the fscache module, the fscache_cookie_lru_timer will be added to the timer list of the current cpu. Afterwards, a use-after-free will be triggered in the softIRQ after removing the fscache module, as follows: ================================================================== BUG: unable to handle page fault for address: fffffbfff803c9e9 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 21ffea067 P4D 21ffea067 PUD 21ffe6067 PMD 110a7c067 PTE 0 Oops: Oops: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 1 UID: 0 PID: 0 Comm: swapper/1 Tainted: G W 6.11.0-rc3 #855 Tainted: [W]=WARN RIP: 0010:__run_timer_base.part.0+0x254/0x8a0 Call Trace: tmigr_handle_remote_up+0x627/0x810 __walk_groups.isra.0+0x47/0x140 tmigr_handle_remote+0x1fa/0x2f0 handle_softirqs+0x180/0x590 irq_exit_rcu+0x84/0xb0 sysvec_apic_timer_interrupt+0x6e/0x90 asm_sysvec_apic_timer_interrupt+0x1a/0x20 RIP: 0010:default_idle+0xf/0x20 default_idle_call+0x38/0x60 do_idle+0x2b5/0x300 cpu_startup_entry+0x54/0x60 start_secondary+0x20d/0x280 common_startup_64+0x13e/0x148 Modules linked in: [last unloaded: netfs] ================================================================== Therefore delete fscache_cookie_lru_timer when removing the fscahe module. 
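The same rule in miniature, as a hypothetical module sketch: any timer the module can arm must be shut down on the exit path, before the code it would call is unloaded.

#include <linux/module.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list foo_timer;

static void foo_timer_fn(struct timer_list *t)
{
	/* would run from softirq context */
}

static int __init foo_init(void)
{
	timer_setup(&foo_timer, foo_timer_fn, 0);
	mod_timer(&foo_timer, jiffies + HZ);
	return 0;
}

static void __exit foo_exit(void)
{
	/* without this, a pending timer fires after the module text is gone */
	timer_shutdown_sync(&foo_timer);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");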
Fixes: 12bb21a29c19 ("fscache: Implement cookie user counting and resource pinning") Cc: stable@kernel.org Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240826112056.2458299-1-libaokun@huaweicloud.com Acked-by: David Howells Signed-off-by: Christian Brauner Signed-off-by: Greg Kroah-Hartman --- fs/fscache/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fscache/main.c b/fs/fscache/main.c index dad85fd84f6f9f..7a60cd96e87ea2 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -114,6 +114,7 @@ static void __exit fscache_exit(void) kmem_cache_destroy(fscache_cookie_jar); fscache_proc_cleanup(); + timer_shutdown_sync(&fscache_cookie_lru_timer); destroy_workqueue(fscache_wq); pr_notice("Unloaded\n"); } From 05e08297c3c298d8ec28e5a5adb55840312dd87e Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 4 Sep 2024 13:16:05 -0400 Subject: [PATCH 046/268] eventfs: Use list_del_rcu() for SRCU protected list variable commit d2603279c7d645bf0d11fa253b23f1ab48fc8d3c upstream. Chi Zhiling reported: We found a null pointer accessing in tracefs[1], the reason is that the variable 'ei_child' is set to LIST_POISON1, that means the list was removed in eventfs_remove_rec. so when access the ei_child->is_freed, the panic triggered. by the way, the following script can reproduce this panic loop1 (){ while true do echo "p:kp submit_bio" > /sys/kernel/debug/tracing/kprobe_events echo "" > /sys/kernel/debug/tracing/kprobe_events done } loop2 (){ while true do tree /sys/kernel/debug/tracing/events/kprobes/ done } loop1 & loop2 [1]: [ 1147.959632][T17331] Unable to handle kernel paging request at virtual address dead000000000150 [ 1147.968239][T17331] Mem abort info: [ 1147.971739][T17331] ESR = 0x0000000096000004 [ 1147.976172][T17331] EC = 0x25: DABT (current EL), IL = 32 bits [ 1147.982171][T17331] SET = 0, FnV = 0 [ 1147.985906][T17331] EA = 0, S1PTW = 0 [ 1147.989734][T17331] FSC = 0x04: level 0 translation fault [ 1147.995292][T17331] Data abort info: [ 1147.998858][T17331] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 1148.005023][T17331] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 1148.010759][T17331] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 1148.016752][T17331] [dead000000000150] address between user and kernel address ranges [ 1148.024571][T17331] Internal error: Oops: 0000000096000004 [#1] SMP [ 1148.030825][T17331] Modules linked in: team_mode_loadbalance team nlmon act_gact cls_flower sch_ingress bonding tls macvlan dummy ib_core bridge stp llc veth amdgpu amdxcp mfd_core gpu_sched drm_exec drm_buddy radeon crct10dif_ce video drm_suballoc_helper ghash_ce drm_ttm_helper sha2_ce ttm sha256_arm64 i2c_algo_bit sha1_ce sbsa_gwdt cp210x drm_display_helper cec sr_mod cdrom drm_kms_helper binfmt_misc sg loop fuse drm dm_mod nfnetlink ip_tables autofs4 [last unloaded: tls] [ 1148.072808][T17331] CPU: 3 PID: 17331 Comm: ls Tainted: G W ------- ---- 6.6.43 #2 [ 1148.081751][T17331] Source Version: 21b3b386e948bedd29369af66f3e98ab01b1c650 [ 1148.088783][T17331] Hardware name: Greatwall GW-001M1A-FTF/GW-001M1A-FTF, BIOS KunLun BIOS V4.0 07/16/2020 [ 1148.098419][T17331] pstate: 20000005 (nzCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 1148.106060][T17331] pc : eventfs_iterate+0x2c0/0x398 [ 1148.111017][T17331] lr : eventfs_iterate+0x2fc/0x398 [ 1148.115969][T17331] sp : ffff80008d56bbd0 [ 1148.119964][T17331] x29: ffff80008d56bbf0 x28: ffff001ff5be2600 x27: 0000000000000000 [ 1148.127781][T17331] x26: ffff001ff52ca4e0 x25: 0000000000009977 x24: dead000000000100 [ 
1148.135598][T17331] x23: 0000000000000000 x22: 000000000000000b x21: ffff800082645f10 [ 1148.143415][T17331] x20: ffff001fddf87c70 x19: ffff80008d56bc90 x18: 0000000000000000 [ 1148.151231][T17331] x17: 0000000000000000 x16: 0000000000000000 x15: ffff001ff52ca4e0 [ 1148.159048][T17331] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 [ 1148.166864][T17331] x11: 0000000000000000 x10: 0000000000000000 x9 : ffff8000804391d0 [ 1148.174680][T17331] x8 : 0000000180000000 x7 : 0000000000000018 x6 : 0000aaab04b92862 [ 1148.182498][T17331] x5 : 0000aaab04b92862 x4 : 0000000080000000 x3 : 0000000000000068 [ 1148.190314][T17331] x2 : 000000000000000f x1 : 0000000000007ea8 x0 : 0000000000000001 [ 1148.198131][T17331] Call trace: [ 1148.201259][T17331] eventfs_iterate+0x2c0/0x398 [ 1148.205864][T17331] iterate_dir+0x98/0x188 [ 1148.210036][T17331] __arm64_sys_getdents64+0x78/0x160 [ 1148.215161][T17331] invoke_syscall+0x78/0x108 [ 1148.219593][T17331] el0_svc_common.constprop.0+0x48/0xf0 [ 1148.224977][T17331] do_el0_svc+0x24/0x38 [ 1148.228974][T17331] el0_svc+0x40/0x168 [ 1148.232798][T17331] el0t_64_sync_handler+0x120/0x130 [ 1148.237836][T17331] el0t_64_sync+0x1a4/0x1a8 [ 1148.242182][T17331] Code: 54ffff6c f9400676 910006d6 f9000676 (b9405300) [ 1148.248955][T17331] ---[ end trace 0000000000000000 ]--- The issue is that list_del() is used on an SRCU protected list variable before the synchronization occurs. This can poison the list pointers while there is a reader iterating the list. This is simply fixed by using list_del_rcu() that is specifically made for this purpose. Link: https://lore.kernel.org/linux-trace-kernel/20240829085025.3600021-1-chizhiling@163.com/ Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20240904131605.640d42b1@gandalf.local.home Fixes: 43aa6f97c2d03 ("eventfs: Get rid of dentry pointers without refcounts") Reported-by: Chi Zhiling Tested-by: Chi Zhiling Signed-off-by: Steven Rostedt (Google) Signed-off-by: Greg Kroah-Hartman --- fs/tracefs/event_inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index aa54be1ce12428..73a08db3b7a851 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -935,7 +935,7 @@ static void eventfs_remove_rec(struct eventfs_inode *ei, int level) list_for_each_entry(ei_child, &ei->children, list) eventfs_remove_rec(ei_child, level + 1); - list_del(&ei->list); + list_del_rcu(&ei->list); free_ei(ei); } From 9e0bff4900b5d412a9bafe4baeaa6facd34f671c Mon Sep 17 00:00:00 2001 From: Souradeep Chakrabarti Date: Mon, 2 Sep 2024 05:43:47 -0700 Subject: [PATCH 047/268] net: mana: Fix error handling in mana_create_txq/rxq's NAPI cleanup commit b6ecc662037694488bfff7c9fd21c405df8411f2 upstream. Currently napi_disable() gets called during rxq and txq cleanup, even before napi is enabled and hrtimer is initialized. It causes kernel panic. ? page_fault_oops+0x136/0x2b0 ? page_counter_cancel+0x2e/0x80 ? do_user_addr_fault+0x2f2/0x640 ? refill_obj_stock+0xc4/0x110 ? exc_page_fault+0x71/0x160 ? asm_exc_page_fault+0x27/0x30 ? __mmdrop+0x10/0x180 ? __mmdrop+0xec/0x180 ? hrtimer_active+0xd/0x50 hrtimer_try_to_cancel+0x2c/0xf0 hrtimer_cancel+0x15/0x30 napi_disable+0x65/0x90 mana_destroy_rxq+0x4c/0x2f0 mana_create_rxq.isra.0+0x56c/0x6d0 ? mana_uncfg_vport+0x50/0x50 mana_alloc_queues+0x21b/0x320 ? 
skb_dequeue+0x5f/0x80 Cc: stable@vger.kernel.org Fixes: e1b5683ff62e ("net: mana: Move NAPI from EQ to CQ") Signed-off-by: Souradeep Chakrabarti Reviewed-by: Haiyang Zhang Reviewed-by: Shradha Gupta Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/microsoft/mana/mana_en.c | 22 +++++++++++-------- include/net/mana/mana.h | 2 ++ 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 765c5fc158f8f2..d8cce3771af213 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1858,10 +1858,12 @@ static void mana_destroy_txq(struct mana_port_context *apc) for (i = 0; i < apc->num_queues; i++) { napi = &apc->tx_qp[i].tx_cq.napi; - napi_synchronize(napi); - napi_disable(napi); - netif_napi_del(napi); - + if (apc->tx_qp[i].txq.napi_initialized) { + napi_synchronize(napi); + napi_disable(napi); + netif_napi_del(napi); + apc->tx_qp[i].txq.napi_initialized = false; + } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq); @@ -1917,6 +1919,7 @@ static int mana_create_txq(struct mana_port_context *apc, txq->ndev = net; txq->net_txq = netdev_get_tx_queue(net, i); txq->vp_offset = apc->tx_vp_offset; + txq->napi_initialized = false; skb_queue_head_init(&txq->pending_skbs); memset(&spec, 0, sizeof(spec)); @@ -1983,6 +1986,7 @@ static int mana_create_txq(struct mana_port_context *apc, netif_napi_add_tx(net, &cq->napi, mana_poll); napi_enable(&cq->napi); + txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); } @@ -1994,7 +1998,7 @@ static int mana_create_txq(struct mana_port_context *apc, } static void mana_destroy_rxq(struct mana_port_context *apc, - struct mana_rxq *rxq, bool validate_state) + struct mana_rxq *rxq, bool napi_initialized) { struct gdma_context *gc = apc->ac->gdma_dev->gdma_context; @@ -2009,15 +2013,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc, napi = &rxq->rx_cq.napi; - if (validate_state) + if (napi_initialized) { napi_synchronize(napi); - napi_disable(napi); + napi_disable(napi); + netif_napi_del(napi); + } xdp_rxq_info_unreg(&rxq->xdp_rxq); - netif_napi_del(napi); - mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj); mana_deinit_cq(apc, &rxq->rx_cq); diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 37ccce56a73e87..28e110f733ffd3 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -97,6 +97,8 @@ struct mana_txq { atomic_t pending_sends; + bool napi_initialized; + struct mana_stats_tx stats; }; From ec36815215d76b1eb791ba66170b3a0d1d937b49 Mon Sep 17 00:00:00 2001 From: Matt Johnston Date: Thu, 29 Aug 2024 15:43:46 +0800 Subject: [PATCH 048/268] net: mctp-serial: Fix missing escapes on transmit commit f962e8361adfa84e8252d3fc3e5e6bb879f029b1 upstream. 0x7d and 0x7e bytes are meant to be escaped in the data portion of frames, but this didn't occur since next_chunk_len() had an off-by-one error. That also resulted in the final byte of a payload being written as a separate tty write op. The chunk prior to an escaped byte would be one byte short, and the next call would never test the txpos+1 case, which is where the escaped byte was located. That meant it never hit the escaping case in mctp_serial_tx_work(). 
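The effect is easy to reproduce with a stand-alone model of the chunking loop (buffer handling is simplified and the names are hypothetical; the loop bound mirrors the change below), run against the example payload:

  #include <stdio.h>

  static int needs_escape(unsigned char c)
  {
  	return c == 0x7d || c == 0x7e;
  }

  /* off_by_one = 1 models the old loop bound, 0 the fixed one */
  static int next_chunk_len(const unsigned char *buf, int pos, int len,
  			  int off_by_one)
  {
  	int i;

  	if (needs_escape(buf[pos]))
  		return 1;

  	for (i = 1; i + pos + off_by_one < len; i++) {
  		if (needs_escape(buf[pos + i + off_by_one]))
  			break;
  	}
  	return i;
  }

  int main(void)
  {
  	const unsigned char payload[] = { 0x01, 0x00, 0x08, 0xc8, 0x7e, 0x80, 0x02 };
  	const int len = (int)sizeof(payload);

  	for (int off_by_one = 1; off_by_one >= 0; off_by_one--) {
  		printf("%s chunks:", off_by_one ? "old  " : "fixed");
  		for (int pos = 0, n; pos < len; pos += n) {
  			n = next_chunk_len(payload, pos, len, off_by_one);
  			printf("  [");
  			for (int i = 0; i < n; i++)
  				printf("%s%02x", i ? " " : "", payload[pos + i]);
  			printf("]");
  		}
  		printf("\n");
  	}
  	return 0;
  }

With the old bound the loop never examines the byte at txpos + i itself, so the 0x7e ends up inside a chunk and is written without an escape; with the fix it becomes its own single-byte chunk, as in the corrected chunk list below.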
Example Input: 01 00 08 c8 7e 80 02 Previous incorrect chunks from next_chunk_len(): 01 00 08 c8 7e 80 02 With this fix: 01 00 08 c8 7e 80 02 Cc: stable@vger.kernel.org Fixes: a0c2ccd9b5ad ("mctp: Add MCTP-over-serial transport binding") Signed-off-by: Matt Johnston Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/mctp/mctp-serial.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c index 5bf6fdff701cd6..346e6ad36054eb 100644 --- a/drivers/net/mctp/mctp-serial.c +++ b/drivers/net/mctp/mctp-serial.c @@ -91,8 +91,8 @@ static int next_chunk_len(struct mctp_serial *dev) * will be those non-escaped bytes, and does not include the escaped * byte. */ - for (i = 1; i + dev->txpos + 1 < dev->txlen; i++) { - if (needs_escape(dev->txbuf[dev->txpos + i + 1])) + for (i = 1; i + dev->txpos < dev->txlen; i++) { + if (needs_escape(dev->txbuf[dev->txpos + i])) break; } From 55c834bc9f822af0b799ced3deb4fa9944e77d31 Mon Sep 17 00:00:00 2001 From: Mitchell Levy Date: Mon, 12 Aug 2024 13:44:12 -0700 Subject: [PATCH 049/268] x86/fpu: Avoid writing LBR bit to IA32_XSS unless supported commit 2848ff28d180bd63a95da8e5dcbcdd76c1beeb7b upstream. There are two distinct CPU features related to the use of XSAVES and LBR: whether LBR is itself supported and whether XSAVES supports LBR. The LBR subsystem correctly checks both in intel_pmu_arch_lbr_init(), but the XSTATE subsystem does not. The LBR bit is only removed from xfeatures_mask_independent when LBR is not supported by the CPU, but there is no validation of XSTATE support. If XSAVES does not support LBR the write to IA32_XSS causes a #GP fault, leaving the state of IA32_XSS unchanged, i.e. zero. The fault is handled with a warning and the boot continues. Consequently the next XRSTORS which tries to restore supervisor state fails with #GP because the RFBM has zero for all supervisor features, which does not match the XCOMP_BV field. As XFEATURE_MASK_FPSTATE includes supervisor features setting up the FPU causes a #GP, which ends up in fpu_reset_from_exception_fixup(). That fails due to the same problem resulting in recursive #GPs until the kernel runs out of stack space and double faults. Prevent this by storing the supported independent features in fpu_kernel_cfg during XSTATE initialization and use that cached value for retrieving the independent feature bits to be written into IA32_XSS. [ tglx: Massaged change log ] Fixes: f0dccc9da4c0 ("x86/fpu/xstate: Support dynamic supervisor feature for LBR") Suggested-by: Thomas Gleixner Signed-off-by: Mitchell Levy Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240812-xsave-lbr-fix-v3-1-95bac1bf62f4@gmail.com Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/fpu/types.h | 7 +++++++ arch/x86/kernel/fpu/xstate.c | 3 +++ arch/x86/kernel/fpu/xstate.h | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index eb810074f1e745..fd5fb43d920b44 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -589,6 +589,13 @@ struct fpu_state_config { * even without XSAVE support, i.e. 
legacy features FP + SSE */ u64 legacy_features; + /* + * @independent_features: + * + * Features that are supported by XSAVES, but not managed as part of + * the FPU core, such as LBR + */ + u64 independent_features; }; /* FPU state configuration information */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 571a43b3105df4..255ff8f6c52705 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -788,6 +788,9 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) goto out_disable; } + fpu_kernel_cfg.independent_features = fpu_kernel_cfg.max_features & + XFEATURE_MASK_INDEPENDENT; + /* * Clear XSAVE features that are disabled in the normal CPUID. */ diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 19ca623ffa2ac7..544224611e23c5 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -64,9 +64,9 @@ static inline u64 xfeatures_mask_supervisor(void) static inline u64 xfeatures_mask_independent(void) { if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) - return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR; + return fpu_kernel_cfg.independent_features & ~XFEATURE_MASK_LBR; - return XFEATURE_MASK_INDEPENDENT; + return fpu_kernel_cfg.independent_features; } /* XSAVE/XRSTOR wrapper functions */ From 73d20d08d39edd690fd7a88a52a07c3083ce2bca Mon Sep 17 00:00:00 2001 From: Yuntao Wang Date: Tue, 13 Aug 2024 09:48:27 +0800 Subject: [PATCH 050/268] x86/apic: Make x2apic_disable() work correctly commit 0ecc5be200c84e67114f3640064ba2bae3ba2f5a upstream. x2apic_disable() clears x2apic_state and x2apic_mode unconditionally, even when the state is X2APIC_ON_LOCKED, which prevents the kernel to disable it thereby creating inconsistent state. Due to the early state check for X2APIC_ON, the code path which warns about a locked X2APIC cannot be reached. Test for state < X2APIC_ON instead and move the clearing of the state and mode variables to the place which actually disables X2APIC. [ tglx: Massaged change log. Added Fixes tag. Moved clearing so it's at the right place for back ports ] Fixes: a57e456a7b28 ("x86/apic: Fix fallout from x2apic cleanup") Signed-off-by: Yuntao Wang Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240813014827.895381-1-yuntao.wang@linux.dev Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/apic/apic.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 06d5a7eeee81a9..f2e605ee75d966 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1812,12 +1812,9 @@ static __init void apic_set_fixmap(bool read_apic); static __init void x2apic_disable(void) { - u32 x2apic_id, state = x2apic_state; + u32 x2apic_id; - x2apic_mode = 0; - x2apic_state = X2APIC_DISABLED; - - if (state != X2APIC_ON) + if (x2apic_state < X2APIC_ON) return; x2apic_id = read_apic_id(); @@ -1830,6 +1827,10 @@ static __init void x2apic_disable(void) } __x2apic_disable(); + + x2apic_mode = 0; + x2apic_state = X2APIC_DISABLED; + /* * Don't reread the APIC ID as it was already done from * check_x2apic() and the APIC driver still is a x2APIC variant, From 94479011f4f551b4b1e010776a131512402b27bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Sep 2024 14:24:38 -0400 Subject: [PATCH 051/268] Revert "drm/amdgpu: align pp_power_profile_mode with kernel docs" commit 1a8d845470941f1b6de1b392227530c097dc5e0c upstream. 
This reverts commit 8f614469de248a4bc55fb07e55d5f4c340c75b11. This breaks some manual setting of the profile mode in certain cases. Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3600 Signed-off-by: Alex Deucher (cherry picked from commit 7a199557643e993d4e7357860624b8aa5d8f4340) Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index c9a9c758531bcc..c0d7fe5102e6d3 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -1883,7 +1883,8 @@ static int smu_adjust_power_state_dynamic(struct smu_context *smu, smu_dpm_ctx->dpm_level = level; } - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) { index = fls(smu->workload_mask); index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0; workload[0] = smu->workload_setting[index]; @@ -1962,7 +1963,8 @@ static int smu_switch_power_profile(void *handle, workload[0] = smu->workload_setting[index]; } - if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) + if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL && + smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) smu_bump_power_profile_mode(smu, workload, 0); return 0; From c8219a27fa43a2cbf99f5176f6dddfe73e7a24ae Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 20 Aug 2024 20:07:44 -0700 Subject: [PATCH 052/268] tcp_bpf: fix return value of tcp_bpf_sendmsg() commit fe1910f9337bd46a9343967b547ccab26b4b2c6e upstream. When we cork messages in psock->cork, the last message triggers the flushing will result in sending a sk_msg larger than the current message size. In this case, in tcp_bpf_send_verdict(), 'copied' becomes negative at least in the following case: 468 case __SK_DROP: 469 default: 470 sk_msg_free_partial(sk, msg, tosend); 471 sk_msg_apply_bytes(psock, tosend); 472 *copied -= (tosend + delta); // <==== HERE 473 return -EACCES; Therefore, it could lead to the following BUG with a proper value of 'copied' (thanks to syzbot). We should not use negative 'copied' as a return value here. ------------[ cut here ]------------ kernel BUG at net/socket.c:733! 
Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Modules linked in: CPU: 0 UID: 0 PID: 3265 Comm: syz-executor510 Not tainted 6.11.0-rc3-syzkaller-00060-gd07b43284ab3 #0 Hardware name: linux,dummy-virt (DT) pstate: 61400009 (nZCv daif +PAN -UAO -TCO +DIT -SSBS BTYPE=--) pc : sock_sendmsg_nosec net/socket.c:733 [inline] pc : sock_sendmsg_nosec net/socket.c:728 [inline] pc : __sock_sendmsg+0x5c/0x60 net/socket.c:745 lr : sock_sendmsg_nosec net/socket.c:730 [inline] lr : __sock_sendmsg+0x54/0x60 net/socket.c:745 sp : ffff800088ea3b30 x29: ffff800088ea3b30 x28: fbf00000062bc900 x27: 0000000000000000 x26: ffff800088ea3bc0 x25: ffff800088ea3bc0 x24: 0000000000000000 x23: f9f00000048dc000 x22: 0000000000000000 x21: ffff800088ea3d90 x20: f9f00000048dc000 x19: ffff800088ea3d90 x18: 0000000000000001 x17: 0000000000000000 x16: 0000000000000000 x15: 000000002002ffaf x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: ffff8000815849c0 x9 : ffff8000815b49c0 x8 : 0000000000000000 x7 : 000000000000003f x6 : 0000000000000000 x5 : 00000000000007e0 x4 : fff07ffffd239000 x3 : fbf00000062bc900 x2 : 0000000000000000 x1 : 0000000000000000 x0 : 00000000fffffdef Call trace: sock_sendmsg_nosec net/socket.c:733 [inline] __sock_sendmsg+0x5c/0x60 net/socket.c:745 ____sys_sendmsg+0x274/0x2ac net/socket.c:2597 ___sys_sendmsg+0xac/0x100 net/socket.c:2651 __sys_sendmsg+0x84/0xe0 net/socket.c:2680 __do_sys_sendmsg net/socket.c:2689 [inline] __se_sys_sendmsg net/socket.c:2687 [inline] __arm64_sys_sendmsg+0x24/0x30 net/socket.c:2687 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x48/0x110 arch/arm64/kernel/syscall.c:49 el0_svc_common.constprop.0+0x40/0xe0 arch/arm64/kernel/syscall.c:132 do_el0_svc+0x1c/0x28 arch/arm64/kernel/syscall.c:151 el0_svc+0x34/0xec arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x100/0x12c arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x19c/0x1a0 arch/arm64/kernel/entry.S:598 Code: f9404463 d63f0060 3108441f 54fffe81 (d4210000) ---[ end trace 0000000000000000 ]--- Fixes: 4f738adba30a ("bpf: create tcp_bpf_ulp allowing BPF to monitor socket TX/RX data") Reported-by: syzbot+58c03971700330ce14d8@syzkaller.appspotmail.com Cc: Jakub Sitnicki Signed-off-by: Cong Wang Reviewed-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/20240821030744.320934-1-xiyou.wangcong@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 53b0d62fd2c2db..fe6178715ba05f 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -577,7 +577,7 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) err = sk_stream_error(sk, msg->msg_flags, err); release_sock(sk); sk_psock_put(sk, psock); - return copied ? copied : err; + return copied > 0 ? copied : err; } enum { From 18a5a16940464b301ea91bf5da3a324aedb347b2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Sep 2024 14:44:18 +0000 Subject: [PATCH 053/268] ila: call nf_unregister_net_hooks() sooner commit 031ae72825cef43e4650140b800ad58bf7a6a466 upstream. syzbot found an use-after-free Read in ila_nf_input [1] Issue here is that ila_xlat_exit_net() frees the rhashtable, then call nf_unregister_net_hooks(). It should be done in the reverse way, with a synchronize_rcu(). This is a good match for a pre_exit() method. 
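The ordering that makes a pre_exit() method the right place can be sketched with a minimal pernet user (hypothetical names, not the ila code): cleanup_net() runs every ->pre_exit() handler, then a synchronize_rcu(), and only then the ->exit() handlers, so packets still traversing the hooks under RCU cannot outlive the data freed in ->exit().

  #include <linux/module.h>
  #include <linux/netfilter.h>
  #include <linux/netfilter_ipv6.h>
  #include <net/net_namespace.h>
  #include <net/netns/generic.h>

  struct demo_net {
  	bool hooks_registered;
  };

  static unsigned int demo_net_id __read_mostly;

  static unsigned int demo_hook(void *priv, struct sk_buff *skb,
  			      const struct nf_hook_state *state)
  {
  	return NF_ACCEPT;	/* would look up per-net state here */
  }

  static const struct nf_hook_ops demo_hook_ops[] = {
  	{
  		.hook		= demo_hook,
  		.pf		= NFPROTO_IPV6,
  		.hooknum	= NF_INET_PRE_ROUTING,
  		.priority	= NF_IP6_PRI_LAST,
  	},
  };

  static __net_init int demo_init_net(struct net *net)
  {
  	struct demo_net *dn = net_generic(net, demo_net_id);

  	dn->hooks_registered = false;	/* registered lazily elsewhere */
  	return 0;
  }

  static __net_exit void demo_pre_exit_net(struct net *net)
  {
  	struct demo_net *dn = net_generic(net, demo_net_id);

  	/* runs before the synchronize_rcu() in cleanup_net() */
  	if (dn->hooks_registered)
  		nf_unregister_net_hooks(net, demo_hook_ops,
  					ARRAY_SIZE(demo_hook_ops));
  }

  static __net_exit void demo_exit_net(struct net *net)
  {
  	/* after the grace period: safe to free per-net lookup state */
  }

  static struct pernet_operations demo_net_ops = {
  	.init		= demo_init_net,
  	.pre_exit	= demo_pre_exit_net,
  	.exit		= demo_exit_net,
  	.id		= &demo_net_id,
  	.size		= sizeof(struct demo_net),
  };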
[1] BUG: KASAN: use-after-free in rht_key_hashfn include/linux/rhashtable.h:159 [inline] BUG: KASAN: use-after-free in __rhashtable_lookup include/linux/rhashtable.h:604 [inline] BUG: KASAN: use-after-free in rhashtable_lookup include/linux/rhashtable.h:646 [inline] BUG: KASAN: use-after-free in rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 Read of size 4 at addr ffff888064620008 by task ksoftirqd/0/16 CPU: 0 UID: 0 PID: 16 Comm: ksoftirqd/0 Not tainted 6.11.0-rc4-syzkaller-00238-g2ad6d23f465a #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 Call Trace: __dump_stack lib/dump_stack.c:93 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:119 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 rht_key_hashfn include/linux/rhashtable.h:159 [inline] __rhashtable_lookup include/linux/rhashtable.h:604 [inline] rhashtable_lookup include/linux/rhashtable.h:646 [inline] rhashtable_lookup_fast+0x77a/0x9b0 include/linux/rhashtable.h:672 ila_lookup_wildcards net/ipv6/ila/ila_xlat.c:132 [inline] ila_xlat_addr net/ipv6/ila/ila_xlat.c:652 [inline] ila_nf_input+0x1fe/0x3c0 net/ipv6/ila/ila_xlat.c:190 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xc3/0x220 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] NF_HOOK+0x29e/0x450 include/linux/netfilter.h:312 __netif_receive_skb_one_core net/core/dev.c:5661 [inline] __netif_receive_skb+0x1ea/0x650 net/core/dev.c:5775 process_backlog+0x662/0x15b0 net/core/dev.c:6108 __napi_poll+0xcb/0x490 net/core/dev.c:6772 napi_poll net/core/dev.c:6841 [inline] net_rx_action+0x89b/0x1240 net/core/dev.c:6963 handle_softirqs+0x2c4/0x970 kernel/softirq.c:554 run_ksoftirqd+0xca/0x130 kernel/softirq.c:928 smpboot_thread_fn+0x544/0xa30 kernel/smpboot.c:164 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 The buggy address belongs to the physical page: page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x64620 flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) page_type: 0xbfffffff(buddy) raw: 00fff00000000000 ffffea0000959608 ffffea00019d9408 0000000000000000 raw: 0000000000000000 0000000000000003 00000000bfffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as freed page last allocated via order 3, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 5242, tgid 5242 (syz-executor), ts 73611328570, free_ts 618981657187 set_page_owner include/linux/page_owner.h:32 [inline] post_alloc_hook+0x1f3/0x230 mm/page_alloc.c:1493 prep_new_page mm/page_alloc.c:1501 [inline] get_page_from_freelist+0x2e4c/0x2f10 mm/page_alloc.c:3439 __alloc_pages_noprof+0x256/0x6c0 mm/page_alloc.c:4695 __alloc_pages_node_noprof include/linux/gfp.h:269 [inline] alloc_pages_node_noprof include/linux/gfp.h:296 [inline] ___kmalloc_large_node+0x8b/0x1d0 mm/slub.c:4103 __kmalloc_large_node_noprof+0x1a/0x80 mm/slub.c:4130 __do_kmalloc_node mm/slub.c:4146 [inline] __kmalloc_node_noprof+0x2d2/0x440 mm/slub.c:4164 __kvmalloc_node_noprof+0x72/0x190 mm/util.c:650 bucket_table_alloc lib/rhashtable.c:186 [inline] rhashtable_init_noprof+0x534/0xa60 lib/rhashtable.c:1071 ila_xlat_init_net+0xa0/0x110 net/ipv6/ila/ila_xlat.c:613 ops_init+0x359/0x610 net/core/net_namespace.c:139 setup_net+0x515/0xca0 
net/core/net_namespace.c:343 copy_net_ns+0x4e2/0x7b0 net/core/net_namespace.c:508 create_new_namespaces+0x425/0x7b0 kernel/nsproxy.c:110 unshare_nsproxy_namespaces+0x124/0x180 kernel/nsproxy.c:228 ksys_unshare+0x619/0xc10 kernel/fork.c:3328 __do_sys_unshare kernel/fork.c:3399 [inline] __se_sys_unshare kernel/fork.c:3397 [inline] __x64_sys_unshare+0x38/0x40 kernel/fork.c:3397 page last free pid 11846 tgid 11846 stack trace: reset_page_owner include/linux/page_owner.h:25 [inline] free_pages_prepare mm/page_alloc.c:1094 [inline] free_unref_page+0xd22/0xea0 mm/page_alloc.c:2612 __folio_put+0x2c8/0x440 mm/swap.c:128 folio_put include/linux/mm.h:1486 [inline] free_large_kmalloc+0x105/0x1c0 mm/slub.c:4565 kfree+0x1c4/0x360 mm/slub.c:4588 rhashtable_free_and_destroy+0x7c6/0x920 lib/rhashtable.c:1169 ila_xlat_exit_net+0x55/0x110 net/ipv6/ila/ila_xlat.c:626 ops_exit_list net/core/net_namespace.c:173 [inline] cleanup_net+0x802/0xcc0 net/core/net_namespace.c:640 process_one_work kernel/workqueue.c:3231 [inline] process_scheduled_works+0xa2c/0x1830 kernel/workqueue.c:3312 worker_thread+0x86d/0xd40 kernel/workqueue.c:3390 kthread+0x2f0/0x390 kernel/kthread.c:389 ret_from_fork+0x4b/0x80 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 Memory state around the buggy address: ffff88806461ff00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88806461ff80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff888064620000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff888064620080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff888064620100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Tom Herbert Reviewed-by: Florian Westphal Link: https://patch.msgid.link/20240904144418.1162839-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ila/ila.h | 1 + net/ipv6/ila/ila_main.c | 6 ++++++ net/ipv6/ila/ila_xlat.c | 13 +++++++++---- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index ad5f6f6ba33302..85b92917849bff 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -108,6 +108,7 @@ int ila_lwt_init(void); void ila_lwt_fini(void); int ila_xlat_init_net(struct net *net); +void ila_xlat_pre_exit_net(struct net *net); void ila_xlat_exit_net(struct net *net); int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ipv6/ila/ila_main.c b/net/ipv6/ila/ila_main.c index 69caed07315f0c..976c78efbae170 100644 --- a/net/ipv6/ila/ila_main.c +++ b/net/ipv6/ila/ila_main.c @@ -71,6 +71,11 @@ static __net_init int ila_init_net(struct net *net) return err; } +static __net_exit void ila_pre_exit_net(struct net *net) +{ + ila_xlat_pre_exit_net(net); +} + static __net_exit void ila_exit_net(struct net *net) { ila_xlat_exit_net(net); @@ -78,6 +83,7 @@ static __net_exit void ila_exit_net(struct net *net) static struct pernet_operations ila_net_ops = { .init = ila_init_net, + .pre_exit = ila_pre_exit_net, .exit = ila_exit_net, .id = &ila_net_id, .size = sizeof(struct ila_net), diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 67e8c9440977a4..534a4498e280d7 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -619,6 +619,15 @@ int ila_xlat_init_net(struct net *net) return 0; } +void ila_xlat_pre_exit_net(struct net *net) +{ + struct ila_net *ilan = net_generic(net, ila_net_id); + + if 
(ilan->xlat.hooks_registered) + nf_unregister_net_hooks(net, ila_nf_hook_ops, + ARRAY_SIZE(ila_nf_hook_ops)); +} + void ila_xlat_exit_net(struct net *net) { struct ila_net *ilan = net_generic(net, ila_net_id); @@ -626,10 +635,6 @@ void ila_xlat_exit_net(struct net *net) rhashtable_free_and_destroy(&ilan->xlat.rhash_table, ila_free_cb, NULL); free_bucket_spinlocks(ilan->xlat.locks); - - if (ilan->xlat.hooks_registered) - nf_unregister_net_hooks(net, ila_nf_hook_ops, - ARRAY_SIZE(ila_nf_hook_ops)); } static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) From d4a9039a7b3d8005b90c7b1a55a306444f0e5447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 3 Sep 2024 18:08:45 +0200 Subject: [PATCH 054/268] sched: sch_cake: fix bulk flow accounting logic for host fairness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 546ea84d07e3e324644025e2aae2d12ea4c5896e upstream. In sch_cake, we keep track of the count of active bulk flows per host, when running in dst/src host fairness mode, which is used as the round-robin weight when iterating through flows. The count of active bulk flows is updated whenever a flow changes state. This has a peculiar interaction with the hash collision handling: when a hash collision occurs (after the set-associative hashing), the state of the hash bucket is simply updated to match the new packet that collided, and if host fairness is enabled, that also means assigning new per-host state to the flow. For this reason, the bulk flow counters of the host(s) assigned to the flow are decremented, before new state is assigned (and the counters, which may not belong to the same host anymore, are incremented again). Back when this code was introduced, the host fairness mode was always enabled, so the decrement was unconditional. When the configuration flags were introduced the *increment* was made conditional, but the *decrement* was not. Which of course can lead to a spurious decrement (and associated wrap-around to U16_MAX). AFAICT, when host fairness is disabled, the decrement and wrap-around happens as soon as a hash collision occurs (which is not that common in itself, due to the set-associative hashing). However, in most cases this is harmless, as the value is only used when host fairness mode is enabled. So in order to trigger an array overflow, sch_cake has to first be configured with host fairness disabled, and while running in this mode, a hash collision has to occur to cause the overflow. Then, the qdisc has to be reconfigured to enable host fairness, which leads to the array out-of-bounds because the wrapped-around value is retained and used as an array index. It seems that syzbot managed to trigger this, which is quite impressive in its own right. This patch fixes the issue by introducing the same conditional check on decrement as is used on increment. The original bug predates the upstreaming of cake, but the commit listed in the Fixes tag touched that code, meaning that this patch won't apply before that. 
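The arithmetic behind the out-of-bounds index is plain unsigned wrap-around; a small stand-alone model shows it (the variable name is illustrative, the per-host bulk flow counters in sch_cake are 16-bit):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
  	/* host fairness disabled, so the counter was never incremented */
  	uint16_t srchost_bulk_flow_count = 0;

  	/* hash collision path before the fix: decrement happens anyway */
  	srchost_bulk_flow_count--;

  	/* qdisc later reconfigured with host fairness enabled: the stale
  	 * value is used as a per-host array index / round-robin weight
  	 */
  	printf("index after wrap-around: %u\n", srchost_bulk_flow_count);
  	return 0;
  }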
Fixes: 712639929912 ("sch_cake: Make the dual modes fairer") Reported-by: syzbot+7fe7b81d602cc1e6b94d@syzkaller.appspotmail.com Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20240903160846.20909-1-toke@redhat.com Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_cake.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9cff99558694dc..30955dd45779e2 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -786,12 +786,15 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, * queue, accept the collision, update the host tags. */ q->way_collisions++; - if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { - q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; - q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; - } allocate_src = cake_dsrc(flow_mode); allocate_dst = cake_ddst(flow_mode); + + if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { + if (allocate_src) + q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; + if (allocate_dst) + q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; + } found: /* reserve queue for future packets in same flow */ reduced_hash = outer_hash + k; From 9d8c3a585d564d776ee60d4aabec59b404be7403 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 10 Aug 2024 15:52:42 +0900 Subject: [PATCH 055/268] nilfs2: fix missing cleanup on rollforward recovery error commit 5787fcaab9eb5930f5378d6a1dd03d916d146622 upstream. In an error injection test of a routine for mount-time recovery, KASAN found a use-after-free bug. It turned out that if data recovery was performed using partial logs created by dsync writes, but an error occurred before starting the log writer to create a recovered checkpoint, the inodes whose data had been recovered were left in the ns_dirty_files list of the nilfs object and were not freed. Fix this issue by cleaning up inodes that have read the recovery data if the recovery routine fails midway before the log writer starts. 
Link: https://lkml.kernel.org/r/20240810065242.3701-1-konishi.ryusuke@gmail.com Fixes: 0f3e1c7f23f8 ("nilfs2: recovery functions") Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/recovery.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index a9b8d77c8c1d55..ce30b51ac593cd 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -708,6 +708,33 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, brelse(bh); } +/** + * nilfs_abort_roll_forward - cleaning up after a failed rollforward recovery + * @nilfs: nilfs object + */ +static void nilfs_abort_roll_forward(struct the_nilfs *nilfs) +{ + struct nilfs_inode_info *ii, *n; + LIST_HEAD(head); + + /* Abandon inodes that have read recovery data */ + spin_lock(&nilfs->ns_inode_lock); + list_splice_init(&nilfs->ns_dirty_files, &head); + spin_unlock(&nilfs->ns_inode_lock); + if (list_empty(&head)) + return; + + set_nilfs_purging(nilfs); + list_for_each_entry_safe(ii, n, &head, i_dirty) { + spin_lock(&nilfs->ns_inode_lock); + list_del_init(&ii->i_dirty); + spin_unlock(&nilfs->ns_inode_lock); + + iput(&ii->vfs_inode); + } + clear_nilfs_purging(nilfs); +} + /** * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint * @nilfs: nilfs object @@ -766,15 +793,19 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, if (unlikely(err)) { nilfs_err(sb, "error %d writing segment for recovery", err); - goto failed; + goto put_root; } nilfs_finish_roll_forward(nilfs, ri); } - failed: +put_root: nilfs_put_root(root); return err; + +failed: + nilfs_abort_roll_forward(nilfs); + goto put_root; } /** From 8c6e43b3d5f109cf9c61bc188fcc8175404e924f Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 11 Aug 2024 19:03:20 +0900 Subject: [PATCH 056/268] nilfs2: protect references to superblock parameters exposed in sysfs commit 683408258917541bdb294cd717c210a04381931e upstream. The superblock buffers of nilfs2 can not only be overwritten at runtime for modifications/repairs, but they are also regularly swapped, replaced during resizing, and even abandoned when degrading to one side due to backing device issues. So, accessing them requires mutual exclusion using the reader/writer semaphore "nilfs->ns_sem". Some sysfs attribute show methods read this superblock buffer without the necessary mutual exclusion, which can cause problems with pointer dereferencing and memory access, so fix it. 
Link: https://lkml.kernel.org/r/20240811100320.9913-1-konishi.ryusuke@gmail.com Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/ group") Signed-off-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/sysfs.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 379d22e28ed62c..905c7eadf9676d 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -836,9 +836,15 @@ ssize_t nilfs_dev_revision_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u32 major = le32_to_cpu(sbp[0]->s_rev_level); - u16 minor = le16_to_cpu(sbp[0]->s_minor_rev_level); + struct nilfs_super_block *raw_sb; + u32 major; + u16 minor; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + major = le32_to_cpu(raw_sb->s_rev_level); + minor = le16_to_cpu(raw_sb->s_minor_rev_level); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%d.%d\n", major, minor); } @@ -856,8 +862,13 @@ ssize_t nilfs_dev_device_size_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; - u64 dev_size = le64_to_cpu(sbp[0]->s_dev_size); + struct nilfs_super_block *raw_sb; + u64 dev_size; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + dev_size = le64_to_cpu(raw_sb->s_dev_size); + up_read(&nilfs->ns_sem); return sysfs_emit(buf, "%llu\n", dev_size); } @@ -879,9 +890,15 @@ ssize_t nilfs_dev_uuid_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; - return sysfs_emit(buf, "%pUb\n", sbp[0]->s_uuid); + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = sysfs_emit(buf, "%pUb\n", raw_sb->s_uuid); + up_read(&nilfs->ns_sem); + + return len; } static @@ -889,10 +906,16 @@ ssize_t nilfs_dev_volume_name_show(struct nilfs_dev_attr *attr, struct the_nilfs *nilfs, char *buf) { - struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_super_block *raw_sb; + ssize_t len; + + down_read(&nilfs->ns_sem); + raw_sb = nilfs->ns_sbp[0]; + len = scnprintf(buf, sizeof(raw_sb->s_volume_name), "%s\n", + raw_sb->s_volume_name); + up_read(&nilfs->ns_sem); - return scnprintf(buf, sizeof(sbp[0]->s_volume_name), "%s\n", - sbp[0]->s_volume_name); + return len; } static const char dev_readme_str[] = From 0a1a961bde4351dc047ffdeb2f1311ca16a700cc Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 14 Aug 2024 19:11:19 +0900 Subject: [PATCH 057/268] nilfs2: fix state management in error path of log writing function commit 6576dd6695f2afca3f4954029ac4a64f82ba60ab upstream. After commit a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write") was applied, the log writing function nilfs_segctor_do_construct() was able to issue I/O requests continuously even if user data blocks were split into multiple logs across segments, but two potential flaws were introduced in its error handling. First, if nilfs_segctor_begin_construction() fails while creating the second or subsequent logs, the log writing function returns without calling nilfs_segctor_abort_construction(), so the writeback flag set on pages/folios will remain uncleared. This causes page cache operations to hang waiting for the writeback flag. 
For example, truncate_inode_pages_final(), which is called via nilfs_evict_inode() when an inode is evicted from memory, will hang. Second, the NILFS_I_COLLECTED flag set on normal inodes remain uncleared. As a result, if the next log write involves checkpoint creation, that's fine, but if a partial log write is performed that does not, inodes with NILFS_I_COLLECTED set are erroneously removed from the "sc_dirty_files" list, and their data and b-tree blocks may not be written to the device, corrupting the block mapping. Fix these issues by uniformly calling nilfs_segctor_abort_construction() on failure of each step in the loop in nilfs_segctor_do_construct(), having it clean up logs and segment usages according to progress, and correcting the conditions for calling nilfs_redirty_inodes() to ensure that the NILFS_I_COLLECTED flag is cleared. Link: https://lkml.kernel.org/r/20240814101119.4070-1-konishi.ryusuke@gmail.com Fixes: a694291a6211 ("nilfs2: separate wait function from nilfs_segctor_write") Signed-off-by: Ryusuke Konishi Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index e10f8a777ab06f..0610cb12c11ca0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1835,6 +1835,9 @@ static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, nilfs_abort_logs(&logs, ret ? : err); list_splice_tail_init(&sci->sc_segbufs, &logs); + if (list_empty(&logs)) + return; /* if the first segment buffer preparation failed */ + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); nilfs_free_incomplete_logs(&logs, nilfs); @@ -2079,7 +2082,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) err = nilfs_segctor_begin_construction(sci, nilfs); if (unlikely(err)) - goto out; + goto failed; /* Update time stamp */ sci->sc_seg_ctime = ktime_get_real_seconds(); @@ -2142,10 +2145,9 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) return err; failed_to_write: - if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_redirty_inodes(&sci->sc_dirty_files); - failed: + if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_IFILE) + nilfs_redirty_inodes(&sci->sc_dirty_files); if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); nilfs_segctor_abort_construction(sci, nilfs, err); From 337266ada863a4232c9f8634deedc298a145521c Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Tue, 20 Aug 2024 14:59:52 -0700 Subject: [PATCH 058/268] drm/i915: Do not attempt to load the GSC multiple times commit 59d3cfdd7f9655a0400ac453bf92199204f8b2a1 upstream. If the GSC FW fails to load the GSC HW hangs permanently; the only ways to recover it are FLR or D3cold entry, with the former only being supported on driver unload and the latter only on DGFX, for which we don't need to load the GSC. Therefore, if GSC fails to load there is no need to try again because the HW is stuck in the error state and the submission to load the FW would just hang the GSCCS. Note that, due to wa_14015076503, on MTL the GuC escalates all GSCCS hangs to full GT resets, which would trigger a new attempt to load the GSC FW in the post-reset HW re-init; this issue is also fixed by not attempting to load the GSC FW after an error. 
Fixes: 15bd4a67e914 ("drm/i915/gsc: GSC firmware loading") Signed-off-by: Daniele Ceraolo Spurio Cc: Daniele Ceraolo Spurio Cc: Alan Previn Cc: John Harrison Cc: Rodrigo Vivi Cc: # v6.3+ Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240820215952.2290807-1-daniele.ceraolospurio@intel.com (cherry picked from commit 03ded4d432a1fb7bb6c44c5856d14115f6f6c3b9) Signed-off-by: Joonas Lahtinen Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c index 0d3b22a7436595..e251e061d1adb7 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_uc.c @@ -304,7 +304,7 @@ void intel_gsc_uc_load_start(struct intel_gsc_uc *gsc) { struct intel_gt *gt = gsc_uc_to_gt(gsc); - if (!intel_uc_fw_is_loadable(&gsc->fw)) + if (!intel_uc_fw_is_loadable(&gsc->fw) || intel_uc_fw_is_in_error(&gsc->fw)) return; if (intel_gsc_uc_fw_init_done(gsc)) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 9a431726c8d5b1..ac7b3aad2222e8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -258,6 +258,11 @@ static inline bool intel_uc_fw_is_running(struct intel_uc_fw *uc_fw) return __intel_uc_fw_status(uc_fw) == INTEL_UC_FIRMWARE_RUNNING; } +static inline bool intel_uc_fw_is_in_error(struct intel_uc_fw *uc_fw) +{ + return intel_uc_fw_status_to_error(__intel_uc_fw_status(uc_fw)) != 0; +} + static inline bool intel_uc_fw_is_overridden(const struct intel_uc_fw *uc_fw) { return uc_fw->user_overridden; From 4a67c7c03830b730f538b5ad114a44a08c7b272e Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 16 Jun 2024 09:34:44 +0200 Subject: [PATCH 059/268] ALSA: control: Apply sanity check of input values for user elements [ Upstream commit 50ed081284fe2bfd1f25e8b92f4f6a4990e73c0a ] Although we have already a mechanism for sanity checks of input values for control writes, it's not applied unless the kconfig CONFIG_SND_CTL_INPUT_VALIDATION is set due to the performance reason. Nevertheless, it still makes sense to apply the same check for user elements despite of its cost, as that's the only way to filter out the invalid values; the user controls are handled solely in ALSA core code, and there is no corresponding driver, after all. This patch adds the same input value validation for user control elements at its put callback. The kselftest will be happier with this change, as the incorrect values will be bailed out now with errors. For other normal controls, the check is applied still only when CONFIG_SND_CTL_INPUT_VALIDATION is set. 
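The kind of check now applied to user elements can be modelled in user space as follows (hypothetical structures; the real validation in sound/core/control.c also handles enumerated and other element types):

  #include <stdio.h>

  struct elem_info {
  	long min, max;		/* allowed range for an integer element */
  	unsigned int count;	/* number of values in the element */
  };

  /* returns 0 if all values are in range, -1 (-EINVAL in the kernel) if not */
  static int validate_integer_values(const struct elem_info *info,
  				   const long *values)
  {
  	for (unsigned int i = 0; i < info->count; i++) {
  		if (values[i] < info->min || values[i] > info->max)
  			return -1;
  	}
  	return 0;
  }

  int main(void)
  {
  	const struct elem_info volume = { .min = 0, .max = 100, .count = 2 };
  	const long ok[2] = { 30, 70 };
  	const long bad[2] = { 30, 4096 };

  	printf("in range:     %d\n", validate_integer_values(&volume, ok));
  	printf("out of range: %d\n", validate_integer_values(&volume, bad));
  	return 0;
  }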
Reported-by: Paul Menzel Closes: https://lore.kernel.org/r/1d44be36-9bb9-4d82-8953-5ae2a4f09405@molgen.mpg.de Reviewed-by: Jaroslav Kysela Reviewed-by: Mark Brown Reviewed-by: Takashi Sakamoto Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/20240616073454.16512-4-tiwai@suse.de Signed-off-by: Sasha Levin --- sound/core/control.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/core/control.c b/sound/core/control.c index 59c8658966d4cb..dd4bdb39782cda 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1553,12 +1553,16 @@ static int snd_ctl_elem_user_get(struct snd_kcontrol *kcontrol, static int snd_ctl_elem_user_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { - int change; + int err, change; struct user_element *ue = kcontrol->private_data; unsigned int size = ue->elem_data_size; char *dst = ue->elem_data + snd_ctl_get_ioff(kcontrol, &ucontrol->id) * size; + err = sanity_check_input_values(ue->card, ucontrol, &ue->info, false); + if (err < 0) + return err; + change = memcmp(&ucontrol->value, dst, size) != 0; if (change) memcpy(dst, &ucontrol->value, size); From fd05943b054b0d9b8b2276333a73803a5ffd47ad Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sun, 16 Jun 2024 09:34:47 +0200 Subject: [PATCH 060/268] ALSA: hda: Add input value sanity checks to HDMI channel map controls [ Upstream commit 6278056e42d953e207e2afd416be39d09ed2d496 ] Add a simple sanity check to HD-audio HDMI Channel Map controls. Although the value might not be accepted for the actual connection, we can filter out some bogus values beforehand, and that should be enough for making kselftest happier. Reviewed-by: Jaroslav Kysela Signed-off-by: Takashi Iwai Link: https://lore.kernel.org/20240616073454.16512-7-tiwai@suse.de Signed-off-by: Sasha Levin --- sound/hda/hdmi_chmap.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index 5d8e1d944b0afb..7b276047f85a7d 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -753,6 +753,20 @@ static int hdmi_chmap_ctl_get(struct snd_kcontrol *kcontrol, return 0; } +/* a simple sanity check for input values to chmap kcontrol */ +static int chmap_value_check(struct hdac_chmap *hchmap, + const struct snd_ctl_elem_value *ucontrol) +{ + int i; + + for (i = 0; i < hchmap->channels_max; i++) { + if (ucontrol->value.integer.value[i] < 0 || + ucontrol->value.integer.value[i] > SNDRV_CHMAP_LAST) + return -EINVAL; + } + return 0; +} + static int hdmi_chmap_ctl_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { @@ -764,6 +778,10 @@ static int hdmi_chmap_ctl_put(struct snd_kcontrol *kcontrol, unsigned char chmap[8], per_pin_chmap[8]; int i, err, ca, prepared = 0; + err = chmap_value_check(hchmap, ucontrol); + if (err < 0) + return err; + /* No monitor is connected in dyn_pcm_assign. * It's invalid to setup the chmap */ From b366b1e1dda7f7539fd8dbca9312d0803980f4d2 Mon Sep 17 00:00:00 2001 From: Aaradhana Sahu Date: Tue, 11 Jun 2024 08:40:17 +0530 Subject: [PATCH 061/268] wifi: ath12k: fix uninitialize symbol error on ath12k_peer_assoc_h_he() [ Upstream commit 19b77e7c656a3e125319cc3ef347b397cf042bf6 ] Smatch throws following errors drivers/net/wireless/ath/ath12k/mac.c:1922 ath12k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. drivers/net/wireless/ath/ath12k/mac.c:1922 ath12k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_160'. 
drivers/net/wireless/ath/ath12k/mac.c:1924 ath12k_peer_assoc_h_he() error: uninitialized symbol 'rx_mcs_80'. In ath12k_peer_assoc_h_he() rx_mcs_80 and rx_mcs_160 variables remain uninitialized in the following conditions: 1. Whenever the value of mcs_80 become equal to IEEE80211_HE_MCS_NOT_SUPPORTED then rx_mcs_80 remains uninitialized. 2. Whenever phy capability is not supported 160 channel width and value of mcs_160 become equal to IEEE80211_HE_MCS_NOT_SUPPORTED then rx_mcs_160 remains uninitialized. Initialize these variables during declaration. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.1.1-00188-QCAHKSWPL_SILICONZ-1 Signed-off-by: Aaradhana Sahu Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240611031017.297927-3-quic_aarasahu@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ba6fc27f4a1a17..618eeaf6e33154 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -1614,7 +1614,9 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, { const struct ieee80211_sta_he_cap *he_cap = &sta->deflink.he_cap; int i; - u8 ampdu_factor, rx_mcs_80, rx_mcs_160, max_nss; + u8 ampdu_factor, max_nss; + u8 rx_mcs_80 = IEEE80211_HE_MCS_NOT_SUPPORTED; + u8 rx_mcs_160 = IEEE80211_HE_MCS_NOT_SUPPORTED; u16 mcs_160_map, mcs_80_map; bool support_160; u16 v; From 838c2cfdb6be7d7d8c06c711edf893eb34ca2e7c Mon Sep 17 00:00:00 2001 From: Ajith C Date: Thu, 13 Jun 2024 11:05:28 +0530 Subject: [PATCH 062/268] wifi: ath12k: fix firmware crash due to invalid peer nss [ Upstream commit db163a463bb93cd3e37e1e7b10b9726fb6f95857 ] Currently, if the access point receives an association request containing an Extended HE Capabilities Information Element with an invalid MCS-NSS, it triggers a firmware crash. This issue arises when EHT-PHY capabilities shows support for a bandwidth and MCS-NSS set for that particular bandwidth is filled by zeros and due to this, driver obtains peer_nss as 0 and sending this value to firmware causes crash. Address this issue by implementing a validation step for the peer_nss value before passing it to the firmware. If the value is greater than zero, proceed with forwarding it to the firmware. However, if the value is invalid, reject the association request to prevent potential firmware crashes. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Ajith C Acked-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240613053528.2541645-1-quic_ajithc@quicinc.com Signed-off-by: Sasha Levin --- drivers/net/wireless/ath/ath12k/mac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 618eeaf6e33154..dd2a7c95517be2 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3357,6 +3357,11 @@ static int ath12k_station_assoc(struct ath12k *ar, ath12k_peer_assoc_prepare(ar, vif, sta, &peer_arg, reassoc); + if (peer_arg.peer_nss < 1) { + ath12k_warn(ar->ab, + "invalid peer NSS %d\n", peer_arg.peer_nss); + return -EINVAL; + } ret = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); if (ret) { ath12k_warn(ar->ab, "failed to run peer assoc for STA %pM vdev %i: %d\n", From b181e96e8043072658dfbb16a88c68df34872b29 Mon Sep 17 00:00:00 2001 From: Konstantin Andreev Date: Mon, 17 Jun 2024 01:44:30 +0300 Subject: [PATCH 063/268] smack: unix sockets: fix accept()ed socket label [ Upstream commit e86cac0acdb1a74f608bacefe702f2034133a047 ] When a process accept()s connection from a unix socket (either stream or seqpacket) it gets the socket with the label of the connecting process. For example, if a connecting process has a label 'foo', the accept()ed socket will also have 'in' and 'out' labels 'foo', regardless of the label of the listener process. This is because kernel creates unix child sockets in the context of the connecting process. I do not see any obvious way for the listener to abuse alien labels coming with the new socket, but, to be on the safe side, it's better fix new socket labels. Signed-off-by: Konstantin Andreev Signed-off-by: Casey Schaufler Signed-off-by: Sasha Levin --- security/smack/smack_lsm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 98c2bdbfcaed60..4625674f0e95b8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3769,12 +3769,18 @@ static int smack_unix_stream_connect(struct sock *sock, } } - /* - * Cross reference the peer labels for SO_PEERSEC. - */ if (rc == 0) { + /* + * Cross reference the peer labels for SO_PEERSEC. + */ nsp->smk_packet = ssp->smk_out; ssp->smk_packet = osp->smk_out; + + /* + * new/child/established socket must inherit listening socket labels + */ + nsp->smk_out = osp->smk_out; + nsp->smk_in = osp->smk_in; } return rc; From 3c9e7909df159717486198b2046429318ab88b19 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Mon, 10 Jun 2024 20:42:23 +0800 Subject: [PATCH 064/268] bpf, verifier: Correct tail_call_reachable for bpf prog [ Upstream commit 01793ed86b5d7df1e956520b5474940743eb7ed8 ] It's confusing to inspect 'prog->aux->tail_call_reachable' with drgn[0], when bpf prog has tail call but 'tail_call_reachable' is false. This patch corrects 'tail_call_reachable' when bpf prog has tail call. 
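A minimal libbpf-style program containing a tail call looks roughly like this (section and map names are illustrative); with this change the verifier marks the entry program tail_call_reachable during its subprog scan, so inspecting it as described above now matches expectations:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
  	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
  	__uint(max_entries, 1);
  	__uint(key_size, sizeof(__u32));
  	__uint(value_size, sizeof(__u32));
  } jmp_table SEC(".maps");

  SEC("tc")
  int classifier_target(struct __sk_buff *skb)
  {
  	return 0;	/* TC_ACT_OK */
  }

  SEC("tc")
  int classifier_entry(struct __sk_buff *skb)
  {
  	/* jumps to whatever program user space installed at index 0 */
  	bpf_tail_call(skb, &jmp_table, 0);
  	return 0;	/* only reached if the tail call fails */
  }

  char _license[] SEC("license") = "GPL";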
Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20240610124224.34673-2-hffilwlqm@gmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3f1a9cd7fc9ec7..9d5699942273ee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3064,8 +3064,10 @@ static int check_subprogs(struct bpf_verifier_env *env) if (code == (BPF_JMP | BPF_CALL) && insn[i].src_reg == 0 && - insn[i].imm == BPF_FUNC_tail_call) + insn[i].imm == BPF_FUNC_tail_call) { subprog[cur_subprog].has_tail_call = true; + subprog[cur_subprog].tail_call_reachable = true; + } if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) subprog[cur_subprog].has_ld_abs = true; From 53f17409abf61f66b6f05aff795e938e5ba811d1 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 21 Jun 2024 21:54:50 +0300 Subject: [PATCH 065/268] ELF: fix kernel.randomize_va_space double read [ Upstream commit 2a97388a807b6ab5538aa8f8537b2463c6988bd2 ] ELF loader uses "randomize_va_space" twice. It is sysctl and can change at any moment, so 2 loads could see 2 different values in theory with unpredictable consequences. Issue exactly one load for consistent value across one exec. Signed-off-by: Alexey Dobriyan Link: https://lore.kernel.org/r/3329905c-7eb8-400a-8f0a-d87cff979b5b@p183 Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- fs/binfmt_elf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7b3d2d4914073f..fb2c8d14327ae1 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1008,7 +1008,8 @@ static int load_elf_binary(struct linux_binprm *bprm) if (elf_read_implies_exec(*elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space); + if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); @@ -1300,7 +1301,7 @@ static int load_elf_binary(struct linux_binprm *bprm) mm->end_data = end_data; mm->start_stack = bprm->p; - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { + if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) { /* * For architectures with ELF randomization, when executing * a loader directly (i.e. no interpreter listed in ELF From 9a173212a3180b90673635f0ec88afdc9eba17c8 Mon Sep 17 00:00:00 2001 From: Rakesh Ughreja Date: Tue, 26 Mar 2024 08:01:02 +0200 Subject: [PATCH 066/268] accel/habanalabs/gaudi2: unsecure edma max outstanding register [ Upstream commit 3309887c6ff8ca2ac05a74e1ee5d1c44829f63f2 ] Netowrk EDMAs uses more outstanding transfers so this needs to be programmed by EDMA firmware. 
Signed-off-by: Rakesh Ughreja Reviewed-by: Ofir Bitton Signed-off-by: Ofir Bitton Signed-off-by: Sasha Levin --- drivers/accel/habanalabs/gaudi2/gaudi2_security.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c index 908710524dc9ee..493e556cd31b74 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2_security.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2_security.c @@ -479,6 +479,7 @@ static const u32 gaudi2_pb_dcr0_edma0_unsecured_regs[] = { mmDCORE0_EDMA0_CORE_CTX_TE_NUMROWS, mmDCORE0_EDMA0_CORE_CTX_IDX, mmDCORE0_EDMA0_CORE_CTX_IDX_INC, + mmDCORE0_EDMA0_CORE_WR_COMP_MAX_OUTSTAND, mmDCORE0_EDMA0_CORE_RD_LBW_RATE_LIM_CFG, mmDCORE0_EDMA0_QM_CQ_CFG0_0, mmDCORE0_EDMA0_QM_CQ_CFG0_1, From 29ac5a9b6e9cad81d2816bdec163992742b43355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 21 Jun 2024 11:38:28 +0200 Subject: [PATCH 067/268] irqchip/armada-370-xp: Do not allow mapping IRQ 0 and 1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 3cef738208e5c3cb7084e208caf9bbf684f24feb ] IRQs 0 (IPI) and 1 (MSI) are handled internally by this driver, generic_handle_domain_irq() is never called for these IRQs. Disallow mapping these IRQs. [ Marek: changed commit message ] Signed-off-by: Pali Rohár Signed-off-by: Marek Behún Signed-off-by: Thomas Gleixner Reviewed-by: Andrew Lunn Signed-off-by: Sasha Levin --- drivers/irqchip/irq-armada-370-xp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index a55528469278c7..91a42e2d7a1319 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -566,6 +566,10 @@ static struct irq_chip armada_370_xp_irq_chip = { static int armada_370_xp_mpic_irq_map(struct irq_domain *h, unsigned int virq, irq_hw_number_t hw) { + /* IRQs 0 and 1 cannot be mapped, they are handled internally */ + if (hw <= 1) + return -EINVAL; + armada_370_xp_irq_mask(irq_get_irq_data(virq)); if (!is_percpu_irq(hw)) writel(hw, per_cpu_int_base + From 9c2450cf5d6516353ede636350e4eab630e96e24 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 20 Jun 2024 13:56:22 -0700 Subject: [PATCH 068/268] af_unix: Remove put_pid()/put_cred() in copy_peercred(). [ Upstream commit e4bd881d987121dbf1a288641491955a53d9f8f7 ] When (AF_UNIX, SOCK_STREAM) socket connect()s to a listening socket, the listener's sk_peer_pid/sk_peer_cred are copied to the client in copy_peercred(). Then, the client's sk_peer_pid and sk_peer_cred are always NULL, so we need not call put_pid() and put_cred() there. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- net/unix/af_unix.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b7f62442d82686..dca4429014db15 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -693,9 +693,6 @@ static void init_peercred(struct sock *sk) static void copy_peercred(struct sock *sk, struct sock *peersk) { - const struct cred *old_cred; - struct pid *old_pid; - if (sk < peersk) { spin_lock(&sk->sk_peer_lock); spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); @@ -703,16 +700,12 @@ static void copy_peercred(struct sock *sk, struct sock *peersk) spin_lock(&peersk->sk_peer_lock); spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); } - old_pid = sk->sk_peer_pid; - old_cred = sk->sk_peer_cred; + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); spin_unlock(&peersk->sk_peer_lock); - - put_pid(old_pid); - put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) From 4ebd15ab4b85a72fc875a323b4ad2c9ca954fca8 Mon Sep 17 00:00:00 2001 From: Brian Johannesmeyer Date: Thu, 23 May 2024 23:50:29 +0200 Subject: [PATCH 069/268] x86/kmsan: Fix hook for unaligned accesses [ Upstream commit bf6ab33d8487f5e2a0998ce75286eae65bb0a6d6 ] When called with a 'from' that is not 4-byte-aligned, string_memcpy_fromio() calls the movs() macro to copy the first few bytes, so that 'from' becomes 4-byte-aligned before calling rep_movs(). This movs() macro modifies 'to', and the subsequent line modifies 'n'. As a result, on unaligned accesses, kmsan_unpoison_memory() uses the updated (aligned) values of 'to' and 'n'. Hence, it does not unpoison the entire region. Save the original values of 'to' and 'n', and pass those to kmsan_unpoison_memory(), so that the entire region is unpoisoned. Signed-off-by: Brian Johannesmeyer Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Alexander Potapenko Link: https://lore.kernel.org/r/20240523215029.4160518-1-bjohannesmeyer@gmail.com Signed-off-by: Sasha Levin --- arch/x86/lib/iomem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/lib/iomem.c b/arch/x86/lib/iomem.c index e0411a3774d495..5eecb45d05d5da 100644 --- a/arch/x86/lib/iomem.c +++ b/arch/x86/lib/iomem.c @@ -25,6 +25,9 @@ static __always_inline void rep_movs(void *to, const void *from, size_t n) static void string_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) { + const void *orig_to = to; + const size_t orig_n = n; + if (unlikely(!n)) return; @@ -39,7 +42,7 @@ static void string_memcpy_fromio(void *to, const volatile void __iomem *from, si } rep_movs(to, (const void *)from, n); /* KMSAN must treat values read from devices as initialized. */ - kmsan_unpoison_memory(to, n); + kmsan_unpoison_memory(orig_to, orig_n); } static void string_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) From 75758ca26cb5f55440207229ae084a5eb2aa8055 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Sun, 16 Jun 2024 23:40:52 +0100 Subject: [PATCH 070/268] iommu: sun50i: clear bypass register [ Upstream commit 927c70c93d929f4c2dcaf72f51b31bb7d118a51a ] The Allwinner H6 IOMMU has a bypass register, which allows to circumvent the page tables for each possible master. The reset value for this register is 0, which disables the bypass. 
The Allwinner H616 IOMMU resets this register to 0x7f, which activates the bypass for all masters; that is not what we want. Always clear this register to 0, to enforce the usage of page tables, and make this driver compatible with the H616 in this respect. Signed-off-by: Jernej Skrabec Signed-off-by: Andre Przywara Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20240616224056.29159-2-andre.przywara@arm.com Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/sun50i-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 74c5cb93e90027..94bd7f25f6f26b 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -449,6 +449,7 @@ static int sun50i_iommu_enable(struct sun50i_iommu *iommu) IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(5)); + iommu_write(iommu, IOMMU_BYPASS_REG, 0); iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK); iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE), IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | From 54921e9a7a8ee3c9c5fbb12c62d65eca5ec15461 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 31 May 2024 11:48:47 +0800 Subject: [PATCH 071/268] netfilter: nf_conncount: fix wrong variable type [ Upstream commit 0b88d1654d556264bcd24a9cb6383f0888e30131 ] The issue is that code checkers report a warning: implicit narrowing conversion from type 'unsigned int' to small type 'u8' (the 'keylen' variable). Fix it by removing the 'keylen' variable. Signed-off-by: Yunjian Wang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conncount.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 5d8ed6c90b7ef4..5885810da412fa 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -321,7 +321,6 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -333,7 +332,7 @@ insert_tree(struct net *net, rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); parent = *rbnode; - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -378,7 +377,7 @@ insert_tree(struct net *net, conn->tuple = *tuple; conn->zone = *zone; - memcpy(rbconn->key, key, sizeof(u32) * keylen); + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); @@ -403,7 +402,6 @@ count_tree(struct net *net, struct rb_node *parent; struct nf_conncount_rb *rbconn; unsigned int hash; - u8 keylen = data->keylen; hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; root = &data->root[hash]; @@ -414,7 +412,7 @@ count_tree(struct net *net, rbconn = rb_entry(parent, struct nf_conncount_rb, node); - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { parent = rcu_dereference_raw(parent->rb_left); } else if (diff > 0) { From 415f3634d53c7fb4cf07d2f5a0be7f2e15e6da33 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 25 Jun 2024 19:51:09 +0300 Subject: [PATCH 072/268] wifi: iwlwifi: mvm: use IWL_FW_CHECK for link ID check [ Upstream commit
9215152677d4b321801a92b06f6d5248b2b4465f ] The lookup function iwl_mvm_rcu_fw_link_id_to_link_conf() is normally called with input from the firmware, so it should use IWL_FW_CHECK() instead of WARN_ON(). Signed-off-by: Johannes Berg Signed-off-by: Miri Korenblit Link: https://patch.msgid.link/20240625194805.4ea8fb7c47d4.I1c22af213f97f69bfc14674502511c1bc504adfb@changeid Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index c780e5ffcd5960..bace9d01fd5832 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1318,7 +1318,8 @@ iwl_mvm_rcu_dereference_vif_id(struct iwl_mvm *mvm, u8 vif_id, bool rcu) static inline struct ieee80211_bss_conf * iwl_mvm_rcu_fw_link_id_to_link_conf(struct iwl_mvm *mvm, u8 link_id, bool rcu) { - if (WARN_ON(link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf))) + if (IWL_FW_CHECK(mvm, link_id >= ARRAY_SIZE(mvm->link_id_to_link_conf), + "erroneous FW link ID: %d\n", link_id)) return NULL; if (rcu) From a56330761950cb83de1dfb348479f20c56c95f90 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 20 Jun 2024 12:52:17 +0200 Subject: [PATCH 073/268] udf: Avoid excessive partition lengths [ Upstream commit ebbe26fd54a9621994bc16b14f2ba8f84c089693 ] Avoid mounting filesystems where the partition would overflow the 32-bits used for block number. Also refuse to mount filesystems where the partition length is so large we cannot safely index bits in a block bitmap. Link: https://patch.msgid.link/20240620130403.14731-1-jack@suse.cz Signed-off-by: Jan Kara Signed-off-by: Sasha Levin --- fs/udf/super.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/fs/udf/super.c b/fs/udf/super.c index e0080fda2526be..3c78535f406b00 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1080,12 +1080,19 @@ static int udf_fill_partdesc_info(struct super_block *sb, struct udf_part_map *map; struct udf_sb_info *sbi = UDF_SB(sb); struct partitionHeaderDesc *phd; + u32 sum; int err; map = &sbi->s_partmaps[p_index]; map->s_partition_len = le32_to_cpu(p->partitionLength); /* blocks */ map->s_partition_root = le32_to_cpu(p->partitionStartingLocation); + if (check_add_overflow(map->s_partition_root, map->s_partition_len, + &sum)) { + udf_err(sb, "Partition %d has invalid location %u + %u\n", + p_index, map->s_partition_root, map->s_partition_len); + return -EFSCORRUPTED; + } if (p->accessType == cpu_to_le32(PD_ACCESS_TYPE_READ_ONLY)) map->s_partition_flags |= UDF_PART_FLAG_READ_ONLY; @@ -1141,6 +1148,14 @@ static int udf_fill_partdesc_info(struct super_block *sb, bitmap->s_extPosition = le32_to_cpu( phd->unallocSpaceBitmap.extPosition); map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_BITMAP; + /* Check whether math over bitmap won't overflow. 
*/ + if (check_add_overflow(map->s_partition_len, + sizeof(struct spaceBitmapDesc) << 3, + &sum)) { + udf_err(sb, "Partition %d is too long (%u)\n", p_index, + map->s_partition_len); + return -EFSCORRUPTED; + } udf_debug("unallocSpaceBitmap (part %d) @ %u\n", p_index, bitmap->s_extPosition); } From d670934d4fc9981e0a8b4eb571a634ca55fbd532 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Thu, 30 May 2024 10:55:12 +0300 Subject: [PATCH 074/268] fs/ntfs3: One more reason to mark inode bad [ Upstream commit a0dde5d7a58b6bf9184ef3d8c6e62275c3645584 ] In addition to returning an error, mark the node as bad. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/frecord.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 45b687aff700be..f7c381730b396f 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -1601,8 +1601,10 @@ int ni_delete_all(struct ntfs_inode *ni) asize = le32_to_cpu(attr->size); roff = le16_to_cpu(attr->nres.run_off); - if (roff > asize) + if (roff > asize) { + _ntfs_bad_inode(&ni->vfs_inode); return -EINVAL; + } /* run==1 means unpack and deallocate. */ run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, From 1741021fc1209125a8155de5f4184685f98fbdb7 Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Wed, 27 Mar 2024 09:04:42 -0700 Subject: [PATCH 075/268] riscv: kprobes: Use patch_text_nosync() for insn slots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b1756750a397f36ddc857989d31887c3f5081fb0 ] These instructions are not yet visible to the rest of the system, so there is no need to do the whole stop_machine() dance. Reviewed-by: Björn Töpel Signed-off-by: Samuel Holland Link: https://lore.kernel.org/r/20240327160520.791322-4-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/probes/kprobes.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 2f08c14a933d05..fecbbcf40ac3fe 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -28,9 +28,8 @@ static void __kprobes arch_prepare_ss_slot(struct kprobe *p) p->ainsn.api.restore = (unsigned long)p->addr + offset; - patch_text(p->ainsn.api.insn, &p->opcode, 1); - patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset), - &insn, 1); + patch_text_nosync(p->ainsn.api.insn, &p->opcode, 1); + patch_text_nosync(p->ainsn.api.insn + offset, &insn, 1); } static void __kprobes arch_prepare_simulate(struct kprobe *p) From 6ae2e315a3888c5979bcf90ce049c9069ecd4930 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Wed, 26 Jun 2024 12:59:13 +0200 Subject: [PATCH 076/268] media: vivid: fix wrong sizeimage value for mplane [ Upstream commit 0fd7c0c2c156270dceb8c15fad3120cdce03e539 ] In several places a division by fmt->vdownsampling[p] was missing in the sizeimage[p] calculation, causing incorrect behavior for multiplanar formats were some planes are smaller than the first plane. Found by new v4l2-compliance tests. 
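As a rough sketch (not the driver code; the helper name and the example numbers are made up), the corrected per-plane calculation divides the height by that plane's vertical downsampling factor:

    /* Hypothetical helper mirroring the corrected calculation. For a
     * 1920x1080 format whose second plane has vdownsampling == 2 and
     * bytesperline == 1920, the plane needs 1920 * 1080 / 2 bytes plus
     * any data offset, not 1920 * 1080.
     */
    static unsigned int plane_sizeimage(unsigned int bytesperline,
                                        unsigned int height,
                                        unsigned int vdownsampling,
                                        unsigned int data_offset)
    {
            return bytesperline * height / vdownsampling + data_offset;
    }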
Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vivid/vivid-vid-cap.c | 5 +++-- drivers/media/test-drivers/vivid/vivid-vid-out.c | 16 +++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index 3a06df35a2d7ce..a81c17634daa3f 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -106,8 +106,9 @@ static int vid_cap_queue_setup(struct vb2_queue *vq, if (*nplanes != buffers) return -EINVAL; for (p = 0; p < buffers; p++) { - if (sizes[p] < tpg_g_line_width(&dev->tpg, p) * h + - dev->fmt_cap->data_offset[p]) + if (sizes[p] < tpg_g_line_width(&dev->tpg, p) * h / + dev->fmt_cap->vdownsampling[p] + + dev->fmt_cap->data_offset[p]) return -EINVAL; } } else { diff --git a/drivers/media/test-drivers/vivid/vivid-vid-out.c b/drivers/media/test-drivers/vivid/vivid-vid-out.c index 184a6df2c29fe2..d05f547a587cd2 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-out.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-out.c @@ -63,14 +63,16 @@ static int vid_out_queue_setup(struct vb2_queue *vq, if (sizes[0] < size) return -EINVAL; for (p = 1; p < planes; p++) { - if (sizes[p] < dev->bytesperline_out[p] * h + - vfmt->data_offset[p]) + if (sizes[p] < dev->bytesperline_out[p] * h / + vfmt->vdownsampling[p] + + vfmt->data_offset[p]) return -EINVAL; } } else { for (p = 0; p < planes; p++) - sizes[p] = p ? dev->bytesperline_out[p] * h + - vfmt->data_offset[p] : size; + sizes[p] = p ? dev->bytesperline_out[p] * h / + vfmt->vdownsampling[p] + + vfmt->data_offset[p] : size; } if (vq->num_buffers + *nbuffers < 2) @@ -127,7 +129,7 @@ static int vid_out_buf_prepare(struct vb2_buffer *vb) for (p = 0; p < planes; p++) { if (p) - size = dev->bytesperline_out[p] * h; + size = dev->bytesperline_out[p] * h / vfmt->vdownsampling[p]; size += vb->planes[p].data_offset; if (vb2_get_plane_payload(vb, p) < size) { @@ -334,8 +336,8 @@ int vivid_g_fmt_vid_out(struct file *file, void *priv, for (p = 0; p < mp->num_planes; p++) { mp->plane_fmt[p].bytesperline = dev->bytesperline_out[p]; mp->plane_fmt[p].sizeimage = - mp->plane_fmt[p].bytesperline * mp->height + - fmt->data_offset[p]; + mp->plane_fmt[p].bytesperline * mp->height / + fmt->vdownsampling[p] + fmt->data_offset[p]; } for (p = fmt->buffers; p < fmt->planes; p++) { unsigned stride = dev->bytesperline_out[p]; From 56b7104b823bda22b379cca7d140f67a476de6d5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Jun 2024 20:29:18 +0300 Subject: [PATCH 077/268] leds: spi-byte: Call of_node_put() on error path [ Upstream commit 7f9ab862e05c5bc755f65bf6db7edcffb3b49dfc ] Add a missing call to of_node_put(np) on error. 
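The underlying pattern, sketched here with a hypothetical do_configure() step: the child node returned by of_get_next_available_child() holds a reference that must be dropped with of_node_put() on every exit path, error paths included:

    struct device_node *child;
    int ret;

    child = of_get_next_available_child(dev_of_node(dev), NULL);
    if (!child)
            return -EINVAL;

    ret = do_configure(dev, child);  /* hypothetical step, may fail */
    of_node_put(child);              /* drop the reference on success and on error */
    return ret;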
Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240606173037.3091598-2-andriy.shevchenko@linux.intel.com Signed-off-by: Lee Jones Signed-off-by: Sasha Levin --- drivers/leds/leds-spi-byte.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/leds/leds-spi-byte.c b/drivers/leds/leds-spi-byte.c index 9d91f21842f2b5..afe9bff7c7c163 100644 --- a/drivers/leds/leds-spi-byte.c +++ b/drivers/leds/leds-spi-byte.c @@ -91,7 +91,6 @@ static int spi_byte_probe(struct spi_device *spi) dev_err(dev, "Device must have exactly one LED sub-node."); return -EINVAL; } - child = of_get_next_available_child(dev_of_node(dev), NULL); led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL); if (!led) @@ -107,11 +106,13 @@ static int spi_byte_probe(struct spi_device *spi) led->ldev.max_brightness = led->cdef->max_value - led->cdef->off_value; led->ldev.brightness_set_blocking = spi_byte_brightness_set_blocking; + child = of_get_next_available_child(dev_of_node(dev), NULL); state = of_get_property(child, "default-state", NULL); if (state) { if (!strcmp(state, "on")) { led->ldev.brightness = led->ldev.max_brightness; } else if (strcmp(state, "off")) { + of_node_put(child); /* all other cases except "off" */ dev_err(dev, "default-state can only be 'on' or 'off'"); return -EINVAL; @@ -122,9 +123,12 @@ static int spi_byte_probe(struct spi_device *spi) ret = devm_led_classdev_register(&spi->dev, &led->ldev); if (ret) { + of_node_put(child); mutex_destroy(&led->mutex); return ret; } + + of_node_put(child); spi_set_drvdata(spi, led); return 0; From d22d72e2bf9cc8b20cfbaa7c4065fa17d5e5575d Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Mon, 17 Jun 2024 14:26:09 +0200 Subject: [PATCH 078/268] wifi: brcmsmac: advertise MFP_CAPABLE to enable WPA3 [ Upstream commit dbb5265a5d7cca1cdba7736dba313ab7d07bc19d ] After being asked about support for WPA3 for BCM43224 chipset it was found that all it takes is setting the MFP_CAPABLE flag and mac80211 will take care of all that is needed [1]. Link: https://lore.kernel.org/linux-wireless/20200526155909.5807-2-Larry.Finger@lwfinger.net/ [1] Signed-off-by: Arend van Spriel Tested-by: Reijer Boekhoff Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240617122609.349582-1-arend.vanspriel@broadcom.com Signed-off-by: Sasha Levin --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index 543e93ec49d228..9ab669487de4de 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -1086,6 +1086,7 @@ static int ieee_hw_init(struct ieee80211_hw *hw) ieee80211_hw_set(hw, AMPDU_AGGREGATION); ieee80211_hw_set(hw, SIGNAL_DBM); ieee80211_hw_set(hw, REPORTS_TX_ACK_STATUS); + ieee80211_hw_set(hw, MFP_CAPABLE); hw->extra_tx_headroom = brcms_c_get_header_len(); hw->queues = N_TX_QUEUES; From 4292441b87b8b3e72f8b9308a63fa660bdcaeb6a Mon Sep 17 00:00:00 2001 From: Shantanu Goel Date: Thu, 6 Jun 2024 23:32:57 -0400 Subject: [PATCH 079/268] usb: uas: set host status byte on data completion error [ Upstream commit 9d32685a251a754f1823d287df233716aa23bcb9 ] Set the host status byte when a data completion error is encountered otherwise the upper layer may end up using the invalid zero'ed data. The following output was observed from scsi/sd.c prior to this fix. 
[ 11.872824] sd 0:0:0:1: [sdf] tag#9 data cmplt err -75 uas-tag 1 inflight: [ 11.872826] sd 0:0:0:1: [sdf] tag#9 CDB: Read capacity(16) 9e 10 00 00 00 00 00 00 00 00 00 00 00 20 00 00 [ 11.872830] sd 0:0:0:1: [sdf] Sector size 0 reported, assuming 512. Signed-off-by: Shantanu Goel Acked-by: Oliver Neukum Link: https://lore.kernel.org/r/87msnx4ec6.fsf@yahoo.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/storage/uas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 451d9569163a7e..f794cb39cc3130 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -422,6 +422,7 @@ static void uas_data_cmplt(struct urb *urb) uas_log_cmd_state(cmnd, "data cmplt err", status); /* error: no data transfered */ scsi_set_resid(cmnd, sdb->length); + set_host_byte(cmnd, DID_ERROR); } else { scsi_set_resid(cmnd, sdb->length - urb->actual_length); } From b2a50ffdd1a079869a62198a8d1441355c513c7c Mon Sep 17 00:00:00 2001 From: Ma Ke Date: Tue, 25 Jun 2024 10:23:06 +0800 Subject: [PATCH 080/268] usb: gadget: aspeed_udc: validate endpoint index for ast udc [ Upstream commit ee0d382feb44ec0f445e2ad63786cd7f3f6a8199 ] Verify the array bound so that the host cannot manipulate the index to point past the end of the endpoint array. Found by static analysis. Signed-off-by: Ma Ke Reviewed-by: Andrew Jeffery Acked-by: Andrew Jeffery Link: https://lore.kernel.org/r/20240625022306.2568122-1-make24@iscas.ac.cn Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/udc/aspeed_udc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/udc/aspeed_udc.c b/drivers/usb/gadget/udc/aspeed_udc.c index fc2ead0fe6217e..4868286574a1c9 100644 --- a/drivers/usb/gadget/udc/aspeed_udc.c +++ b/drivers/usb/gadget/udc/aspeed_udc.c @@ -1009,6 +1009,8 @@ static void ast_udc_getstatus(struct ast_udc_dev *udc) break; case USB_RECIP_ENDPOINT: epnum = crq.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum >= AST_UDC_NUM_ENDPOINTS) + goto stall; status = udc->ep[epnum].stopped; break; default: From 874e3bb302f97b94ac548959ec4f925b8e7b45e2 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Fri, 7 Jun 2024 09:21:30 -0600 Subject: [PATCH 081/268] drm/amd/display: Run DC_LOG_DC after checking link->link_enc [ Upstream commit 3a82f62b0d9d7687eac47603bb6cd14a50fa718b ] [WHAT] The DC_LOG_DC should be run after link->link_enc is checked, not before. This fixes 1 REVERSE_INULL issue reported by Coverity.
Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/dc/link/link_factory.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/link/link_factory.c b/drivers/gpu/drm/amd/display/dc/link/link_factory.c index 2c366866f5700c..33bb96f770b866 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_factory.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_factory.c @@ -629,14 +629,14 @@ static bool construct_phy(struct dc_link *link, link->link_enc = link->dc->res_pool->funcs->link_enc_create(dc_ctx, &enc_init_data); - DC_LOG_DC("BIOS object table - DP_IS_USB_C: %d", link->link_enc->features.flags.bits.DP_IS_USB_C); - DC_LOG_DC("BIOS object table - IS_DP2_CAPABLE: %d", link->link_enc->features.flags.bits.IS_DP2_CAPABLE); - if (!link->link_enc) { DC_ERROR("Failed to create link encoder!\n"); goto link_enc_create_fail; } + DC_LOG_DC("BIOS object table - DP_IS_USB_C: %d", link->link_enc->features.flags.bits.DP_IS_USB_C); + DC_LOG_DC("BIOS object table - IS_DP2_CAPABLE: %d", link->link_enc->features.flags.bits.IS_DP2_CAPABLE); + /* Update link encoder tracking variables. These are used for the dynamic * assignment of link encoders to streams. */ From 1bd1fe1109fcd9213494283b01d9421f58e0b6c5 Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 11 Jun 2024 10:36:49 -0600 Subject: [PATCH 082/268] drm/amd/display: Check HDCP returned status [ Upstream commit 5d93060d430b359e16e7c555c8f151ead1ac614b ] [WHAT & HOW] Check mod_hdcp_execute_and_set() return values in authenticated_dp. This fixes 3 CHECKED_RETURN issues reported by Coverity. Reviewed-by: Rodrigo Siqueira Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../amd/display/modules/hdcp/hdcp1_execution.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 1ddb4f5eac8e53..93c0455766ddb0 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -433,17 +433,20 @@ static enum mod_hdcp_status authenticated_dp(struct mod_hdcp *hdcp, } if (status == MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, + if (!mod_hdcp_execute_and_set(mod_hdcp_read_bstatus, &input->bstatus_read, &status, - hdcp, "bstatus_read"); + hdcp, "bstatus_read")) + goto out; if (status == MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_execute_and_set(check_link_integrity_dp, + if (!mod_hdcp_execute_and_set(check_link_integrity_dp, &input->link_integrity_check, &status, - hdcp, "link_integrity_check"); + hdcp, "link_integrity_check")) + goto out; if (status == MOD_HDCP_STATUS_SUCCESS) - mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, + if (!mod_hdcp_execute_and_set(check_no_reauthentication_request_dp, &input->reauth_request_check, &status, - hdcp, "reauth_request_check"); + hdcp, "reauth_request_check")) + goto out; out: return status; } From 8bc7b3ce33e64c74211ed17aec823fc4e523426a Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 21 Jun 2024 17:53:30 +0800 Subject: [PATCH 083/268] drm/amdgpu: Fix smatch static checker warning [ Upstream commit bdbdc7cecd00305dc844a361f9883d3a21022027 ] adev->gfx.imu.funcs could be NULL Signed-off-by: Hawking Zhang Reviewed-by: Likun Gao Signed-off-by: Alex Deucher 
Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index c81e98f0d17ffc..c813cd7b015e14 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4269,11 +4269,11 @@ static int gfx_v11_0_hw_init(void *handle) /* RLC autoload sequence 1: Program rlc ram */ if (adev->gfx.imu.funcs->program_rlc_ram) adev->gfx.imu.funcs->program_rlc_ram(adev); + /* rlc autoload firmware */ + r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); + if (r) + return r; } - /* rlc autoload firmware */ - r = gfx_v11_0_rlc_backdoor_autoload_enable(adev); - if (r) - return r; } else { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { if (adev->gfx.imu.funcs && (amdgpu_dpm > 0)) { From 2521ba3cfa1d1c541e1ba1a32d1b43ad5a8e412f Mon Sep 17 00:00:00 2001 From: Danijel Slivka Date: Mon, 24 Jun 2024 07:58:24 +0200 Subject: [PATCH 084/268] drm/amdgpu: clear RB_OVERFLOW bit when enabling interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit afbf7955ff01e952dbdd465fa25a2ba92d00291c ] Why: Setting IH_RB_WPTR register to 0 will not clear the RB_OVERFLOW bit if RB_ENABLE is not set. How to fix: Set WPTR_OVERFLOW_CLEAR bit after RB_ENABLE bit is set. The RB_ENABLE bit is required to be set, together with WPTR_OVERFLOW_ENABLE bit so that setting WPTR_OVERFLOW_CLEAR bit would clear the RB_OVERFLOW. Signed-off-by: Danijel Slivka Reviewed-by: Christian König Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/ih_v6_0.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index f432dc72df6a9e..725b1a585088db 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -135,6 +135,34 @@ static int ih_v6_0_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + + if (enable) { + /* Unset the CLEAR_OVERFLOW bit to make sure the next step + * is switching the bit from 0 to 1 + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + } + + /* Clear RB_OVERFLOW bit */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + } + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + } + /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 
1 : 0)); From 4676bacc6eafe1ad53413c3e42781cd324550c4b Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 24 Jun 2024 12:52:59 +0300 Subject: [PATCH 085/268] media: vivid: don't set HDMI TX controls if there are no HDMI outputs [ Upstream commit 17763960b1784578e8fe915304b330922f646209 ] When setting the EDID it would attempt to update two controls that are only present if there is an HDMI output configured. If there isn't any (e.g. when the vivid module is loaded with node_types=1), then calling VIDIOC_S_EDID would crash. Fix this by first checking if outputs are present. Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/test-drivers/vivid/vivid-vid-cap.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/media/test-drivers/vivid/vivid-vid-cap.c b/drivers/media/test-drivers/vivid/vivid-vid-cap.c index a81c17634daa3f..99325bfed64310 100644 --- a/drivers/media/test-drivers/vivid/vivid-vid-cap.c +++ b/drivers/media/test-drivers/vivid/vivid-vid-cap.c @@ -1557,8 +1557,10 @@ int vidioc_s_edid(struct file *file, void *_fh, return -EINVAL; if (edid->blocks == 0) { dev->edid_blocks = 0; - v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, 0); - v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, 0); + if (dev->num_outputs) { + v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, 0); + v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, 0); + } phys_addr = CEC_PHYS_ADDR_INVALID; goto set_phys_addr; } @@ -1582,8 +1584,10 @@ int vidioc_s_edid(struct file *file, void *_fh, display_present |= dev->display_present[i] << j++; - v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, display_present); - v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, display_present); + if (dev->num_outputs) { + v4l2_ctrl_s_ctrl(dev->ctrl_tx_edid_present, display_present); + v4l2_ctrl_s_ctrl(dev->ctrl_tx_hotplug, display_present); + } set_phys_addr: /* TODO: a proper hotplug detect cycle should be emulated here */ From 4ef01846c686d3638c7093195392de13f9ae70e6 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Mon, 24 Jun 2024 12:38:58 +0000 Subject: [PATCH 086/268] vfio/spapr: Always clear TCEs before unsetting the window [ Upstream commit 4ba2fdff2eb174114786784926d0efb6903c88a6 ] The PAPR expects the TCE table to have no entries at the time of unset window(i.e. remove-pe). The TCE clear right now is done before freeing the iommu table. On pSeries, the unset window makes those entries inaccessible to the OS and the H_PUT/GET calls fail on them with H_CONSTRAINED. On PowerNV, this has no side effect as the TCE clear can be done before the DMA window removal as well. 
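Simplified ordering after this change (an illustrative sketch only, reusing the function names visible in the diff below):

    /* Clear the entries while the window is still set, so the
     * H_PUT_TCE/H_GET_TCE calls on pSeries still succeed ... */
    tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
    /* ... then remove the window (remove-pe) and free the table. */
    table_group->ops->unset_window(table_group, num);
    tce_iommu_free_table(container, tbl);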
Signed-off-by: Shivaprasad G Bhat Signed-off-by: Michael Ellerman Link: https://msgid.link/171923273535.1397.1236742071894414895.stgit@linux.ibm.com Signed-off-by: Sasha Levin --- drivers/vfio/vfio_iommu_spapr_tce.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index a94ec6225d31ad..5f9e7e47707839 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -364,7 +364,6 @@ static void tce_iommu_release(void *iommu_data) if (!tbl) continue; - tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); tce_iommu_free_table(container, tbl); } @@ -720,6 +719,8 @@ static long tce_iommu_remove_window(struct tce_container *container, BUG_ON(!tbl->it_size); + tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); + /* Detach groups from IOMMUs */ list_for_each_entry(tcegrp, &container->group_list, next) { table_group = iommu_group_get_iommudata(tcegrp->grp); @@ -738,7 +739,6 @@ static long tce_iommu_remove_window(struct tce_container *container, } /* Free table */ - tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size); tce_iommu_free_table(container, tbl); container->tables[num] = NULL; @@ -1197,9 +1197,14 @@ static void tce_iommu_release_ownership(struct tce_container *container, return; } - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) - if (container->tables[i]) + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + if (container->tables[i]) { + tce_iommu_clear(container, container->tables[i], + container->tables[i]->it_offset, + container->tables[i]->it_size); table_group->ops->unset_window(table_group, i); + } + } } static long tce_iommu_take_ownership(struct tce_container *container, From 7b645e687086fa1e3739f93c3e5281f8691843c8 Mon Sep 17 00:00:00 2001 From: Eric Joyner Date: Mon, 17 Jun 2024 14:46:25 +0200 Subject: [PATCH 087/268] ice: Check all ice_vsi_rebuild() errors in function [ Upstream commit d47bf9a495cf424fad674321d943123dc12b926d ] Check the return value from ice_vsi_rebuild() and prevent the usage of incorrectly configured VSI. 
Reviewed-by: Michal Swiatkowski Reviewed-by: Przemek Kitszel Signed-off-by: Eric Joyner Signed-off-by: Karen Ostrowska Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b168a37a5dfff6..3f254ce0d6db76 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3979,13 +3979,17 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) /* set for the next time the netdev is started */ if (!netif_running(vsi->netdev)) { - ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + if (err) + goto rebuild_err; dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n"); goto done; } ice_vsi_close(vsi); - ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + if (err) + goto rebuild_err; ice_for_each_traffic_class(i) { if (vsi->tc_cfg.ena_tc & BIT(i)) @@ -3996,6 +4000,11 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); + goto done; + +rebuild_err: + dev_err(ice_pf_to_dev(pf), "Error during VSI rebuild: %d. Unload and reload the driver.\n", + err); done: clear_bit(ICE_CFG_BUSY, pf->state); return err; From 576d0fb6f8d4bd4695e70eee173a1b9c7bae9572 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Fri, 28 Jun 2024 13:45:29 +0200 Subject: [PATCH 088/268] PCI: keystone: Add workaround for Errata #i2037 (AM65x SR 1.0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 86f271f22bbb6391410a07e08d6ca3757fda01fa ] Errata #i2037 in AM65x/DRA80xM Processors Silicon Revision 1.0 (SPRZ452D_July 2018_Revised December 2019 [1]) mentions that when an inbound PCIe TLP spans more than two internal AXI 128-byte bursts, the bus may corrupt the packet payload, and the corrupt data may cause associated applications or the processor to hang. The workaround for Errata #i2037 is to limit the maximum read request size and maximum payload size to 128 bytes. Add the workaround for Errata #i2037 here. The errata and workaround are applicable only to AM65x SR 1.0; later versions of the silicon will have this fixed.
[1] -> https://www.ti.com/lit/er/sprz452i/sprz452i.pdf Link: https://lore.kernel.org/linux-pci/16e1fcae-1ea7-46be-b157-096e05661b15@siemens.com Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Achal Verma Signed-off-by: Vignesh Raghavendra Signed-off-by: Jan Kiszka Signed-off-by: Krzysztof Wilczyński Reviewed-by: Siddharth Vadapalli Signed-off-by: Sasha Levin --- drivers/pci/controller/dwc/pci-keystone.c | 44 ++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pci-keystone.c b/drivers/pci/controller/dwc/pci-keystone.c index 54a3c7f29f78a6..c1dedc83759c60 100644 --- a/drivers/pci/controller/dwc/pci-keystone.c +++ b/drivers/pci/controller/dwc/pci-keystone.c @@ -34,6 +34,11 @@ #define PCIE_DEVICEID_SHIFT 16 /* Application registers */ +#define PID 0x000 +#define RTL GENMASK(15, 11) +#define RTL_SHIFT 11 +#define AM6_PCI_PG1_RTL_VER 0x15 + #define CMD_STATUS 0x004 #define LTSSM_EN_VAL BIT(0) #define OB_XLAT_EN_VAL BIT(1) @@ -104,6 +109,8 @@ #define to_keystone_pcie(x) dev_get_drvdata((x)->dev) +#define PCI_DEVICE_ID_TI_AM654X 0xb00c + struct ks_pcie_of_data { enum dw_pcie_device_mode mode; const struct dw_pcie_host_ops *host_ops; @@ -518,7 +525,11 @@ static int ks_pcie_start_link(struct dw_pcie *pci) static void ks_pcie_quirk(struct pci_dev *dev) { struct pci_bus *bus = dev->bus; + struct keystone_pcie *ks_pcie; + struct device *bridge_dev; struct pci_dev *bridge; + u32 val; + static const struct pci_device_id rc_pci_devids[] = { { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2HK), .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, @@ -530,6 +541,11 @@ static void ks_pcie_quirk(struct pci_dev *dev) .class = PCI_CLASS_BRIDGE_PCI_NORMAL, .class_mask = ~0, }, { 0, }, }; + static const struct pci_device_id am6_pci_devids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_AM654X), + .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + { 0, }, + }; if (pci_is_root_bus(bus)) bridge = dev; @@ -551,10 +567,36 @@ static void ks_pcie_quirk(struct pci_dev *dev) */ if (pci_match_id(rc_pci_devids, bridge)) { if (pcie_get_readrq(dev) > 256) { - dev_info(&dev->dev, "limiting MRRS to 256\n"); + dev_info(&dev->dev, "limiting MRRS to 256 bytes\n"); pcie_set_readrq(dev, 256); } } + + /* + * Memory transactions fail with PCI controller in AM654 PG1.0 + * when MRRS is set to more than 128 bytes. Force the MRRS to + * 128 bytes in all downstream devices. + */ + if (pci_match_id(am6_pci_devids, bridge)) { + bridge_dev = pci_get_host_bridge_device(dev); + if (!bridge_dev && !bridge_dev->parent) + return; + + ks_pcie = dev_get_drvdata(bridge_dev->parent); + if (!ks_pcie) + return; + + val = ks_pcie_app_readl(ks_pcie, PID); + val &= RTL; + val >>= RTL_SHIFT; + if (val != AM6_PCI_PG1_RTL_VER) + return; + + if (pcie_get_readrq(dev) > 128) { + dev_info(&dev->dev, "limiting MRRS to 128 bytes\n"); + pcie_set_readrq(dev, 128); + } + } } DECLARE_PCI_FIXUP_ENABLE(PCI_ANY_ID, PCI_ANY_ID, ks_pcie_quirk); From 6970213c7e7dcbc68b578fef247062955ab8af01 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 9 Jun 2024 16:47:53 -0700 Subject: [PATCH 089/268] Input: ili210x - use kvmalloc() to allocate buffer for firmware update [ Upstream commit 17f5eebf6780eee50f887542e1833fda95f53e4d ] Allocating a contiguous buffer of 64K may fail if memory is sufficiently fragmented, and may cause OOM kill of an unrelated process. However we do not need to have contiguous memory. We also do not need to zero out the buffer since it will be overwritten with firmware data. 
Switch to using kvmalloc() instead of kzalloc(). Link: https://lore.kernel.org/r/20240609234757.610273-1-dmitry.torokhov@gmail.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/touchscreen/ili210x.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/touchscreen/ili210x.c b/drivers/input/touchscreen/ili210x.c index ae7ba0c419f5a7..6a77babcf7228e 100644 --- a/drivers/input/touchscreen/ili210x.c +++ b/drivers/input/touchscreen/ili210x.c @@ -597,7 +597,7 @@ static int ili251x_firmware_to_buffer(const struct firmware *fw, * once, copy them all into this buffer at the right locations, and then * do all operations on this linear buffer. */ - fw_buf = kzalloc(SZ_64K, GFP_KERNEL); + fw_buf = kvmalloc(SZ_64K, GFP_KERNEL); if (!fw_buf) return -ENOMEM; @@ -627,7 +627,7 @@ static int ili251x_firmware_to_buffer(const struct firmware *fw, return 0; err_big: - kfree(fw_buf); + kvfree(fw_buf); return error; } @@ -870,7 +870,7 @@ static ssize_t ili210x_firmware_update_store(struct device *dev, ili210x_hardware_reset(priv->reset_gpio); dev_dbg(dev, "Firmware update ended, error=%i\n", error); enable_irq(client->irq); - kfree(fwbuf); + kvfree(fwbuf); return error; } From b4987d0236d5370742ac32bc4025839f69432395 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Fri, 21 Jun 2024 09:35:22 +0800 Subject: [PATCH 090/268] media: qcom: camss: Add check for v4l2_fwnode_endpoint_parse [ Upstream commit 4caf6d93d9f2c11d6441c64e1c549c445fa322ed ] Add check for the return value of v4l2_fwnode_endpoint_parse() and return the error if it fails in order to catch the error. Signed-off-by: Chen Ni Signed-off-by: Hans Verkuil Signed-off-by: Sasha Levin --- drivers/media/platform/qcom/camss/camss.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/qcom/camss/camss.c b/drivers/media/platform/qcom/camss/camss.c index c6df862c79e39b..8fa6750b5b421b 100644 --- a/drivers/media/platform/qcom/camss/camss.c +++ b/drivers/media/platform/qcom/camss/camss.c @@ -1038,8 +1038,11 @@ static int camss_of_parse_endpoint_node(struct device *dev, struct v4l2_mbus_config_mipi_csi2 *mipi_csi2; struct v4l2_fwnode_endpoint vep = { { 0 } }; unsigned int i; + int ret; - v4l2_fwnode_endpoint_parse(of_fwnode_handle(node), &vep); + ret = v4l2_fwnode_endpoint_parse(of_fwnode_handle(node), &vep); + if (ret) + return ret; csd->interface.csiphy_id = vep.base.port; From dd48992a8a90e787df1cf76b2518e8212c94e0d0 Mon Sep 17 00:00:00 2001 From: Jules Irenge Date: Sun, 12 May 2024 23:31:21 +0100 Subject: [PATCH 091/268] pcmcia: Use resource_size function on resource object [ Upstream commit 24a025497e7e883bd2adef5d0ece1e9b9268009f ] Cocinnele reports a warning WARNING: Suspicious code. 
resource_size is maybe missing with root The root cause is that the resource_size() function is not used where needed. Use resource_size() on the variable "root" of type struct resource. Signed-off-by: Jules Irenge Signed-off-by: Dominik Brodowski Signed-off-by: Sasha Levin --- drivers/pcmcia/yenta_socket.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 1365eaa20ff49a..ff169124929cc2 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -638,11 +638,11 @@ static int yenta_search_one_res(struct resource *root, struct resource *res, start = PCIBIOS_MIN_CARDBUS_IO; end = ~0U; } else { - unsigned long avail = root->end - root->start; + unsigned long avail = resource_size(root); int i; size = BRIDGE_MEM_MAX; - if (size > avail/8) { - size = (avail+1)/8; + if (size > (avail - 1) / 8) { + size = avail / 8; /* round size down to next power of 2 */ i = 0; while ((size /= 2) != 0) From dfafee0a7b51c7c9612edd2d991401294964d02f Mon Sep 17 00:00:00 2001 From: Alex Hung Date: Tue, 18 Jun 2024 16:21:20 -0600 Subject: [PATCH 092/268] drm/amd/display: Check denominator pbn_div before used [ Upstream commit 116a678f3a9abc24f5c9d2525b7393d18d9eb58e ] [WHAT & HOW] A denominator cannot be 0, so it is checked before use. This fixes 1 DIVIDE_BY_ZERO issue reported by Coverity. Reviewed-by: Harry Wentland Signed-off-by: Jerry Zuo Signed-off-by: Alex Hung Tested-by: Daniel Wheeler Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 44c1556838240b..f0ebf686b06f22 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -6937,7 +6937,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state, } } - if (j == dc_state->stream_count) + if (j == dc_state->stream_count || pbn_div == 0) continue; slot_num = DIV_ROUND_UP(pbn, pbn_div); From 9a41def4c48f92d386fdadc332a91c379257aa6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 1 Jun 2024 16:36:27 -0400 Subject: [PATCH 093/268] drm/amdgpu: check for LINEAR_ALIGNED correctly in check_tiling_flags_gfx6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 11317d2963fa79767cd7c6231a00a9d77f2e0f54 ] Fix incorrect check.
Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 578aeba49ea8e2..82ad2b01f2e9ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -909,8 +909,7 @@ static int check_tiling_flags_gfx6(struct amdgpu_framebuffer *afb) { u64 micro_tile_mode; - /* Zero swizzle mode means linear */ - if (AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0) + if (AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) == 1) /* LINEAR_ALIGNED */ return 0; micro_tile_mode = AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE); From 4377b79323df62eb5d310354f19b4d130ff58d50 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 22 Jul 2024 12:28:42 -0700 Subject: [PATCH 094/268] can: bcm: Remove proc entry when dev is unregistered. [ Upstream commit 76fe372ccb81b0c89b6cd2fec26e2f38c958be85 ] syzkaller reported a warning in bcm_connect() below. [0] The repro calls connect() to vxcan1, removes vxcan1, and calls connect() with ifindex == 0. Calling connect() for a BCM socket allocates a proc entry. Then, bcm_sk(sk)->bound is set to 1 to prevent further connect(). However, removing the bound device resets bcm_sk(sk)->bound to 0 in bcm_notify(). The 2nd connect() tries to allocate a proc entry with the same name and sets NULL to bcm_sk(sk)->bcm_proc_read, leaking the original proc entry. Since the proc entry is available only for connect()ed sockets, let's clean up the entry when the bound netdev is unregistered. [0]: proc_dir_entry 'can-bcm/2456' already registered WARNING: CPU: 1 PID: 394 at fs/proc/generic.c:376 proc_register+0x645/0x8f0 fs/proc/generic.c:375 Modules linked in: CPU: 1 PID: 394 Comm: syz-executor403 Not tainted 6.10.0-rc7-g852e42cc2dd4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014 RIP: 0010:proc_register+0x645/0x8f0 fs/proc/generic.c:375 Code: 00 00 00 00 00 48 85 ed 0f 85 97 02 00 00 4d 85 f6 0f 85 9f 02 00 00 48 c7 c7 9b cb cf 87 48 89 de 4c 89 fa e8 1c 6f eb fe 90 <0f> 0b 90 90 48 c7 c7 98 37 99 89 e8 cb 7e 22 05 bb 00 00 00 10 48 RSP: 0018:ffa0000000cd7c30 EFLAGS: 00010246 RAX: 9e129be1950f0200 RBX: ff1100011b51582c RCX: ff1100011857cd80 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000002 RBP: 0000000000000000 R08: ffd400000000000f R09: ff1100013e78cac0 R10: ffac800000cd7980 R11: ff1100013e12b1f0 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: ff1100011a99a2ec FS: 00007fbd7086f740(0000) GS:ff1100013fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000200071c0 CR3: 0000000118556004 CR4: 0000000000771ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe07f0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: proc_create_net_single+0x144/0x210 fs/proc/proc_net.c:220 bcm_connect+0x472/0x840 net/can/bcm.c:1673 __sys_connect_file net/socket.c:2049 [inline] __sys_connect+0x5d2/0x690 net/socket.c:2066 __do_sys_connect net/socket.c:2076 [inline] __se_sys_connect net/socket.c:2073 [inline] __x64_sys_connect+0x8f/0x100 net/socket.c:2073 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xd9/0x1c0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 
0033:0x7fbd708b0e5d Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 9f 1b 00 f7 d8 64 89 01 48 RSP: 002b:00007fff8cd33f08 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fbd708b0e5d RDX: 0000000000000010 RSI: 0000000020000040 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000040 R09: 0000000000000040 R10: 0000000000000040 R11: 0000000000000246 R12: 00007fff8cd34098 R13: 0000000000401280 R14: 0000000000406de8 R15: 00007fbd70ab9000 remove_proc_entry: removing non-empty directory 'net/can-bcm', leaking at least '2456' Fixes: ffd980f976e7 ("[CAN]: Add broadcast manager (bcm) protocol") Reported-by: syzkaller Signed-off-by: Kuniyuki Iwashima Reviewed-by: Simon Horman Link: https://lore.kernel.org/all/20240722192842.37421-1-kuniyu@amazon.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/bcm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/can/bcm.c b/net/can/bcm.c index 9168114fc87f7b..00208ee13e578b 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1428,6 +1428,10 @@ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { +#if IS_ENABLED(CONFIG_PROC_FS) + if (sock_net(sk)->can.bcmproc_dir && bo->bcm_proc_read) + remove_proc_entry(bo->procname, sock_net(sk)->can.bcmproc_dir); +#endif bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; From db5aca78e2608b20d4d3783cf9947de48977ba45 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 5 Aug 2024 15:01:58 +0100 Subject: [PATCH 095/268] can: m_can: Release irq on error in m_can_open [ Upstream commit 06d4ef3056a7ac31be331281bb7a6302ef5a7f8a ] It appears that the irq requested in m_can_open() may be leaked if an error subsequently occurs: if m_can_start() fails. Address this by calling free_irq in the unwind path for such cases. Flagged by Smatch. Compile tested only. 
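The general unwind shape (a sketch, not the driver code; start_hw() stands in for whatever step can fail after the IRQ was requested):

    err = request_irq(dev->irq, handler, IRQF_SHARED, dev->name, dev);
    if (err)
            return err;

    err = start_hw(dev);                    /* hypothetical later step */
    if (err) {
            free_irq(dev->irq, dev);        /* otherwise the IRQ would leak */
            return err;
    }
    return 0;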
Fixes: eaacfeaca7ad ("can: m_can: Call the RAM init directly from m_can_chip_config") Acked-by: Marc Kleine-Budde Signed-off-by: Simon Horman Link: https://lore.kernel.org/all/20240805-mcan-irq-v2-1-7154c0484819@kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/m_can/m_can.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 2395b1225cc8a7..fb77fd74de27fb 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1871,7 +1871,7 @@ static int m_can_open(struct net_device *dev) /* start the m_can controller */ err = m_can_start(dev); if (err) - goto exit_irq_fail; + goto exit_start_fail; if (!cdev->is_peripheral) napi_enable(&cdev->napi); @@ -1880,6 +1880,9 @@ static int m_can_open(struct net_device *dev) return 0; +exit_start_fail: + if (cdev->is_peripheral || dev->irq) + free_irq(dev->irq, dev); exit_irq_fail: if (cdev->is_peripheral) destroy_workqueue(cdev->tx_wq); From 0e52907493fd53538acd82e8687f9eb27fc5dc7d Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 5 Jul 2024 17:28:27 +0200 Subject: [PATCH 096/268] can: mcp251xfd: fix ring configuration when switching from CAN-CC to CAN-FD mode [ Upstream commit 50ea5449c56310d2d31c28ba91a59232116d3c1e ] If the ring (rx, tx) and/or coalescing parameters (rx-frames-irq, tx-frames-irq) have been configured while the interface was in CAN-CC mode, but the interface is brought up in CAN-FD mode, the ring parameters might be too big. Use the default CAN-FD values in this case. Fixes: 9263c2e92be9 ("can: mcp251xfd: ring: add support for runtime configurable RX/TX ring parameters") Link: https://lore.kernel.org/all/20240805-mcp251xfd-fix-ringconfig-v1-1-72086f0ca5ee@pengutronix.de Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c | 11 +++++++++- .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 20 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c index 9e8e82cdba4612..61b0d6fa52dd80 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ram.c @@ -97,7 +97,16 @@ void can_ram_get_layout(struct can_ram_layout *layout, if (ring) { u8 num_rx_coalesce = 0, num_tx_coalesce = 0; - num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, ring->rx_pending); + /* If the ring parameters have been configured in + * CAN-CC mode, but and we are in CAN-FD mode now, + * they might be to big. Use the default CAN-FD values + * in this case. + */ + num_rx = ring->rx_pending; + if (num_rx > layout->max_rx) + num_rx = layout->default_rx; + + num_rx = can_ram_rounddown_pow_of_two(config, &config->rx, 0, num_rx); /* The ethtool doc says: * To disable coalescing, set usecs = 0 and max_frames = 1. 
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 4cb79a4f246124..3a941a71c78fb8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -468,11 +468,25 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) /* switching from CAN-2.0 to CAN-FD mode or vice versa */ if (fd_mode != test_bit(MCP251XFD_FLAGS_FD_MODE, priv->flags)) { + const struct ethtool_ringparam ring = { + .rx_pending = priv->rx_obj_num, + .tx_pending = priv->tx->obj_num, + }; + const struct ethtool_coalesce ec = { + .rx_coalesce_usecs_irq = priv->rx_coalesce_usecs_irq, + .rx_max_coalesced_frames_irq = priv->rx_obj_num_coalesce_irq, + .tx_coalesce_usecs_irq = priv->tx_coalesce_usecs_irq, + .tx_max_coalesced_frames_irq = priv->tx_obj_num_coalesce_irq, + }; struct can_ram_layout layout; - can_ram_get_layout(&layout, &mcp251xfd_ram_config, NULL, NULL, fd_mode); - priv->rx_obj_num = layout.default_rx; - tx_ring->obj_num = layout.default_tx; + can_ram_get_layout(&layout, &mcp251xfd_ram_config, &ring, &ec, fd_mode); + + priv->rx_obj_num = layout.cur_rx; + priv->rx_obj_num_coalesce_irq = layout.rx_coalesce; + + tx_ring->obj_num = layout.cur_tx; + priv->tx_obj_num_coalesce_irq = layout.tx_coalesce; } if (fd_mode) { From 4de4e53bbd133217e5f74750a6a99e96ffd0c2de Mon Sep 17 00:00:00 2001 From: Matthew Maurer Date: Thu, 28 Sep 2023 20:49:25 +0000 Subject: [PATCH 097/268] rust: Use awk instead of recent xargs [ Upstream commit 45f97e6385cad6d0e48a27ddcd08793bb4d35851 ] `awk` is already required by the kernel build, and the `xargs` feature used in current Rust detection is not present in all `xargs` (notably, toybox based xargs, used in the Android kernel build). Signed-off-by: Matthew Maurer Reviewed-by: Alice Ryhl Tested-by: Alice Ryhl Reviewed-by: Martin Rodriguez Reboredo Link: https://lore.kernel.org/r/20230928205045.2375899-1-mmaurer@google.com Signed-off-by: Miguel Ojeda Stable-dep-of: b8673d56935c ("rust: kbuild: fix export of bss symbols") Signed-off-by: Sasha Levin --- rust/Makefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rust/Makefile b/rust/Makefile index e5619f25b55cac..7e33ebe8a9f48e 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -363,9 +363,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers.c FORCE quiet_cmd_exports = EXPORTS $@ cmd_exports = \ $(NM) -p --defined-only $< \ - | grep -E ' (T|R|D) ' | cut -d ' ' -f 3 \ - | xargs -Isymbol \ - echo 'EXPORT_SYMBOL_RUST_GPL(symbol);' > $@ + | awk '/ (T|R|D) / {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) From b4b2115d1f500986ff36720b7ffbfacb410bf7c3 Mon Sep 17 00:00:00 2001 From: Andreas Hindborg Date: Thu, 15 Aug 2024 07:49:30 +0000 Subject: [PATCH 098/268] rust: kbuild: fix export of bss symbols [ Upstream commit b8673d56935c32a4e0a1a0b40951fdd313dbf340 ] Symbols in the bss segment are not currently exported. This is a problem for Rust modules that link against statics, that are resident in the kernel image. Thus export symbols in the bss segment. Fixes: 2f7ab1267dc9 ("Kbuild: add Rust support") Signed-off-by: Andreas Hindborg Reviewed-by: Alice Ryhl Tested-by: Alice Ryhl Reviewed-by: Gary Guo Link: https://lore.kernel.org/r/20240815074519.2684107-2-nmi@metaspace.dk [ Reworded slightly. 
- Miguel ] Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- rust/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/Makefile b/rust/Makefile index 7e33ebe8a9f48e..333b9a482473d6 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -363,7 +363,7 @@ $(obj)/bindings/bindings_helpers_generated.rs: $(src)/helpers.c FORCE quiet_cmd_exports = EXPORTS $@ cmd_exports = \ $(NM) -p --defined-only $< \ - | awk '/ (T|R|D) / {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ + | awk '/ (T|R|D|B) / {printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3}' > $@ $(obj)/exports_core_generated.h: $(obj)/core.o FORCE $(call if_changed,exports) From 0a9423f99ded38b32bd1864347332cc557e6b141 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 28 Aug 2024 21:08:25 +0100 Subject: [PATCH 099/268] cifs: Fix FALLOC_FL_ZERO_RANGE to preflush buffered part of target region [ Upstream commit 91d1dfae464987aaf6c79ff51d8674880fb3be77 ] Under certain conditions, the range to be cleared by FALLOC_FL_ZERO_RANGE may only be buffered locally and not yet have been flushed to the server. For example: xfs_io -f -t -c "pwrite -S 0x41 0 4k" \ -c "pwrite -S 0x42 4k 4k" \ -c "fzero 0 4k" \ -c "pread -v 0 8k" /xfstest.test/foo will write two 4KiB blocks of data, which get buffered in the pagecache, and then fallocate() is used to clear the first 4KiB block on the server - but we don't flush the data first, which means the EOF position on the server is wrong, and so the FSCTL_SET_ZERO_DATA RPC fails (and xfs_io ignores the error), but then when we try to read it, we see the old data. Fix this by preflushing any part of the target region that above the server's idea of the EOF position to force the server to update its EOF position. Note, however, that we don't want to simply expand the file by moving the EOF before doing the FSCTL_SET_ZERO_DATA[*] because someone else might see the zeroed region or if the RPC fails we then have to try to clean it up or risk getting corruption. [*] And we have to move the EOF first otherwise FSCTL_SET_ZERO_DATA won't do what we want. This fixes the generic/008 xfstest. [!] Note: A better way to do this might be to split the operation into two parts: we only do FSCTL_SET_ZERO_DATA for the part of the range below the server's EOF and then, if that worked, invalidate the buffered pages for the part above the range. 
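As a worked example of the preflush described above (the exact bounds check is in the hunk below; this assumes none of the buffered data has been written back yet, so the server still believes the file is empty): with the xfs_io reproducer, i_size is 8KiB locally after the two pwrites while the remote size is 0. The fzero call has offset 0 and len 4KiB, so offset + len (4096) is >= the remote size (0) and offset (0) is < i_size (8192); the preflush therefore writes back bytes 0-4095, moving the server's EOF past the zeroed range before FSCTL_SET_ZERO_DATA is issued.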
Fixes: 6b69040247e1 ("cifs/smb3: Fix data inconsistent when zero file range") Signed-off-by: David Howells cc: Steve French cc: Zhang Xiaoxu cc: Pavel Shilovsky cc: Paulo Alcantara cc: Shyam Prasad N cc: Rohith Surabattula cc: Jeff Layton cc: linux-cifs@vger.kernel.org cc: linux-mm@kvack.org Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2ops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 012d6ec12a6918..acd5d7d7935257 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -3186,13 +3186,15 @@ static long smb3_zero_data(struct file *file, struct cifs_tcon *tcon, } static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, - loff_t offset, loff_t len, bool keep_size) + unsigned long long offset, unsigned long long len, + bool keep_size) { struct cifs_ses *ses = tcon->ses; struct inode *inode = file_inode(file); struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifsFileInfo *cfile = file->private_data; - unsigned long long new_size; + struct netfs_inode *ictx = netfs_inode(inode); + unsigned long long i_size, new_size, remote_size; long rc; unsigned int xid; @@ -3204,6 +3206,16 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); + i_size = i_size_read(inode); + remote_size = ictx->remote_i_size; + if (offset + len >= remote_size && offset < i_size) { + unsigned long long top = umin(offset + len, i_size); + + rc = filemap_write_and_wait_range(inode->i_mapping, offset, top - 1); + if (rc < 0) + goto zero_range_exit; + } + /* * We zero the range through ioctl, so we need remove the page caches * first, otherwise the data may be inconsistent with the server. From 4dde04370503a83a8cd7e9f37a3ddc6c9a88c35d Mon Sep 17 00:00:00 2001 From: Daiwei Li Date: Tue, 13 Aug 2024 21:55:53 -0700 Subject: [PATCH 100/268] igb: Fix not clearing TimeSync interrupts for 82580 [ Upstream commit ba8cf80724dbc09825b52498e4efacb563935408 ] 82580 NICs have a hardware bug that makes it necessary to write into the TSICR (TimeSync Interrupt Cause) register to clear it: https://lore.kernel.org/all/CDCB8BE0.1EC2C%25matthew.vick@intel.com/ Add a conditional so only for 82580 we write into the TSICR register, so we don't risk losing events for other models. Without this change, when running ptp4l with an Intel 82580 card, I get the following output: > timed out while polling for tx timestamp increasing tx_timestamp_timeout or > increasing kworker priority may correct this issue, but a driver bug likely > causes it This goes away with this change. This (partially) reverts commit ee14cc9ea19b ("igb: Fix missing time sync events"). 
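A hedged example of how the symptom can be observed (the interface name is an assumption; any port backed by an 82580 should do): run

  ptp4l -i enp2s0 -m

on an affected NIC and the "timed out while polling for tx timestamp" warning quoted above shows up repeatedly; it no longer appears once the conditional TSICR write-back below is applied.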
Fixes: ee14cc9ea19b ("igb: Fix missing time sync events") Closes: https://lore.kernel.org/intel-wired-lan/CAN0jFd1kO0MMtOh8N2Ztxn6f7vvDKp2h507sMryobkBKe=xk=w@mail.gmail.com/ Tested-by: Daiwei Li Suggested-by: Vinicius Costa Gomes Signed-off-by: Daiwei Li Acked-by: Vinicius Costa Gomes Reviewed-by: Kurt Kanzenbach Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igb/igb_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 8c8894ef33886d..fa268d7bd1bc3c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6985,10 +6985,20 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) static void igb_tsync_interrupt(struct igb_adapter *adapter) { + const u32 mask = (TSINTR_SYS_WRAP | E1000_TSICR_TXTS | + TSINTR_TT0 | TSINTR_TT1 | + TSINTR_AUTT0 | TSINTR_AUTT1); struct e1000_hw *hw = &adapter->hw; u32 tsicr = rd32(E1000_TSICR); struct ptp_clock_event event; + if (hw->mac.type == e1000_82580) { + /* 82580 has a hardware bug that requires an explicit + * write to clear the TimeSync interrupt cause. + */ + wr32(E1000_TSICR, tsicr & mask); + } + if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; if (adapter->ptp_caps.pps) From efe8effe138044a4747d1112ebb8c454d1663723 Mon Sep 17 00:00:00 2001 From: Dawid Osuchowski Date: Wed, 21 Aug 2024 18:06:40 +0200 Subject: [PATCH 101/268] ice: Add netif_device_attach/detach into PF reset flow [ Upstream commit d11a67634227f9f9da51938af085fb41a733848f ] Ethtool callbacks can be executed while reset is in progress and try to access deleted resources, e.g. getting coalesce settings can result in a NULL pointer dereference seen below. Reproduction steps: Once the driver is fully initialized, trigger reset: # echo 1 > /sys/class/net//device/reset when reset is in progress try to get coalesce settings using ethtool: # ethtool -c BUG: kernel NULL pointer dereference, address: 0000000000000020 PGD 0 P4D 0 Oops: Oops: 0000 [#1] PREEMPT SMP PTI CPU: 11 PID: 19713 Comm: ethtool Tainted: G S 6.10.0-rc7+ #7 RIP: 0010:ice_get_q_coalesce+0x2e/0xa0 [ice] RSP: 0018:ffffbab1e9bcf6a8 EFLAGS: 00010206 RAX: 000000000000000c RBX: ffff94512305b028 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9451c3f2e588 RDI: ffff9451c3f2e588 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffff9451c3f2e580 R11: 000000000000001f R12: ffff945121fa9000 R13: ffffbab1e9bcf760 R14: 0000000000000013 R15: ffffffff9e65dd40 FS: 00007faee5fbe740(0000) GS:ffff94546fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000020 CR3: 0000000106c2e005 CR4: 00000000001706f0 Call Trace: ice_get_coalesce+0x17/0x30 [ice] coalesce_prepare_data+0x61/0x80 ethnl_default_doit+0xde/0x340 genl_family_rcv_msg_doit+0xf2/0x150 genl_rcv_msg+0x1b3/0x2c0 netlink_rcv_skb+0x5b/0x110 genl_rcv+0x28/0x40 netlink_unicast+0x19c/0x290 netlink_sendmsg+0x222/0x490 __sys_sendto+0x1df/0x1f0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x82/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e RIP: 0033:0x7faee60d8e27 Calling netif_device_detach() before reset makes the net core not call the driver when ethtool command is issued, the attempt to execute an ethtool command during reset will result in the following message: netlink error: No such device instead of NULL pointer dereference. 
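For context, a minimal sketch of why the detached state turns the crash into a clean error (this reflects the generic ethtool netlink entry path, not code touched by this patch; the check is believed to live in ethnl_ops_begin()):

	/* roughly what the ethtool netlink core does before calling the driver */
	if (!netif_device_present(dev))
		return -ENODEV;	/* seen by the user as "netlink error: No such device" */

netif_device_detach() clears the device's present bit, so the request is rejected in the core instead of reaching ice_get_q_coalesce() with freed queue state.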
Once reset is done and ice_rebuild() is executing, the netif_device_attach() is called to allow for ethtool operations to occur again in a safe manner. Fixes: fcea6f3da546 ("ice: Add stats and ethtool support") Suggested-by: Jakub Kicinski Reviewed-by: Igor Bagnucki Signed-off-by: Dawid Osuchowski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Reviewed-by: Michal Schmidt Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3f254ce0d6db76..ffe6e74b9fea24 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -606,6 +606,9 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); } } + + if (vsi->netdev) + netif_device_detach(vsi->netdev); skip: /* clear SW filtering DB */ @@ -7295,6 +7298,7 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf) */ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) { + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct device *dev = ice_pf_to_dev(pf); struct ice_hw *hw = &pf->hw; bool dvm; @@ -7447,6 +7451,9 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) ice_rebuild_arfs(pf); } + if (vsi && vsi->netdev) + netif_device_attach(vsi->netdev); + ice_update_pf_netdev_link(pf); /* tell the firmware we are up */ From dc2694e474b708115128fe5418765b37108cb381 Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Fri, 30 Aug 2024 09:54:28 +0300 Subject: [PATCH 102/268] platform/x86: dell-smbios: Fix error path in dell_smbios_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ffc17e1479e8e9459b7afa80e5d9d40d0dd78abb ] In case of error in build_tokens_sysfs(), all the memory that has been allocated is freed at end of this function. But then free_group() is called which performs memory deallocation again. Also, instead of free_group() call, there should be exit_dell_smbios_smm() and exit_dell_smbios_wmi() calls, since there is initialization, but there is no release of resources in case of an error. Fix these issues by replacing free_group() call with exit_dell_smbios_wmi() and exit_dell_smbios_smm(). Found by Linux Verification Center (linuxtesting.org) with SVACE. 
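To spell out the error path: when build_tokens_sysfs() fails it has already freed everything it allocated, so the old free_group(platform_device) call on the fail_sysfs label released that memory a second time. What is still initialized at that point are the WMI and SMM backends set up earlier in dell_smbios_init(), which is why the fix tears those down instead; the !wmi/!smm guards are assumed to mirror the conditions under which the corresponding init_dell_smbios_wmi()/init_dell_smbios_smm() calls were made.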
Fixes: 33b9ca1e53b4 ("platform/x86: dell-smbios: Add a sysfs interface for SMBIOS tokens") Signed-off-by: Aleksandr Mishin Link: https://lore.kernel.org/r/20240830065428.9544-1-amishin@t-argos.ru Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen Signed-off-by: Sasha Levin --- drivers/platform/x86/dell/dell-smbios-base.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index 86b95206cb1bd6..6fb538a1386894 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -590,7 +590,10 @@ static int __init dell_smbios_init(void) return 0; fail_sysfs: - free_group(platform_device); + if (!wmi) + exit_dell_smbios_wmi(); + if (!smm) + exit_dell_smbios_smm(); fail_create_group: platform_device_del(platform_device); From c5e236744db66c2eb02ac674c5e4cae0fa2abc1a Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Fri, 30 Aug 2024 07:35:12 -0700 Subject: [PATCH 103/268] regulator: core: Stub devm_regulator_bulk_get_const() if !CONFIG_REGULATOR [ Upstream commit 1a5caec7f80ca2e659c03f45378ee26915f4eda2 ] When adding devm_regulator_bulk_get_const() I missed adding a stub for when CONFIG_REGULATOR is not enabled. Under certain conditions (like randconfig testing) this can cause the compiler to reports errors like: error: implicit declaration of function 'devm_regulator_bulk_get_const'; did you mean 'devm_regulator_bulk_get_enable'? Add the stub. Fixes: 1de452a0edda ("regulator: core: Allow drivers to define their init data as const") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408301813.TesFuSbh-lkp@intel.com/ Cc: Neil Armstrong Signed-off-by: Douglas Anderson Link: https://patch.msgid.link/20240830073511.1.Ib733229a8a19fad8179213c05e1af01b51e42328@changeid Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- include/linux/regulator/consumer.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 2c526c8d10cc46..25d0684d37b3e3 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -489,6 +489,14 @@ static inline int of_regulator_bulk_get_all(struct device *dev, struct device_no return 0; } +static inline int devm_regulator_bulk_get_const( + struct device *dev, int num_consumers, + const struct regulator_bulk_data *in_consumers, + struct regulator_bulk_data **out_consumers) +{ + return 0; +} + static inline int regulator_bulk_enable(int num_consumers, struct regulator_bulk_data *consumers) { From c1fb6226795d175800719cd3f91d5764e8e72b71 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 14 Jun 2024 17:15:19 +0200 Subject: [PATCH 104/268] can: kvaser_pciefd: Skip redundant NULL pointer check in ISR [ Upstream commit ac765219c2c4e44f29063724c8d36435a3e61985 ] This check is already done at the creation of the net devices in kvaser_pciefd_setup_can_ctrls called from kvaser_pciefd_probe. If it fails, the driver won't load, so there should be no need to repeat the check inside the ISR. The number of channels is read from the FPGA and should be trusted. 
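For reference, the reasoning in code terms: kvaser_pciefd_setup_can_ctrls() populates pcie->can[i] for every channel during probe, and the probe fails if that does not succeed, so by the time kvaser_pciefd_irq_handler() runs every entry below pcie->nr_channels is non-NULL and the removed in-ISR check could never trigger.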
Signed-off-by: Martin Jocic Link: https://lore.kernel.org/all/20240614151524.2718287-3-martin.jocic@kvaser.com Signed-off-by: Marc Kleine-Budde Stable-dep-of: dd885d90c047 ("can: kvaser_pciefd: Use a single write when releasing RX buffers") Signed-off-by: Sasha Levin --- drivers/net/can/kvaser_pciefd.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a57005faa04f51..076fc2f5b34bba 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1641,12 +1641,6 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { - if (!pcie->can[i]) { - dev_err(&pcie->pci->dev, - "IRQ mask points to unallocated controller\n"); - break; - } - /* Check that mask matches channel (i) IRQ mask */ if (board_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); From 42408507362deb546cd0f04109da0b83d074722b Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 14 Jun 2024 17:15:20 +0200 Subject: [PATCH 105/268] can: kvaser_pciefd: Remove unnecessary comment [ Upstream commit 11d186697ceb10b68c6a1fd505635346b1ccd055 ] The code speaks for itself. Signed-off-by: Martin Jocic Link: https://lore.kernel.org/all/20240614151524.2718287-4-martin.jocic@kvaser.com Signed-off-by: Marc Kleine-Budde Stable-dep-of: dd885d90c047 ("can: kvaser_pciefd: Use a single write when releasing RX buffers") Signed-off-by: Sasha Levin --- drivers/net/can/kvaser_pciefd.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 076fc2f5b34bba..00cfa23a8dcf2c 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1641,7 +1641,6 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { - /* Check that mask matches channel (i) IRQ mask */ if (board_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } From 00e4c69422c05724bd826554cfcce3c32b2bf3b0 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 14 Jun 2024 17:15:23 +0200 Subject: [PATCH 106/268] can: kvaser_pciefd: Rename board_irq to pci_irq [ Upstream commit cbf88a6ba7bb6ce0d3131b119298f73bd7b18459 ] Rename the variable name board_irq in the ISR to pci_irq to be more specific and to match the macro by which it is read. 
Signed-off-by: Martin Jocic Link: https://lore.kernel.org/all/20240614151524.2718287-7-martin.jocic@kvaser.com Signed-off-by: Marc Kleine-Budde Stable-dep-of: dd885d90c047 ("can: kvaser_pciefd: Use a single write when releasing RX buffers") Signed-off-by: Sasha Levin --- drivers/net/can/kvaser_pciefd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 00cfa23a8dcf2c..a933b4c8165c5d 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1631,17 +1631,17 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) { struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; - u32 board_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); + u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); int i; - if (!(board_irq & irq_mask->all)) + if (!(pci_irq & irq_mask->all)) return IRQ_NONE; - if (board_irq & irq_mask->kcan_rx0) + if (pci_irq & irq_mask->kcan_rx0) kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { - if (board_irq & irq_mask->kcan_tx[i]) + if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } From 6587b387cd543716050cf10f318c64f52e982613 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Thu, 20 Jun 2024 20:13:19 +0200 Subject: [PATCH 107/268] can: kvaser_pciefd: Move reset of DMA RX buffers to the end of the ISR [ Upstream commit 48f827d4f48f5243e37b9240029ce3f456d1f490 ] A new interrupt is triggered by resetting the DMA RX buffers. Since MSI interrupts are faster than legacy interrupts, the reset of the DMA buffers must be moved to the very end of the ISR, otherwise a new MSI interrupt will be masked by the current one. 
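With this change (see the diff below) kvaser_pciefd_receive_irq() only reads the buffers and reports the SRB interrupt status back to the caller; the iowrite32() of KVASER_PCIEFD_SRB_CMD_RDB0/RDB1 that actually releases a buffer, and may therefore raise the next MSI, is done as the last step of kvaser_pciefd_irq_handler() just before returning IRQ_HANDLED.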
Signed-off-by: Martin Jocic Link: https://lore.kernel.org/all/20240620181320.235465-2-martin.jocic@kvaser.com Signed-off-by: Marc Kleine-Budde Stable-dep-of: dd885d90c047 ("can: kvaser_pciefd: Use a single write when releasing RX buffers") Signed-off-by: Sasha Levin --- drivers/net/can/kvaser_pciefd.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index a933b4c8165c5d..96b6e3d13e67d8 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1580,23 +1580,15 @@ static int kvaser_pciefd_read_buffer(struct kvaser_pciefd *pcie, int dma_buf) return res; } -static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) +static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) { u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) kvaser_pciefd_read_buffer(pcie, 0); - /* Reset DMA buffer 0 */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } - if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { + if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) kvaser_pciefd_read_buffer(pcie, 1); - /* Reset DMA buffer 1 */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } if (irq & KVASER_PCIEFD_SRB_IRQ_DOF0 || irq & KVASER_PCIEFD_SRB_IRQ_DOF1 || @@ -1605,6 +1597,7 @@ static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie) dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq); iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG); + return irq; } static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can) @@ -1632,19 +1625,32 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev; const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); + u32 srb_irq = 0; int i; if (!(pci_irq & irq_mask->all)) return IRQ_NONE; if (pci_irq & irq_mask->kcan_rx0) - kvaser_pciefd_receive_irq(pcie); + srb_irq = kvaser_pciefd_receive_irq(pcie); for (i = 0; i < pcie->nr_channels; i++) { if (pci_irq & irq_mask->kcan_tx[i]) kvaser_pciefd_transmit_irq(pcie->can[i]); } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { + /* Reset DMA buffer 0, may trigger new interrupt */ + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, + KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + } + + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { + /* Reset DMA buffer 1, may trigger new interrupt */ + iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, + KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); + } + return IRQ_HANDLED; } From 503901d3c9e5e285733b90895f10b8f8036dfa01 Mon Sep 17 00:00:00 2001 From: Martin Jocic Date: Fri, 30 Aug 2024 17:31:13 +0200 Subject: [PATCH 108/268] can: kvaser_pciefd: Use a single write when releasing RX buffers [ Upstream commit dd885d90c047dbdd2773c1d33954cbd8747d81e2 ] Kvaser's PCIe cards uses the KCAN FPGA IP block which has dual 4K buffers for incoming messages shared by all (currently up to eight) channels. While the driver processes messages in one buffer, new incoming messages are stored in the other and so on. The design of KCAN is such that a buffer must be fully read and then released. Releasing a buffer will make the FPGA switch buffers. 
If the other buffer contains at least one incoming message the FPGA will also instantly issue a new interrupt, if not the interrupt will be issued after receiving the first new message. With IRQx interrupts, it takes a little time for the interrupt to happen, enough for any previous ISR call to do it's business and return, but MSI interrupts are way faster so this time is reduced to almost nothing. So with MSI, releasing the buffer HAS to be the very last action of the ISR before returning, otherwise the new interrupt might be "masked" by the kernel because the previous ISR call hasn't returned. And the interrupts are edge-triggered so we cannot loose one, or the ping-pong reading process will stop. This is why this patch modifies the driver to use a single write to the SRB_CMD register before returning. Signed-off-by: Martin Jocic Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20240830153113.2081440-1-martin.jocic@kvaser.com Fixes: 26ad340e582d ("can: kvaser_pciefd: Add driver for Kvaser PCIEcan devices") Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/kvaser_pciefd.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 96b6e3d13e67d8..c490b4ba065ba4 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -1626,6 +1626,7 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask; u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie)); u32 srb_irq = 0; + u32 srb_release = 0; int i; if (!(pci_irq & irq_mask->all)) @@ -1639,17 +1640,14 @@ static irqreturn_t kvaser_pciefd_irq_handler(int irq, void *dev) kvaser_pciefd_transmit_irq(pcie->can[i]); } - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) { - /* Reset DMA buffer 0, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0; - if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) { - /* Reset DMA buffer 1, may trigger new interrupt */ - iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, - KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); - } + if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1) + srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1; + + if (srb_release) + iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG); return IRQ_HANDLED; } From a22cbf1e085b47dfd0668e5aeef2214a3341d790 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Wed, 21 Aug 2024 15:43:40 -0700 Subject: [PATCH 109/268] Bluetooth: qca: If memdump doesn't work, re-enable IBS [ Upstream commit 8ae22de9d2eae3c432de64bf2b3a5a69cf1d1124 ] On systems in the field, we are seeing this sometimes in the kernel logs: Bluetooth: qca_controller_memdump() hci0: hci_devcd_init Return:-95 This means that _something_ decided that it wanted to get a memdump but then hci_devcd_init() returned -EOPNOTSUPP (AKA -95). The cleanup code in qca_controller_memdump() when we get back an error from hci_devcd_init() undoes most things but forgets to clear QCA_IBS_DISABLED. One side effect of this is that, during the next suspend, qca_suspend() will always get a timeout. Let's fix it so that we clear the bit. 
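Note that the added clear_bit() sits in the early-return path taken when hci_devcd_init() fails, next to the existing QCA_MEMDUMP_COLLECTION clear, so a failed dump no longer leaves IBS permanently disabled, which is what caused every subsequent qca_suspend() to run into its timeout.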
Fixes: 06d3fdfcdf5c ("Bluetooth: hci_qca: Add qcom devcoredump support") Reviewed-by: Guenter Roeck Reviewed-by: Stephen Boyd Signed-off-by: Douglas Anderson Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/hci_qca.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 9082456d80fbf2..7a552387129ef0 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1090,6 +1090,7 @@ static void qca_controller_memdump(struct work_struct *work) qca->memdump_state = QCA_MEMDUMP_COLLECTED; cancel_delayed_work(&qca->ctrl_memdump_timeout); clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags); + clear_bit(QCA_IBS_DISABLED, &qca->flags); mutex_unlock(&qca->hci_memdump_lock); return; } From 9cd7289bcc2449445bf694542f77f3745e0bc943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Mon, 8 Jan 2024 23:46:07 +0100 Subject: [PATCH 110/268] Bluetooth: hci_event: Use HCI error defines instead of magic values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 79c0868ad65a8fc7cdfaa5f2b77a4b70d0b0ea16 ] We have error defines already, so let's use them. Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_event.c | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d2a280a42f3b8f..1c427dd2d4184f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -644,6 +644,7 @@ enum { #define HCI_ERROR_PIN_OR_KEY_MISSING 0x06 #define HCI_ERROR_MEMORY_EXCEEDED 0x07 #define HCI_ERROR_CONNECTION_TIMEOUT 0x08 +#define HCI_ERROR_COMMAND_DISALLOWED 0x0c #define HCI_ERROR_REJ_LIMITED_RESOURCES 0x0d #define HCI_ERROR_REJ_BAD_ADDR 0x0f #define HCI_ERROR_INVALID_PARAMETERS 0x12 @@ -652,6 +653,7 @@ enum { #define HCI_ERROR_REMOTE_POWER_OFF 0x15 #define HCI_ERROR_LOCAL_HOST_TERM 0x16 #define HCI_ERROR_PAIRING_NOT_ALLOWED 0x18 +#define HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE 0x1e #define HCI_ERROR_INVALID_LL_PARAMS 0x1e #define HCI_ERROR_UNSPECIFIED 0x1f #define HCI_ERROR_ADVERTISING_TIMEOUT 0x3c diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 727f040b652979..dc80c1560357ab 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -93,11 +93,11 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, /* It is possible that we receive Inquiry Complete event right * before we receive Inquiry Cancel Command Complete event, in * which case the latter event should have status of Command - * Disallowed (0x0c). This should not be treated as error, since + * Disallowed. This should not be treated as error, since * we actually achieve what Inquiry Cancel wants to achieve, * which is to end the last Inquiry session. 
*/ - if (rp->status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) { + if (rp->status == HCI_ERROR_COMMAND_DISALLOWED && !test_bit(HCI_INQUIRY, &hdev->flags)) { bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command"); rp->status = 0x00; } @@ -2285,7 +2285,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (status) { if (conn && conn->state == BT_CONNECT) { - if (status != 0x0c || conn->attempt > 2) { + if (status != HCI_ERROR_COMMAND_DISALLOWED || conn->attempt > 2) { conn->state = BT_CLOSED; hci_connect_cfm(conn, status); hci_conn_del(conn); @@ -6430,7 +6430,7 @@ static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, void *data, * transition into connected state and mark it as * successful. */ - if (!conn->out && ev->status == 0x1a && + if (!conn->out && ev->status == HCI_ERROR_UNSUPPORTED_REMOTE_FEATURE && (hdev->le_features[0] & HCI_LE_PERIPHERAL_FEATURES)) status = 0x00; else From c57edb54827694cb40fe97a5307d78ce0812d4f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Tue, 6 Feb 2024 12:08:13 +0100 Subject: [PATCH 111/268] Bluetooth: hci_conn: Only do ACL connections sequentially MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 45340097ce6ea7e875674a5a7d24c95ecbc93ef9 ] Pretty much all bluetooth chipsets only support paging a single device at a time, and if they don't reject a secondary "Create Connection" request while another is still ongoing, they'll most likely serialize those requests in the firware. With commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests") we started adding some serialization of our own in case the adapter returns "Command Disallowed" HCI error. This commit was using the BT_CONNECT2 state for the serialization, this state is also used for a few more things (most notably to indicate we're waiting for an inquiry to cancel) and therefore a bit unreliable. Also not all BT firwares would respond with "Command Disallowed" on too many connection requests, some will also respond with "Hardware Failure" (BCM4378), and others will error out later and send a "Connect Complete" event with error "Rejected Limited Resources" (Marvell 88W8897). We can clean things up a bit and also make the serialization more reliable by using our hci_sync machinery to always do "Create Connection" requests in a sequential manner. This is very similar to what we're already doing for establishing LE connections, and it works well there. Note that this causes a test failure in mgmt-tester (test "Pair Device - Power off 1") because the hci_abort_conn_sync() changes the error we return on timeout of the "Create Connection". We'll fix this on the mgmt-tester side by adjusting the expected error for the test. 
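Concretely, using the names introduced by this patch (see the diff below): hci_connect_acl() now calls hci_acl_create_connection_sync(), which queues __hci_acl_create_connection_sync() on the hdev cmd_sync work. That callback sends HCI_OP_CREATE_CONN via __hci_cmd_sync_status_sk() and waits for the matching HCI_EV_CONN_COMPLETE or the new 20 second HCI_ACL_CONN_TIMEOUT, aborting the connection on timeout. Since cmd_sync work items run one at a time, a second "Create Connection" can no longer reach the controller while the first page is still in flight.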
Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci.h | 1 + include/net/bluetooth/hci_sync.h | 3 ++ net/bluetooth/hci_conn.c | 69 ++++--------------------------- net/bluetooth/hci_sync.c | 70 ++++++++++++++++++++++++++++++++ 4 files changed, 83 insertions(+), 60 deletions(-) diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1c427dd2d4184f..2129d071c3725f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -430,6 +430,7 @@ enum { #define HCI_NCMD_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ #define HCI_ACL_TX_TIMEOUT msecs_to_jiffies(45000) /* 45 seconds */ #define HCI_AUTO_OFF_TIMEOUT msecs_to_jiffies(2000) /* 2 seconds */ +#define HCI_ACL_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_CONN_TIMEOUT msecs_to_jiffies(20000) /* 20 seconds */ #define HCI_LE_AUTOCONN_TIMEOUT msecs_to_jiffies(4000) /* 4 seconds */ diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 4cb048bdcb1e46..532230150cc9f1 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -140,3 +140,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason); int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); + +int hci_acl_create_connection_sync(struct hci_dev *hdev, + struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index dc1c07c7d4ff93..04fe901a47f7ea 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -178,64 +178,6 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_dev_put(hdev); } -static void hci_acl_create_connection(struct hci_conn *conn) -{ - struct hci_dev *hdev = conn->hdev; - struct inquiry_entry *ie; - struct hci_cp_create_conn cp; - - BT_DBG("hcon %p", conn); - - /* Many controllers disallow HCI Create Connection while it is doing - * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create - * Connection. This may cause the MGMT discovering state to become false - * without user space's request but it is okay since the MGMT Discovery - * APIs do not promise that discovery should be done forever. Instead, - * the user space monitors the status of MGMT discovering and it may - * request for discovery again when this flag becomes false. - */ - if (test_bit(HCI_INQUIRY, &hdev->flags)) { - /* Put this connection to "pending" state so that it will be - * executed after the inquiry cancel command complete event. 
- */ - conn->state = BT_CONNECT2; - hci_send_cmd(hdev, HCI_OP_INQUIRY_CANCEL, 0, NULL); - return; - } - - conn->state = BT_CONNECT; - conn->out = true; - conn->role = HCI_ROLE_MASTER; - - conn->attempt++; - - conn->link_policy = hdev->link_policy; - - memset(&cp, 0, sizeof(cp)); - bacpy(&cp.bdaddr, &conn->dst); - cp.pscan_rep_mode = 0x02; - - ie = hci_inquiry_cache_lookup(hdev, &conn->dst); - if (ie) { - if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { - cp.pscan_rep_mode = ie->data.pscan_rep_mode; - cp.pscan_mode = ie->data.pscan_mode; - cp.clock_offset = ie->data.clock_offset | - cpu_to_le16(0x8000); - } - - memcpy(conn->dev_class, ie->data.dev_class, 3); - } - - cp.pkt_type = cpu_to_le16(conn->pkt_type); - if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) - cp.role_switch = 0x01; - else - cp.role_switch = 0x00; - - hci_send_cmd(hdev, HCI_OP_CREATE_CONN, sizeof(cp), &cp); -} - int hci_disconnect(struct hci_conn *conn, __u8 reason) { BT_DBG("hcon %p", conn); @@ -1702,10 +1644,17 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl->conn_reason = conn_reason; if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + int err; + acl->sec_level = BT_SECURITY_LOW; acl->pending_sec_level = sec_level; acl->auth_type = auth_type; - hci_acl_create_connection(acl); + + err = hci_acl_create_connection_sync(hdev, acl); + if (err) { + hci_conn_del(acl); + return ERR_PTR(err); + } } return acl; @@ -2627,7 +2576,7 @@ void hci_conn_check_pending(struct hci_dev *hdev) conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); if (conn) - hci_acl_create_connection(conn); + hci_acl_create_connection_sync(hdev, conn); hci_dev_unlock(hdev); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 38fee34887d8a7..165c532fa2a2fc 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6577,3 +6577,73 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance) return hci_cmd_sync_queue(hdev, _update_adv_data_sync, UINT_PTR(instance), NULL); } + +static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn = data; + struct inquiry_entry *ie; + struct hci_cp_create_conn cp; + int err; + + /* Many controllers disallow HCI Create Connection while it is doing + * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create + * Connection. This may cause the MGMT discovering state to become false + * without user space's request but it is okay since the MGMT Discovery + * APIs do not promise that discovery should be done forever. Instead, + * the user space monitors the status of MGMT discovering and it may + * request for discovery again when this flag becomes false. 
+ */ + if (test_bit(HCI_INQUIRY, &hdev->flags)) { + err = __hci_cmd_sync_status(hdev, HCI_OP_INQUIRY_CANCEL, 0, + NULL, HCI_CMD_TIMEOUT); + if (err) + bt_dev_warn(hdev, "Failed to cancel inquiry %d", err); + } + + conn->state = BT_CONNECT; + conn->out = true; + conn->role = HCI_ROLE_MASTER; + + conn->attempt++; + + conn->link_policy = hdev->link_policy; + + memset(&cp, 0, sizeof(cp)); + bacpy(&cp.bdaddr, &conn->dst); + cp.pscan_rep_mode = 0x02; + + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) { + if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { + cp.pscan_rep_mode = ie->data.pscan_rep_mode; + cp.pscan_mode = ie->data.pscan_mode; + cp.clock_offset = ie->data.clock_offset | + cpu_to_le16(0x8000); + } + + memcpy(conn->dev_class, ie->data.dev_class, 3); + } + + cp.pkt_type = cpu_to_le16(conn->pkt_type); + if (lmp_rswitch_capable(hdev) && !(hdev->link_mode & HCI_LM_MASTER)) + cp.role_switch = 0x01; + else + cp.role_switch = 0x00; + + err = __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, + sizeof(cp), &cp, + HCI_EV_CONN_COMPLETE, + HCI_ACL_CONN_TIMEOUT, NULL); + + if (err == -ETIMEDOUT) + hci_abort_conn_sync(hdev, conn, HCI_ERROR_LOCAL_HOST_TERM); + + return err; +} + +int hci_acl_create_connection_sync(struct hci_dev *hdev, + struct hci_conn *conn) +{ + return hci_cmd_sync_queue(hdev, __hci_acl_create_connection_sync, + conn, NULL); +} From e78bd85af2978e003b618e1fc36b4d8f8903b4cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20Dre=C3=9Fler?= Date: Tue, 6 Feb 2024 12:08:14 +0100 Subject: [PATCH 112/268] Bluetooth: Remove pending ACL connection attempts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 4aa42119d971603dc9e4d8cf4f53d5fcf082ea7d ] With the last commit we moved to using the hci_sync queue for "Create Connection" requests, removing the need for retrying the paging after finished/failed "Create Connection" requests and after the end of inquiries. hci_conn_check_pending() was used to trigger this retry, we can remove it now. Note that we can also remove the special handling for COMMAND_DISALLOWED errors in the completion handler of "Create Connection", because "Create Connection" requests are now always serialized. This is somewhat reverting commit 4c67bc74f016 ("[Bluetooth] Support concurrent connect requests"). With this, the BT_CONNECT2 state of ACL hci_conn objects should now be back to meaning only one thing: That we received a "Connection Request" from another device (see hci_conn_request_evt), but the response to that is going to be deferred. 
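In other words, with "Create Connection" now serialized on the cmd_sync queue there is never a second ACL connection parked in BT_CONNECT2 waiting for a retry, so both hci_conn_check_pending() and the Command Disallowed retry handling in hci_cs_create_conn() (see the diff below) have no remaining users.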
Signed-off-by: Jonas Dreßler Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 16 ---------------- net/bluetooth/hci_event.c | 21 ++++----------------- 3 files changed, 4 insertions(+), 34 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 070c794e6a4251..850f0e46aecf82 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1417,7 +1417,6 @@ struct hci_conn *hci_conn_add_unset(struct hci_dev *hdev, int type, bdaddr_t *dst, u8 role); void hci_conn_del(struct hci_conn *conn); void hci_conn_hash_flush(struct hci_dev *hdev); -void hci_conn_check_pending(struct hci_dev *hdev); struct hci_chan *hci_chan_create(struct hci_conn *conn); void hci_chan_del(struct hci_chan *chan); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 04fe901a47f7ea..36731d047f1607 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2565,22 +2565,6 @@ void hci_conn_hash_flush(struct hci_dev *hdev) } } -/* Check pending connect attempts */ -void hci_conn_check_pending(struct hci_dev *hdev) -{ - struct hci_conn *conn; - - BT_DBG("hdev %s", hdev->name); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); - if (conn) - hci_acl_create_connection_sync(hdev, conn); - - hci_dev_unlock(hdev); -} - static u32 get_link_mode(struct hci_conn *conn) { u32 link_mode = 0; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dc80c1560357ab..d81c7fccdd4045 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -118,8 +118,6 @@ static u8 hci_cc_inquiry_cancel(struct hci_dev *hdev, void *data, hci_discovery_set_state(hdev, DISCOVERY_STOPPED); hci_dev_unlock(hdev); - hci_conn_check_pending(hdev); - return rp->status; } @@ -150,8 +148,6 @@ static u8 hci_cc_exit_periodic_inq(struct hci_dev *hdev, void *data, hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ); - hci_conn_check_pending(hdev); - return rp->status; } @@ -2257,10 +2253,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) { bt_dev_dbg(hdev, "status 0x%2.2x", status); - if (status) { - hci_conn_check_pending(hdev); + if (status) return; - } if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY)) set_bit(HCI_INQUIRY, &hdev->flags); @@ -2285,12 +2279,9 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (status) { if (conn && conn->state == BT_CONNECT) { - if (status != HCI_ERROR_COMMAND_DISALLOWED || conn->attempt > 2) { - conn->state = BT_CLOSED; - hci_connect_cfm(conn, status); - hci_conn_del(conn); - } else - conn->state = BT_CONNECT2; + conn->state = BT_CLOSED; + hci_connect_cfm(conn, status); + hci_conn_del(conn); } } else { if (!conn) { @@ -2980,8 +2971,6 @@ static void hci_inquiry_complete_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, "status 0x%2.2x", ev->status); - hci_conn_check_pending(hdev); - if (!test_and_clear_bit(HCI_INQUIRY, &hdev->flags)) return; @@ -3228,8 +3217,6 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data, unlock: hci_dev_unlock(hdev); - - hci_conn_check_pending(hdev); } static void hci_reject_conn(struct hci_dev *hdev, bdaddr_t *bdaddr) From 98f66ea456d5cec528689282d154e2989618e876 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 9 Feb 2024 09:08:06 -0500 Subject: [PATCH 113/268] Bluetooth: hci_conn: Fix UAF 
Write in __hci_acl_create_connection_sync [ Upstream commit 5f641f03abccddd1a37233ff1b8e774b9ff1f4e8 ] This fixes the UAF on __hci_acl_create_connection_sync caused by connection abortion, it uses the same logic as to LE_LINK which uses hci_cmd_sync_cancel to prevent the callback to run if the connection is abort prematurely. Reported-by: syzbot+3f0a39be7a2035700868@syzkaller.appspotmail.com Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 3 +-- net/bluetooth/hci_conn.c | 3 ++- net/bluetooth/hci_sync.c | 16 ++++++++++------ 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 532230150cc9f1..37ca8477b3f47f 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -141,5 +141,4 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); -int hci_acl_create_connection_sync(struct hci_dev *hdev, - struct hci_conn *conn); +int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 36731d047f1607..d15c8ce4b4183d 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1650,7 +1650,7 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, acl->pending_sec_level = sec_level; acl->auth_type = auth_type; - err = hci_acl_create_connection_sync(hdev, acl); + err = hci_connect_acl_sync(hdev, acl); if (err) { hci_conn_del(acl); return ERR_PTR(err); @@ -2913,6 +2913,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) */ if (conn->state == BT_CONNECT && hdev->req_status == HCI_REQ_PEND) { switch (hci_skb_event(hdev->sent_cmd)) { + case HCI_EV_CONN_COMPLETE: case HCI_EV_LE_CONN_COMPLETE: case HCI_EV_LE_ENHANCED_CONN_COMPLETE: case HCI_EVT_LE_CIS_ESTABLISHED: diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 165c532fa2a2fc..19ceb7ce66bf29 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6578,13 +6578,18 @@ int hci_update_adv_data(struct hci_dev *hdev, u8 instance) UINT_PTR(instance), NULL); } -static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) +static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn = data; + struct hci_conn *conn; + u16 handle = PTR_UINT(data); struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. 
This may cause the MGMT discovering state to become false @@ -6641,9 +6646,8 @@ static int __hci_acl_create_connection_sync(struct hci_dev *hdev, void *data) return err; } -int hci_acl_create_connection_sync(struct hci_dev *hdev, - struct hci_conn *conn) +int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue(hdev, __hci_acl_create_connection_sync, - conn, NULL); + return hci_cmd_sync_queue(hdev, hci_acl_create_conn_sync, + UINT_PTR(conn->handle), NULL); } From 1499f79995c7ee58e3bfeeff75f6d1b37dcda881 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 9 Aug 2023 13:43:53 -0700 Subject: [PATCH 114/268] Bluetooth: hci_sync: Add helper functions to manipulate cmd_sync queue [ Upstream commit 505ea2b295929e7be2b4e1bc86ee31cb7862fb01 ] This adds functions to queue, dequeue and lookup into the cmd_sync list. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 12 +++ net/bluetooth/hci_sync.c | 132 +++++++++++++++++++++++++++++-- 2 files changed, 136 insertions(+), 8 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 37ca8477b3f47f..24c0053d8f0cb2 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -50,6 +50,18 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +struct hci_cmd_sync_work_entry * +hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry); +bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, + hci_cmd_sync_work_func_t func, void *data, + hci_cmd_sync_work_destroy_t destroy); int hci_update_eir_sync(struct hci_dev *hdev); int hci_update_class_sync(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 19ceb7ce66bf29..ed18e35c70972f 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -651,6 +651,17 @@ void hci_cmd_sync_init(struct hci_dev *hdev) INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } +static void _hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry, + int err) +{ + if (entry->destroy) + entry->destroy(hdev, entry->data, err); + + list_del(&entry->list); + kfree(entry); +} + void hci_cmd_sync_clear(struct hci_dev *hdev) { struct hci_cmd_sync_work_entry *entry, *tmp; @@ -659,13 +670,8 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) cancel_work_sync(&hdev->reenable_adv_work); mutex_lock(&hdev->cmd_sync_work_lock); - list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { - if (entry->destroy) - entry->destroy(hdev, entry->data, -ECANCELED); - - list_del(&entry->list); - kfree(entry); - } + list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); 
mutex_unlock(&hdev->cmd_sync_work_lock); } @@ -757,6 +763,115 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, } EXPORT_SYMBOL(hci_cmd_sync_queue); +static struct hci_cmd_sync_work_entry * +_hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { + if (func && entry->func != func) + continue; + + if (data && entry->data != data) + continue; + + if (destroy && entry->destroy != destroy) + continue; + + return entry; + } + + return NULL; +} + +/* Queue HCI command entry once: + * + * - Lookup if an entry already exist and only if it doesn't creates a new entry + * and queue it. + */ +int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) + return 0; + + return hci_cmd_sync_queue(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_queue_once); + +/* Lookup HCI command entry: + * + * - Return first entry that matches by function callback or data or + * destroy callback. + */ +struct hci_cmd_sync_work_entry * +hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + + mutex_lock(&hdev->cmd_sync_work_lock); + entry = _hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + mutex_unlock(&hdev->cmd_sync_work_lock); + + return entry; +} +EXPORT_SYMBOL(hci_cmd_sync_lookup_entry); + +/* Cancel HCI command entry */ +void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, + struct hci_cmd_sync_work_entry *entry) +{ + mutex_lock(&hdev->cmd_sync_work_lock); + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + mutex_unlock(&hdev->cmd_sync_work_lock); +} +EXPORT_SYMBOL(hci_cmd_sync_cancel_entry); + +/* Dequeue one HCI command entry: + * + * - Lookup and cancel first entry that matches. + */ +bool hci_cmd_sync_dequeue_once(struct hci_dev *hdev, + hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + + entry = hci_cmd_sync_lookup_entry(hdev, func, data, destroy); + if (!entry) + return false; + + hci_cmd_sync_cancel_entry(hdev, entry); + + return true; +} +EXPORT_SYMBOL(hci_cmd_sync_dequeue_once); + +/* Dequeue HCI command entry: + * + * - Lookup and cancel any entry that matches by function callback or data or + * destroy callback. 
+ */ +bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + struct hci_cmd_sync_work_entry *entry; + bool ret = false; + + mutex_lock(&hdev->cmd_sync_work_lock); + while ((entry = _hci_cmd_sync_lookup_entry(hdev, func, data, + destroy))) { + _hci_cmd_sync_cancel_entry(hdev, entry, -ECANCELED); + ret = true; + } + mutex_unlock(&hdev->cmd_sync_work_lock); + + return ret; +} +EXPORT_SYMBOL(hci_cmd_sync_dequeue); + int hci_update_eir_sync(struct hci_dev *hdev) { struct hci_cp_write_eir cp; @@ -3048,7 +3163,8 @@ int hci_update_passive_scan(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_UNREGISTER)) return 0; - return hci_cmd_sync_queue(hdev, update_passive_scan_sync, NULL, NULL); + return hci_cmd_sync_queue_once(hdev, update_passive_scan_sync, NULL, + NULL); } int hci_write_sc_support_sync(struct hci_dev *hdev, u8 val) From d948e1ffa1d40240d5c81af6dcbcb87b39cb8d3c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 13 Feb 2024 09:59:32 -0500 Subject: [PATCH 115/268] Bluetooth: hci_sync: Attempt to dequeue connection attempt [ Upstream commit 881559af5f5c545f6828e7c74d79813eb886d523 ] If connection is still queued/pending in the cmd_sync queue it means no command has been generated and it should be safe to just dequeue the callback when it is being aborted. Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_core.h | 19 ++++++++ include/net/bluetooth/hci_sync.h | 10 +++-- net/bluetooth/hci_conn.c | 70 ++++++------------------------ net/bluetooth/hci_sync.c | 74 ++++++++++++++++++++++++++++---- 4 files changed, 102 insertions(+), 71 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 850f0e46aecf82..29f1549ee11145 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1041,6 +1041,24 @@ static inline unsigned int hci_conn_count(struct hci_dev *hdev) return c->acl_num + c->sco_num + c->le_num + c->iso_num; } +static inline bool hci_conn_valid(struct hci_dev *hdev, struct hci_conn *conn) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct hci_conn *c; + + rcu_read_lock(); + + list_for_each_entry_rcu(c, &h->list, list) { + if (c == conn) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + static inline __u8 hci_conn_lookup_type(struct hci_dev *hdev, __u16 handle) { struct hci_conn_hash *h = &hdev->conn_hash; @@ -1430,6 +1448,7 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role); +void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status); struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst, u8 sec_level, u8 auth_type, enum conn_reasons conn_reason); diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 24c0053d8f0cb2..534c3386e714f9 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -50,11 +50,11 @@ int hci_cmd_sync_submit(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int 
hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); -int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, - void *data, hci_cmd_sync_work_destroy_t destroy); void hci_cmd_sync_cancel_entry(struct hci_dev *hdev, struct hci_cmd_sync_work_entry *entry); bool hci_cmd_sync_dequeue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, @@ -141,8 +141,6 @@ struct hci_conn; int hci_abort_conn_sync(struct hci_dev *hdev, struct hci_conn *conn, u8 reason); -int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn); - int hci_le_create_cis_sync(struct hci_dev *hdev); int hci_le_remove_cig_sync(struct hci_dev *hdev, u8 handle); @@ -154,3 +152,7 @@ int hci_le_big_terminate_sync(struct hci_dev *hdev, u8 handle); int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle); int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn); + +int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index d15c8ce4b4183d..efa0881a90e1d7 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -68,7 +68,7 @@ static const struct sco_param esco_param_msbc[] = { }; /* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) +void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status) { struct hci_conn_params *params; struct hci_dev *hdev = conn->hdev; @@ -1143,6 +1143,9 @@ void hci_conn_del(struct hci_conn *conn) * rest of hci_conn_del. 
*/ hci_conn_cleanup(conn); + + /* Dequeue callbacks using connection pointer as data */ + hci_cmd_sync_dequeue(hdev, NULL, conn, NULL); } struct hci_dev *hci_get_route(bdaddr_t *dst, bdaddr_t *src, uint8_t src_type) @@ -1276,53 +1279,6 @@ u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle) return 0; } -static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) -{ - struct hci_conn *conn; - u16 handle = PTR_UINT(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return; - - bt_dev_dbg(hdev, "err %d", err); - - hci_dev_lock(hdev); - - if (!err) { - hci_connect_le_scan_cleanup(conn, 0x00); - goto done; - } - - /* Check if connection is still pending */ - if (conn != hci_lookup_le_connect(hdev)) - goto done; - - /* Flush to make sure we send create conn cancel command if needed */ - flush_delayed_work(&conn->le_conn_timeout); - hci_conn_failed(conn, bt_status(err)); - -done: - hci_dev_unlock(hdev); -} - -static int hci_connect_le_sync(struct hci_dev *hdev, void *data) -{ - struct hci_conn *conn; - u16 handle = PTR_UINT(data); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - - bt_dev_dbg(hdev, "conn %p", conn); - - clear_bit(HCI_CONN_SCANNING, &conn->flags); - conn->state = BT_CONNECT; - - return hci_le_create_conn_sync(hdev, conn); -} - struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, u8 dst_type, bool dst_resolved, u8 sec_level, u16 conn_timeout, u8 role) @@ -1389,9 +1345,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, conn->sec_level = BT_SECURITY_LOW; conn->conn_timeout = conn_timeout; - err = hci_cmd_sync_queue(hdev, hci_connect_le_sync, - UINT_PTR(conn->handle), - create_le_conn_complete); + err = hci_connect_le_sync(hdev, conn); if (err) { hci_conn_del(conn); return ERR_PTR(err); @@ -2880,12 +2834,10 @@ u32 hci_conn_get_phy(struct hci_conn *conn) static int abort_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn; - u16 handle = PTR_UINT(data); + struct hci_conn *conn = data; - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; return hci_abort_conn_sync(hdev, conn, conn->abort_reason); } @@ -2920,8 +2872,10 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) hci_cmd_sync_cancel(hdev, ECANCELED); break; } + /* Cancel connect attempt if still queued/pending */ + } else if (!hci_cancel_connect_sync(hdev, conn)) { + return 0; } - return hci_cmd_sync_queue(hdev, abort_conn_sync, UINT_PTR(conn->handle), - NULL); + return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index ed18e35c70972f..3131daff56f4cf 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6370,12 +6370,21 @@ static int hci_le_ext_create_conn_sync(struct hci_dev *hdev, conn->conn_timeout, NULL); } -int hci_le_create_conn_sync(struct hci_dev *hdev, struct hci_conn *conn) +static int hci_le_create_conn_sync(struct hci_dev *hdev, void *data) { struct hci_cp_le_create_conn cp; struct hci_conn_params *params; u8 own_addr_type; int err; + struct hci_conn *conn = data; + + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + + bt_dev_dbg(hdev, "conn %p", conn); + + clear_bit(HCI_CONN_SCANNING, &conn->flags); + conn->state = BT_CONNECT; /* If requested to connect as peripheral use directed advertising */ if (conn->role == HCI_ROLE_SLAVE) { @@ -6696,16 +6705,11 @@ int hci_update_adv_data(struct hci_dev 
*hdev, u8 instance) static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) { - struct hci_conn *conn; - u16 handle = PTR_UINT(data); + struct hci_conn *conn = data; struct inquiry_entry *ie; struct hci_cp_create_conn cp; int err; - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - return 0; - /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false @@ -6764,6 +6768,58 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) { - return hci_cmd_sync_queue(hdev, hci_acl_create_conn_sync, - UINT_PTR(conn->handle), NULL); + return hci_cmd_sync_queue_once(hdev, hci_acl_create_conn_sync, conn, + NULL); +} + +static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) +{ + struct hci_conn *conn = data; + + bt_dev_dbg(hdev, "err %d", err); + + if (err == -ECANCELED) + return; + + hci_dev_lock(hdev); + + if (!err) { + hci_connect_le_scan_cleanup(conn, 0x00); + goto done; + } + + /* Check if connection is still pending */ + if (conn != hci_lookup_le_connect(hdev)) + goto done; + + /* Flush to make sure we send create conn cancel command if needed */ + flush_delayed_work(&conn->le_conn_timeout); + hci_conn_failed(conn, bt_status(err)); + +done: + hci_dev_unlock(hdev); +} + +int hci_connect_le_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + return hci_cmd_sync_queue_once(hdev, hci_le_create_conn_sync, conn, + create_le_conn_complete); +} + +int hci_cancel_connect_sync(struct hci_dev *hdev, struct hci_conn *conn) +{ + if (conn->state != BT_OPEN) + return -EINVAL; + + switch (conn->type) { + case ACL_LINK: + return !hci_cmd_sync_dequeue_once(hdev, + hci_acl_create_conn_sync, + conn, NULL); + case LE_LINK: + return !hci_cmd_sync_dequeue_once(hdev, hci_le_create_conn_sync, + conn, create_le_conn_complete); + } + + return -ENOENT; } From d56412ee7cf9b4ceccb43d8b78ad7ec624357729 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 15:47:30 -0400 Subject: [PATCH 116/268] Bluetooth: hci_sync: Introduce hci_cmd_sync_run/hci_cmd_sync_run_once [ Upstream commit c898f6d7b093bd71e66569cd6797c87d4056f44b ] This introduces hci_cmd_sync_run/hci_cmd_sync_run_once which acts like hci_cmd_sync_queue/hci_cmd_sync_queue_once but runs immediately when already on hdev->cmd_sync_work context. 
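A minimal caller-side sketch of the intended use (illustrative only, not part of the patch; do_work_sync() and trigger_work() are placeholder names): code that may execute either from regular process context or from inside an hdev->cmd_sync_work callback can use the _run variants so the callback runs inline in the latter case instead of being queued behind the entry that is currently running.

/* Placeholder callback that issues HCI commands synchronously. */
static int do_work_sync(struct hci_dev *hdev, void *data)
{
        /* ... __hci_cmd_sync() calls here ... */
        return 0;
}

static int trigger_work(struct hci_dev *hdev, void *data)
{
        /* Executes do_work_sync() immediately when we are already on
         * hdev->cmd_sync_work, otherwise queues it once as usual.
         */
        return hci_cmd_sync_run_once(hdev, do_work_sync, data, NULL);
}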
Signed-off-by: Luiz Augusto von Dentz Stable-dep-of: 227a0cdf4a02 ("Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT") Signed-off-by: Sasha Levin --- include/net/bluetooth/hci_sync.h | 4 +++ net/bluetooth/hci_sync.c | 42 ++++++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/include/net/bluetooth/hci_sync.h b/include/net/bluetooth/hci_sync.h index 534c3386e714f9..3cb2d10cac930b 100644 --- a/include/net/bluetooth/hci_sync.h +++ b/include/net/bluetooth/hci_sync.h @@ -52,6 +52,10 @@ int hci_cmd_sync_queue(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy); struct hci_cmd_sync_work_entry * hci_cmd_sync_lookup_entry(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy); diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 3131daff56f4cf..f3c51315eb16a7 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -114,7 +114,7 @@ static void hci_cmd_sync_add(struct hci_request *req, u16 opcode, u32 plen, skb_queue_tail(&req->cmd_q, skb); } -static int hci_cmd_sync_run(struct hci_request *req) +static int hci_req_sync_run(struct hci_request *req) { struct hci_dev *hdev = req->hdev; struct sk_buff *skb; @@ -164,7 +164,7 @@ struct sk_buff *__hci_cmd_sync_sk(struct hci_dev *hdev, u16 opcode, u32 plen, hdev->req_status = HCI_REQ_PEND; - err = hci_cmd_sync_run(&req); + err = hci_req_sync_run(&req); if (err < 0) return ERR_PTR(err); @@ -800,6 +800,44 @@ int hci_cmd_sync_queue_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, } EXPORT_SYMBOL(hci_cmd_sync_queue_once); +/* Run HCI command: + * + * - hdev must be running + * - if on cmd_sync_work then run immediately otherwise queue + */ +int hci_cmd_sync_run(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + /* Only queue command if hdev is running which means it had been opened + * and is either on init phase or is already up. + */ + if (!test_bit(HCI_RUNNING, &hdev->flags)) + return -ENETDOWN; + + /* If on cmd_sync_work then run immediately otherwise queue */ + if (current_work() == &hdev->cmd_sync_work) + return func(hdev, data); + + return hci_cmd_sync_submit(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_run); + +/* Run HCI command entry once: + * + * - Lookup if an entry already exist and only if it doesn't creates a new entry + * and run it. 
+ * - if on cmd_sync_work then run immediately otherwise queue + */ +int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, + void *data, hci_cmd_sync_work_destroy_t destroy) +{ + if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) + return 0; + + return hci_cmd_sync_run(hdev, func, data, destroy); +} +EXPORT_SYMBOL(hci_cmd_sync_run_once); + /* Lookup HCI command entry: * * - Return first entry that matches by function callback or data or From 249c88e7fb45b6b705040c5af4bd0d0f2bc9735c Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 26 Aug 2024 16:14:04 -0400 Subject: [PATCH 117/268] Bluetooth: MGMT: Fix not generating command complete for MGMT_OP_DISCONNECT [ Upstream commit 227a0cdf4a028a73dc256d0f5144b4808d718893 ] MGMT_OP_DISCONNECT can be called while mgmt_device_connected has not been called yet, which will cause the connection procedure to be aborted, so mgmt_device_disconnected shall still respond with command complete to MGMT_OP_DISCONNECT and just not emit MGMT_EV_DEVICE_DISCONNECTED since MGMT_EV_DEVICE_CONNECTED was never sent. To fix this MGMT_OP_DISCONNECT is changed to work similarly to other command which do use hci_cmd_sync_queue and then use hci_conn_abort to disconnect and returns the result, in order for hci_conn_abort to be used from hci_cmd_sync context it now uses hci_cmd_sync_run_once. Link: https://github.com/bluez/bluez/issues/932 Fixes: 12d4a3b2ccb3 ("Bluetooth: Move check for MGMT_CONNECTED flag into mgmt.c") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- net/bluetooth/hci_conn.c | 6 ++- net/bluetooth/mgmt.c | 84 ++++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 43 deletions(-) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index efa0881a90e1d7..d8a01eb016ad08 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -2877,5 +2877,9 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) return 0; } - return hci_cmd_sync_queue_once(hdev, abort_conn_sync, conn, NULL); + /* Run immediately if on cmd_sync_work since this may be called + * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does + * already queue its callback on cmd_sync_work. + */ + return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index bad365f3d7bf2c..4ae9029b5785f4 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2918,7 +2918,12 @@ static int unpair_device_sync(struct hci_dev *hdev, void *data) if (!conn) return 0; - return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); + /* Disregard any possible error since the likes of hci_abort_conn_sync + * will clean up the connection no matter the error. 
+ */ + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + + return 0; } static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, @@ -3050,13 +3055,44 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, return err; } +static void disconnect_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + + cmd->cmd_complete(cmd, mgmt_status(err)); + mgmt_pending_free(cmd); +} + +static int disconnect_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_disconnect *cp = cmd->param; + struct hci_conn *conn; + + if (cp->addr.type == BDADDR_BREDR) + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, + &cp->addr.bdaddr); + else + conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, + le_addr_type(cp->addr.type)); + + if (!conn) + return -ENOTCONN; + + /* Disregard any possible error since the likes of hci_abort_conn_sync + * will clean up the connection no matter the error. + */ + hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + + return 0; +} + static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_disconnect *cp = data; struct mgmt_rp_disconnect rp; struct mgmt_pending_cmd *cmd; - struct hci_conn *conn; int err; bt_dev_dbg(hdev, "sock %p", sk); @@ -3079,27 +3115,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, goto failed; } - if (pending_find(MGMT_OP_DISCONNECT, hdev)) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_BUSY, &rp, sizeof(rp)); - goto failed; - } - - if (cp->addr.type == BDADDR_BREDR) - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, - &cp->addr.bdaddr); - else - conn = hci_conn_hash_lookup_le(hdev, &cp->addr.bdaddr, - le_addr_type(cp->addr.type)); - - if (!conn || conn->state == BT_OPEN || conn->state == BT_CLOSED) { - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_DISCONNECT, - MGMT_STATUS_NOT_CONNECTED, &rp, - sizeof(rp)); - goto failed; - } - - cmd = mgmt_pending_add(sk, MGMT_OP_DISCONNECT, hdev, data, len); + cmd = mgmt_pending_new(sk, MGMT_OP_DISCONNECT, hdev, data, len); if (!cmd) { err = -ENOMEM; goto failed; @@ -3107,9 +3123,10 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data, cmd->cmd_complete = generic_cmd_complete; - err = hci_disconnect(conn, HCI_ERROR_REMOTE_USER_TERM); + err = hci_cmd_sync_queue(hdev, disconnect_sync, cmd, + disconnect_complete); if (err < 0) - mgmt_pending_remove(cmd); + mgmt_pending_free(cmd); failed: hci_dev_unlock(hdev); @@ -9627,18 +9644,6 @@ void mgmt_device_connected(struct hci_dev *hdev, struct hci_conn *conn, mgmt_event_skb(skb, NULL); } -static void disconnect_rsp(struct mgmt_pending_cmd *cmd, void *data) -{ - struct sock **sk = data; - - cmd->cmd_complete(cmd, 0); - - *sk = cmd->sk; - sock_hold(*sk); - - mgmt_pending_remove(cmd); -} - static void unpair_device_rsp(struct mgmt_pending_cmd *cmd, void *data) { struct hci_dev *hdev = data; @@ -9679,8 +9684,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (link_type != ACL_LINK && link_type != LE_LINK) return; - mgmt_pending_foreach(MGMT_OP_DISCONNECT, hdev, disconnect_rsp, &sk); - bacpy(&ev.addr.bdaddr, bdaddr); ev.addr.type = link_to_bdaddr(link_type, addr_type); ev.reason = reason; @@ -9693,9 +9696,6 @@ void mgmt_device_disconnected(struct hci_dev *hdev, bdaddr_t *bdaddr, if (sk) sock_put(sk); - - mgmt_pending_foreach(MGMT_OP_UNPAIR_DEVICE, hdev, unpair_device_rsp, - hdev); } void mgmt_disconnect_failed(struct hci_dev 
*hdev, bdaddr_t *bdaddr, From ed60aab606413d3fd4aeee81f9cd933294740a9e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 Aug 2024 22:22:45 +0300 Subject: [PATCH 118/268] igc: Unlock on error in igc_io_resume() [ Upstream commit ef4a99a0164e3972abb421cbb1b09ea6c61414df ] Call rtnl_unlock() on this error path, before returning. Fixes: bc23aa949aeb ("igc: Add pcie error handler support") Signed-off-by: Dan Carpenter Reviewed-by: Gerhard Engleder Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 21fb1a98ebca62..da1018d8326220 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7288,6 +7288,7 @@ static void igc_io_resume(struct pci_dev *pdev) rtnl_lock(); if (netif_running(netdev)) { if (igc_open(netdev)) { + rtnl_unlock(); netdev_err(netdev, "igc_open failed after reset\n"); return; } From 217539e994e53206bbf3fb330261cc78c480d311 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sun, 1 Sep 2024 05:10:51 +0200 Subject: [PATCH 119/268] hwmon: (hp-wmi-sensors) Check if WMI event data exists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit a54da9df75cd1b4b5028f6c60f9a211532680585 ] The BIOS can choose to return no event data in response to a WMI event, so the ACPI object passed to the WMI notify handler can be NULL. Check for such a situation and ignore the event in such a case. Fixes: 23902f98f8d4 ("hwmon: add HP WMI Sensors driver") Signed-off-by: Armin Wolf Reviewed-by: Ilpo Järvinen Message-ID: <20240901031055.3030-2-W_Armin@gmx.de> Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/hp-wmi-sensors.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/hp-wmi-sensors.c b/drivers/hwmon/hp-wmi-sensors.c index b5325d0e72b9cd..dfa1d6926deacc 100644 --- a/drivers/hwmon/hp-wmi-sensors.c +++ b/drivers/hwmon/hp-wmi-sensors.c @@ -1637,6 +1637,8 @@ static void hp_wmi_notify(u32 value, void *context) goto out_unlock; wobj = out.pointer; + if (!wobj) + goto out_unlock; err = populate_event_from_wobj(dev, &event, wobj); if (err) { From 26928c8f00f6bb0e194f3957fe51c69d36838eb2 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Fri, 30 Aug 2024 10:20:25 +0800 Subject: [PATCH 120/268] net: phy: Fix missing of_node_put() for leds [ Upstream commit 2560db6ede1aaf162a73b2df43e0b6c5ed8819f7 ] The call of of_get_child_by_name() will cause refcount incremented for leds, if it succeeds, it should call of_node_put() to decrease it, fix it. 
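The rule being applied is the usual OF refcount contract; a generic sketch (not the driver code, helper names are illustrative): a successful of_get_child_by_name() takes a reference that must be dropped with of_node_put() on every exit path, including the error path inside the loop.

static int example_parse_leds(struct device_node *np)
{
        struct device_node *leds, *led;
        int err = 0;

        leds = of_get_child_by_name(np, "leds");
        if (!leds)
                return 0;

        for_each_available_child_of_node(leds, led) {
                err = parse_one_led(np, led);   /* placeholder helper */
                if (err) {
                        of_node_put(led);       /* drop the iterator's ref */
                        break;
                }
        }

        of_node_put(leds);      /* balances of_get_child_by_name() */
        return err;
}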
Fixes: 01e5b728e9e4 ("net: phy: Add a binding for PHY LEDs") Reviewed-by: Jonathan Cameron Signed-off-by: Jinjie Ruan Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20240830022025.610844-1-ruanjinjie@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/phy_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c895cd178e6a1a..2e4bff6055e223 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3164,11 +3164,13 @@ static int of_phy_leds(struct phy_device *phydev) err = of_phy_led(phydev, led); if (err) { of_node_put(led); + of_node_put(leds); phy_leds_unregister(phydev); return err; } } + of_node_put(leds); return 0; } From 2f057db2fb29bc209c103050647562e60554d3d3 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 11:59:27 +0200 Subject: [PATCH 121/268] ice: protect XDP configuration with a mutex [ Upstream commit 2504b8405768a57a71e660dbfd5abd59f679a03f ] The main threat to data consistency in ice_xdp() is a possible asynchronous PF reset. It can be triggered by a user or by TX timeout handler. XDP setup and PF reset code access the same resources in the following sections: * ice_vsi_close() in ice_prepare_for_reset() - already rtnl-locked * ice_vsi_rebuild() for the PF VSI - not protected * ice_vsi_open() - already rtnl-locked With an unfortunate timing, such accesses can result in a crash such as the one below: [ +1.999878] ice 0000:b1:00.0: Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring 14 [ +2.002992] ice 0000:b1:00.0: Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring 18 [Mar15 18:17] ice 0000:b1:00.0 ens801f0np0: NETDEV WATCHDOG: CPU: 38: transmit queue 14 timed out 80692736 ms [ +0.000093] ice 0000:b1:00.0 ens801f0np0: tx_timeout: VSI_num: 6, Q 14, NTC: 0x0, HW_HEAD: 0x0, NTU: 0x0, INT: 0x4000001 [ +0.000012] ice 0000:b1:00.0 ens801f0np0: tx_timeout recovery level 1, txqueue 14 [ +0.394718] ice 0000:b1:00.0: PTP reset successful [ +0.006184] BUG: kernel NULL pointer dereference, address: 0000000000000098 [ +0.000045] #PF: supervisor read access in kernel mode [ +0.000023] #PF: error_code(0x0000) - not-present page [ +0.000023] PGD 0 P4D 0 [ +0.000018] Oops: 0000 [#1] PREEMPT SMP NOPTI [ +0.000023] CPU: 38 PID: 7540 Comm: kworker/38:1 Not tainted 6.8.0-rc7 #1 [ +0.000031] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0014.082620210524 08/26/2021 [ +0.000036] Workqueue: ice ice_service_task [ice] [ +0.000183] RIP: 0010:ice_clean_tx_ring+0xa/0xd0 [ice] [...] [ +0.000013] Call Trace: [ +0.000016] [ +0.000014] ? __die+0x1f/0x70 [ +0.000029] ? page_fault_oops+0x171/0x4f0 [ +0.000029] ? schedule+0x3b/0xd0 [ +0.000027] ? exc_page_fault+0x7b/0x180 [ +0.000022] ? asm_exc_page_fault+0x22/0x30 [ +0.000031] ? ice_clean_tx_ring+0xa/0xd0 [ice] [ +0.000194] ice_free_tx_ring+0xe/0x60 [ice] [ +0.000186] ice_destroy_xdp_rings+0x157/0x310 [ice] [ +0.000151] ice_vsi_decfg+0x53/0xe0 [ice] [ +0.000180] ice_vsi_rebuild+0x239/0x540 [ice] [ +0.000186] ice_vsi_rebuild_by_type+0x76/0x180 [ice] [ +0.000145] ice_rebuild+0x18c/0x840 [ice] [ +0.000145] ? delay_tsc+0x4a/0xc0 [ +0.000022] ? delay_tsc+0x92/0xc0 [ +0.000020] ice_do_reset+0x140/0x180 [ice] [ +0.000886] ice_service_task+0x404/0x1030 [ice] [ +0.000824] process_one_work+0x171/0x340 [ +0.000685] worker_thread+0x277/0x3a0 [ +0.000675] ? preempt_count_add+0x6a/0xa0 [ +0.000677] ? _raw_spin_lock_irqsave+0x23/0x50 [ +0.000679] ? 
__pfx_worker_thread+0x10/0x10 [ +0.000653] kthread+0xf0/0x120 [ +0.000635] ? __pfx_kthread+0x10/0x10 [ +0.000616] ret_from_fork+0x2d/0x50 [ +0.000612] ? __pfx_kthread+0x10/0x10 [ +0.000604] ret_from_fork_asm+0x1b/0x30 [ +0.000604] The previous way of handling this through returning -EBUSY is not viable, particularly when destroying AF_XDP socket, because the kernel proceeds with removal anyway. There is plenty of code between those calls and there is no need to create a large critical section that covers all of them, same as there is no need to protect ice_vsi_rebuild() with rtnl_lock(). Add xdp_state_lock mutex to protect ice_vsi_rebuild() and ice_xdp(). Leaving unprotected sections in between would result in two states that have to be considered: 1. when the VSI is closed, but not yet rebuild 2. when VSI is already rebuild, but not yet open The latter case is actually already handled through !netif_running() case, we just need to adjust flag checking a little. The former one is not as trivial, because between ice_vsi_close() and ice_vsi_rebuild(), a lot of hardware interaction happens, this can make adding/deleting rings exit with an error. Luckily, VSI rebuild is pending and can apply new configuration for us in a managed fashion. Therefore, add an additional VSI state flag ICE_VSI_REBUILD_PENDING to indicate that ice_xdp() can just hot-swap the program. Also, as ice_vsi_rebuild() flow is touched in this patch, make it more consistent by deconfiguring VSI when coalesce allocation fails. Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Signed-off-by: Larysa Zaremba Reviewed-by: Maciej Fijalkowski Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice.h | 2 ++ drivers/net/ethernet/intel/ice/ice_lib.c | 34 ++++++++++++++--------- drivers/net/ethernet/intel/ice/ice_main.c | 19 +++++++++---- drivers/net/ethernet/intel/ice/ice_xsk.c | 3 +- 4 files changed, 39 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index c7962f322db2d0..7b3ce30ba38fa0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -313,6 +313,7 @@ enum ice_vsi_state { ICE_VSI_UMAC_FLTR_CHANGED, ICE_VSI_MMAC_FLTR_CHANGED, ICE_VSI_PROMISC_CHANGED, + ICE_VSI_REBUILD_PENDING, ICE_VSI_STATE_NBITS /* must be last */ }; @@ -409,6 +410,7 @@ struct ice_vsi { struct ice_tx_ring **xdp_rings; /* XDP ring array */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct mutex xdp_state_lock; struct net_device **target_netdevs; diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 13ca3342a0cea4..b3010a53f1b457 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -459,6 +459,7 @@ static void ice_vsi_free(struct ice_vsi *vsi) ice_vsi_free_stats(vsi); ice_vsi_free_arrays(vsi); + mutex_destroy(&vsi->xdp_state_lock); mutex_unlock(&pf->sw_mutex); devm_kfree(dev, vsi); } @@ -660,6 +661,8 @@ static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf) pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi, pf->next_vsi); + mutex_init(&vsi->xdp_state_lock); + unlock_pf: mutex_unlock(&pf->sw_mutex); return vsi; @@ -3164,19 +3167,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (WARN_ON(vsi->type == 
ICE_VSI_VF && !vsi->vf)) return -EINVAL; + mutex_lock(&vsi->xdp_state_lock); + ret = ice_vsi_realloc_stat_arrays(vsi); if (ret) - goto err_vsi_cfg; + goto unlock; ice_vsi_decfg(vsi); ret = ice_vsi_cfg_def(vsi, ¶ms); if (ret) - goto err_vsi_cfg; + goto unlock; coalesce = kcalloc(vsi->num_q_vectors, sizeof(struct ice_coalesce_stored), GFP_KERNEL); - if (!coalesce) - return -ENOMEM; + if (!coalesce) { + ret = -ENOMEM; + goto decfg; + } prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce); @@ -3184,22 +3191,23 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags) if (ret) { if (vsi_flags & ICE_VSI_FLAG_INIT) { ret = -EIO; - goto err_vsi_cfg_tc_lan; + goto free_coalesce; } - kfree(coalesce); - return ice_schedule_reset(pf, ICE_RESET_PFR); + ret = ice_schedule_reset(pf, ICE_RESET_PFR); + goto free_coalesce; } ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors); - kfree(coalesce); + clear_bit(ICE_VSI_REBUILD_PENDING, vsi->state); - return 0; - -err_vsi_cfg_tc_lan: - ice_vsi_decfg(vsi); +free_coalesce: kfree(coalesce); -err_vsi_cfg: +decfg: + if (ret) + ice_vsi_decfg(vsi); +unlock: + mutex_unlock(&vsi->xdp_state_lock); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ffe6e74b9fea24..3ee92b0e62ff7a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -614,6 +614,7 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ + set_bit(ICE_VSI_REBUILD_PENDING, ice_get_main_vsi(pf)->state); ice_pf_dis_all_vsi(pf, false); if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) @@ -2942,7 +2943,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } /* hot swap progs and avoid toggling link */ - if (ice_is_xdp_ena_vsi(vsi) == !!prog) { + if (ice_is_xdp_ena_vsi(vsi) == !!prog || + test_bit(ICE_VSI_REBUILD_PENDING, vsi->state)) { ice_vsi_assign_bpf_prog(vsi, prog); return 0; } @@ -3014,21 +3016,28 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp) { struct ice_netdev_priv *np = netdev_priv(dev); struct ice_vsi *vsi = np->vsi; + int ret; if (vsi->type != ICE_VSI_PF) { NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI"); return -EINVAL; } + mutex_lock(&vsi->xdp_state_lock); + switch (xdp->command) { case XDP_SETUP_PROG: - return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack); + break; case XDP_SETUP_XSK_POOL: - return ice_xsk_pool_setup(vsi, xdp->xsk.pool, - xdp->xsk.queue_id); + ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id); + break; default: - return -EINVAL; + ret = -EINVAL; } + + mutex_unlock(&vsi->xdp_state_lock); + return ret; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 67511153081ae9..9a9b8698881b4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -396,7 +396,8 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) goto failure; } - if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi); + if_running = !test_bit(ICE_VSI_DOWN, vsi->state) && + ice_is_xdp_ena_vsi(vsi); if (if_running) { struct ice_rx_ring *rx_ring = vsi->rx_rings[qid]; From 81e5622c052b808bdbea06a01fd96bdd83b6f2c3 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 23 Aug 2024 
11:59:31 +0200 Subject: [PATCH 122/268] ice: do not bring the VSI up, if it was down before the XDP setup [ Upstream commit 04c7e14e5b0b6227e7b00d7a96ca2f2426ab9171 ] After XDP configuration is completed, we bring the interface up unconditionally, regardless of its state before the call to .ndo_bpf(). Preserve the information whether the interface had to be brought down and later bring it up only in such case. Fixes: efc2214b6047 ("ice: Add support for XDP") Reviewed-by: Wojciech Drewek Reviewed-by: Jacob Keller Tested-by: Chandan Kumar Rout Acked-by: Maciej Fijalkowski Signed-off-by: Larysa Zaremba Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 3ee92b0e62ff7a..4d3a9fc79a6c1d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2931,8 +2931,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack) { unsigned int frame_size = vsi->netdev->mtu + ICE_ETH_PKT_HDR_PAD; - bool if_running = netif_running(vsi->netdev); int ret = 0, xdp_ring_err = 0; + bool if_running; if (prog && !prog->aux->xdp_has_frags) { if (frame_size > ice_max_xdp_frame_size(vsi)) { @@ -2949,8 +2949,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, return 0; } + if_running = netif_running(vsi->netdev) && + !test_and_set_bit(ICE_VSI_DOWN, vsi->state); + /* need to stop netdev while setting up the program for Rx rings */ - if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { + if (if_running) { ret = ice_down(vsi); if (ret) { NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed"); From 07200e313cb5b594c9e984a441896b73438972b3 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Thu, 29 Aug 2024 19:50:55 +0200 Subject: [PATCH 123/268] usbnet: modern method to get random MAC [ Upstream commit bab8eb0dd4cb995caa4a0529d5655531c2ec5e8e ] The driver generates a random MAC once on load and uses it over and over, including on two devices needing a random MAC at the same time. Jakub suggested revamping the driver to the modern API for setting a random MAC rather than fixing the old stuff. The bug is as old as the driver. Signed-off-by: Oliver Neukum Reviewed-by: Simon Horman Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Link: https://patch.msgid.link/20240829175201.670718-1-oneukum@suse.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/usbnet.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 2d14b0d78541a3..6cc1b56ddde2fc 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -61,9 +61,6 @@ /*-------------------------------------------------------------------------*/ -// randomly generated ethernet address -static u8 node_id [ETH_ALEN]; - /* use ethtool to change the level for any given device */ static int msg_level = -1; module_param (msg_level, int, 0); @@ -1731,7 +1728,6 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->net = net; strscpy(net->name, "usb%d", sizeof(net->name)); - eth_hw_addr_set(net, node_id); /* rx and tx sides can use different message sizes; * bind() should set rx_urb_size in that case. 
@@ -1805,9 +1801,9 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) goto out4; } - /* let userspace know we have a random address */ - if (ether_addr_equal(net->dev_addr, node_id)) - net->addr_assign_type = NET_ADDR_RANDOM; + /* this flags the device for user space */ + if (!is_valid_ether_addr(net->dev_addr)) + eth_hw_addr_random(net); if ((dev->driver_info->flags & FLAG_WLAN) != 0) SET_NETDEV_DEVTYPE(net, &wlan_type); @@ -2217,7 +2213,6 @@ static int __init usbnet_init(void) BUILD_BUG_ON( sizeof_field(struct sk_buff, cb) < sizeof(struct skb_data)); - eth_random_addr(node_id); return 0; } module_init(usbnet_init); From 4a746fb253fa7c2908b3f317d37d8c0202d5f278 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 16 Oct 2023 06:47:39 -0700 Subject: [PATCH 124/268] bpf: Add sockptr support for getsockopt [ Upstream commit a615f67e1a426f35366b8398c11f31c148e7df48 ] The whole network stack uses sockptr, and while it doesn't move to something more modern, let's use sockptr in getsockptr BPF hooks, so, it could be used by other callers. The main motivation for this change is to use it in the io_uring {g,s}etsockopt(), which will use a userspace pointer for *optval, but, a kernel value for optlen. Link: https://lore.kernel.org/all/ZSArfLaaGcfd8LH8@gmail.com/ Signed-off-by: Breno Leitao Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20231016134750.1381153-2-leitao@debian.org Signed-off-by: Jens Axboe Stable-dep-of: 33f339a1ba54 ("bpf, net: Fix a potential race in do_sock_getsockopt()") Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 5 +++-- kernel/bpf/cgroup.c | 20 +++++++++++--------- net/socket.c | 5 +++-- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 31561e78971578..c4956a48ab3e93 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -140,9 +140,10 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, int *optname, char __user *optval, int *optlen, char **kernel_optval); + int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen, int max_optlen, + int optname, sockptr_t optval, + sockptr_t optlen, int max_optlen, int retval); int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index ac37bd53aee031..caae07cc885e9c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1889,8 +1889,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, } int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, - int optname, char __user *optval, - int __user *optlen, int max_optlen, + int optname, sockptr_t optval, + sockptr_t optlen, int max_optlen, int retval) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); @@ -1917,8 +1917,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, * one that kernel returned as well to let * BPF programs inspect the value. 
*/ - - if (get_user(ctx.optlen, optlen)) { + if (copy_from_sockptr(&ctx.optlen, optlen, + sizeof(ctx.optlen))) { ret = -EFAULT; goto out; } @@ -1929,8 +1929,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } orig_optlen = ctx.optlen; - if (copy_from_user(ctx.optval, optval, - min(ctx.optlen, max_optlen)) != 0) { + if (copy_from_sockptr(ctx.optval, optval, + min(ctx.optlen, max_optlen))) { ret = -EFAULT; goto out; } @@ -1944,7 +1944,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, if (ret < 0) goto out; - if (optval && (ctx.optlen > max_optlen || ctx.optlen < 0)) { + if (!sockptr_is_null(optval) && + (ctx.optlen > max_optlen || ctx.optlen < 0)) { if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) { pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n", ctx.optlen, max_optlen); @@ -1956,11 +1957,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, } if (ctx.optlen != 0) { - if (optval && copy_to_user(optval, ctx.optval, ctx.optlen)) { + if (!sockptr_is_null(optval) && + copy_to_sockptr(optval, ctx.optval, ctx.optlen)) { ret = -EFAULT; goto out; } - if (put_user(ctx.optlen, optlen)) { + if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) { ret = -EFAULT; goto out; } diff --git a/net/socket.c b/net/socket.c index 8d83c4bb163b42..b2d75d5661be49 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2375,8 +2375,9 @@ int __sys_getsockopt(int fd, int level, int optname, char __user *optval, if (!in_compat_syscall()) err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, - optval, optlen, max_optlen, - err); + USER_SOCKPTR(optval), + USER_SOCKPTR(optlen), + max_optlen, err); out_put: fput_light(sock->file, fput_needed); return err; From 09fba0162ba095665f3f226cc6220f9fbb00c9d9 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 16 Oct 2023 06:47:40 -0700 Subject: [PATCH 125/268] bpf: Add sockptr support for setsockopt [ Upstream commit 3f31e0d14d44ad491a81b7c1f83f32fbc300a867 ] The whole network stack uses sockptr, and while it doesn't move to something more modern, let's use sockptr in setsockptr BPF hooks, so, it could be used by other callers. The main motivation for this change is to use it in the io_uring {g,s}etsockopt(), which will use a userspace pointer for *optval, but, a kernel value for optlen. 
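For context, a minimal sketch of what sockptr_t buys these hooks (illustrative, not part of the patch; read_int_opt() is a made-up helper): the same copy path works whether the wrapped pointer was built with USER_SOCKPTR() or KERNEL_SOCKPTR(), so the hook no longer has to assume a user-space pointer.

/* Made-up helper: copies an int-sized option value from either a user
 * or a kernel sockptr.
 */
static int read_int_opt(sockptr_t optval, int optlen, int *out)
{
        if (optlen < (int)sizeof(*out))
                return -EINVAL;

        if (copy_from_sockptr(out, optval, sizeof(*out)))
                return -EFAULT;

        return 0;
}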
Link: https://lore.kernel.org/all/ZSArfLaaGcfd8LH8@gmail.com/ Signed-off-by: Breno Leitao Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20231016134750.1381153-3-leitao@debian.org Signed-off-by: Jens Axboe Stable-dep-of: 33f339a1ba54 ("bpf, net: Fix a potential race in do_sock_getsockopt()") Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 2 +- kernel/bpf/cgroup.c | 5 +++-- net/socket.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c4956a48ab3e93..ebfd3c5a776ad3 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,7 +138,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, enum cgroup_bpf_attach_type atype); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, - int *optname, char __user *optval, + int *optname, sockptr_t optval, int *optlen, char **kernel_optval); int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index caae07cc885e9c..913a6a7e62ca67 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1799,7 +1799,7 @@ static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx, } int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, - int *optname, char __user *optval, + int *optname, sockptr_t optval, int *optlen, char **kernel_optval) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); @@ -1822,7 +1822,8 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ctx.optlen = *optlen; - if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) { + if (copy_from_sockptr(ctx.optval, optval, + min(*optlen, max_optlen))) { ret = -EFAULT; goto out; } diff --git a/net/socket.c b/net/socket.c index b2d75d5661be49..f0f087004728e0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2307,7 +2307,7 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, if (!in_compat_syscall()) err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, - user_optval, &optlen, + optval, &optlen, &kernel_optval); if (err < 0) goto out_put; From e88c16a4f07e258cda223a04b2206b733579d68c Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 16 Oct 2023 06:47:41 -0700 Subject: [PATCH 126/268] net/socket: Break down __sys_setsockopt [ Upstream commit 1406245c29454ff84919736be83e14cdaba7fec1 ] Split __sys_setsockopt() into two functions by removing the core logic into a sub-function (do_sock_setsockopt()). This will avoid code duplication when doing the same operation in other callers, for instance. do_sock_setsockopt() will be called by io_uring setsockopt() command operation in the following patch. 
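A hedged sketch of the resulting call shape (the io_uring caller is not part of this patch, so everything below is hypothetical): a caller that already holds the struct socket can invoke the core helper directly instead of going through the fd lookup in __sys_setsockopt().

/* Hypothetical caller: "sock" is already looked up and pinned. */
static int example_setsockopt(struct socket *sock, int level, int optname,
                              void __user *uoptval, int optlen)
{
        return do_sock_setsockopt(sock, in_compat_syscall(), level, optname,
                                  USER_SOCKPTR(uoptval), optlen);
}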
Signed-off-by: Breno Leitao Reviewed-by: Willem de Bruijn Acked-by: Jakub Kicinski Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20231016134750.1381153-4-leitao@debian.org Signed-off-by: Jens Axboe Stable-dep-of: 33f339a1ba54 ("bpf, net: Fix a potential race in do_sock_getsockopt()") Signed-off-by: Sasha Levin --- include/net/sock.h | 2 ++ net/socket.c | 39 +++++++++++++++++++++++++-------------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 5942b5ff4c7860..bb010cc53b9102 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1875,6 +1875,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen); int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen); int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); diff --git a/net/socket.c b/net/socket.c index f0f087004728e0..aa563fc0cee461 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2281,31 +2281,21 @@ static bool sock_use_custom_sol_socket(const struct socket *sock) return test_bit(SOCK_CUSTOM_SOCKOPT, &sock->flags); } -/* - * Set a socket option. Because we don't know the option lengths we have - * to pass the user mode parameter for the protocols to sort out. - */ -int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, - int optlen) +int do_sock_setsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, int optlen) { - sockptr_t optval = USER_SOCKPTR(user_optval); const struct proto_ops *ops; char *kernel_optval = NULL; - int err, fput_needed; - struct socket *sock; + int err; if (optlen < 0) return -EINVAL; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - return err; - err = security_socket_setsockopt(sock, level, optname); if (err) goto out_put; - if (!in_compat_syscall()) + if (!compat) err = BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock->sk, &level, &optname, optval, &optlen, &kernel_optval); @@ -2328,6 +2318,27 @@ int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, optlen); kfree(kernel_optval); out_put: + return err; +} +EXPORT_SYMBOL(do_sock_setsockopt); + +/* Set a socket option. Because we don't know the option lengths we have + * to pass the user mode parameter for the protocols to sort out. + */ +int __sys_setsockopt(int fd, int level, int optname, char __user *user_optval, + int optlen) +{ + sockptr_t optval = USER_SOCKPTR(user_optval); + bool compat = in_compat_syscall(); + int err, fput_needed; + struct socket *sock; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + err = do_sock_setsockopt(sock, compat, level, optname, optval, optlen); + fput_light(sock->file, fput_needed); return err; } From 2174a3c368993030b44c92713cc646d94ee95190 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 16 Oct 2023 06:47:42 -0700 Subject: [PATCH 127/268] net/socket: Break down __sys_getsockopt [ Upstream commit 0b05b0cd78c92371fdde6333d006f39eaf9e0860 ] Split __sys_getsockopt() into two functions by removing the core logic into a sub-function (do_sock_getsockopt()). This will avoid code duplication when doing the same operation in other callers, for instance. do_sock_getsockopt() will be called by io_uring getsockopt() command operation in the following patch. 
The same was done for the setsockopt pair. Suggested-by: Martin KaFai Lau Signed-off-by: Breno Leitao Acked-by: Jakub Kicinski Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20231016134750.1381153-5-leitao@debian.org Signed-off-by: Jens Axboe Stable-dep-of: 33f339a1ba54 ("bpf, net: Fix a potential race in do_sock_getsockopt()") Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 2 +- include/net/sock.h | 4 +-- net/core/sock.c | 8 ----- net/socket.c | 64 ++++++++++++++++++++++++-------------- 4 files changed, 44 insertions(+), 34 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ebfd3c5a776ad3..2aa82b7aed89e1 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -379,7 +379,7 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, ({ \ int __ret = 0; \ if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - get_user(__ret, optlen); \ + copy_from_sockptr(&__ret, optlen, sizeof(int)); \ __ret; \ }) diff --git a/include/net/sock.h b/include/net/sock.h index bb010cc53b9102..2a1aee50384828 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1877,11 +1877,11 @@ int sock_setsockopt(struct socket *sock, int level, int op, sockptr_t optval, unsigned int optlen); int do_sock_setsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, int optlen); +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen); int sk_getsockopt(struct sock *sk, int level, int optname, sockptr_t optval, sockptr_t optlen); -int sock_getsockopt(struct socket *sock, int level, int op, - char __user *optval, int __user *optlen); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32); struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, diff --git a/net/core/sock.c b/net/core/sock.c index 55d85d50b3e491..bc2a4e38dcea8e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2019,14 +2019,6 @@ int sk_getsockopt(struct sock *sk, int level, int optname, return 0; } -int sock_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - return sk_getsockopt(sock->sk, level, optname, - USER_SOCKPTR(optval), - USER_SOCKPTR(optlen)); -} - /* * Initialize an sk_lock. 
* diff --git a/net/socket.c b/net/socket.c index aa563fc0cee461..d275f5f148824d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2352,6 +2352,43 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +int do_sock_getsockopt(struct socket *sock, bool compat, int level, + int optname, sockptr_t optval, sockptr_t optlen) +{ + int max_optlen __maybe_unused; + const struct proto_ops *ops; + int err; + + err = security_socket_getsockopt(sock, level, optname); + if (err) + return err; + + if (!compat) + max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + + ops = READ_ONCE(sock->ops); + if (level == SOL_SOCKET) { + err = sk_getsockopt(sock->sk, level, optname, optval, optlen); + } else if (unlikely(!ops->getsockopt)) { + err = -EOPNOTSUPP; + } else { + if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, + "Invalid argument type")) + return -EOPNOTSUPP; + + err = ops->getsockopt(sock, level, optname, optval.user, + optlen.user); + } + + if (!compat) + err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, + optval, optlen, max_optlen, + err); + + return err; +} +EXPORT_SYMBOL(do_sock_getsockopt); + /* * Get a socket option. Because we don't know the option lengths we have * to pass a user mode parameter for the protocols to sort out. @@ -2359,37 +2396,18 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int __sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { - int max_optlen __maybe_unused; - const struct proto_ops *ops; int err, fput_needed; struct socket *sock; + bool compat; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) return err; - err = security_socket_getsockopt(sock, level, optname); - if (err) - goto out_put; + compat = in_compat_syscall(); + err = do_sock_getsockopt(sock, compat, level, optname, + USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); - if (!in_compat_syscall()) - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); - - ops = READ_ONCE(sock->ops); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); - else if (unlikely(!ops->getsockopt)) - err = -EOPNOTSUPP; - else - err = ops->getsockopt(sock, level, optname, optval, - optlen); - - if (!in_compat_syscall()) - err = BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock->sk, level, optname, - USER_SOCKPTR(optval), - USER_SOCKPTR(optlen), - max_optlen, err); -out_put: fput_light(sock->file, fput_needed); return err; } From f8d6acb19f4a6eee74494f9b43744888fd6a5f9f Mon Sep 17 00:00:00 2001 From: Tze-nan Wu Date: Fri, 30 Aug 2024 16:25:17 +0800 Subject: [PATCH 128/268] bpf, net: Fix a potential race in do_sock_getsockopt() [ Upstream commit 33f339a1ba54e56bba57ee9a77c71e385ab4825c ] There's a potential race when `cgroup_bpf_enabled(CGROUP_GETSOCKOPT)` is false during the execution of `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN`, but becomes true when `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is called. This inconsistency can lead to `BPF_CGROUP_RUN_PROG_GETSOCKOPT` receiving an "-EFAULT" from `__cgroup_bpf_run_filter_getsockopt(max_optlen=0)`. Scenario shown as below: `process A` `process B` ----------- ------------ BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN enable CGROUP_GETSOCKOPT BPF_CGROUP_RUN_PROG_GETSOCKOPT (-EFAULT) To resolve this, remove the `BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN` macro and directly uses `copy_from_sockptr` to ensure that `max_optlen` is always set before `BPF_CGROUP_RUN_PROG_GETSOCKOPT` is invoked. 
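Condensed from the hunks below (a sketch of the resulting flow, not a complete function): max_optlen now gets a defined value unconditionally for native syscalls, so a program attached between the two checks can no longer observe max_optlen == 0.

int max_optlen __maybe_unused = 0;  /* defined even if no hook runs */

if (!compat)                        /* no longer gated on the BPF static key */
        copy_from_sockptr(&max_optlen, optlen, sizeof(int));

/* ... BPF_CGROUP_RUN_PROG_GETSOCKOPT() always sees the real optlen ... */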
Fixes: 0d01da6afc54 ("bpf: implement getsockopt and setsockopt hooks") Co-developed-by: Yanghui Li Signed-off-by: Yanghui Li Co-developed-by: Cheng-Jui Wang Signed-off-by: Cheng-Jui Wang Signed-off-by: Tze-nan Wu Acked-by: Stanislav Fomichev Acked-by: Alexei Starovoitov Link: https://patch.msgid.link/20240830082518.23243-1-Tze-nan.Wu@mediatek.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 9 --------- net/socket.c | 4 ++-- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 2aa82b7aed89e1..d4f2c8706042cd 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -375,14 +375,6 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled(CGROUP_GETSOCKOPT)) \ - copy_from_sockptr(&__ret, optlen, sizeof(int)); \ - __ret; \ -}) - #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, optlen, \ max_optlen, retval) \ ({ \ @@ -500,7 +492,6 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(atype, major, minor, access) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) -#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \ diff --git a/net/socket.c b/net/socket.c index d275f5f148824d..9db33cd4a71b89 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2355,7 +2355,7 @@ INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { - int max_optlen __maybe_unused; + int max_optlen __maybe_unused = 0; const struct proto_ops *ops; int err; @@ -2364,7 +2364,7 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, return err; if (!compat) - max_optlen = BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen); + copy_from_sockptr(&max_optlen, optlen, sizeof(int)); ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { From 40531583c5cdd731b9b51b4929e5cf6781b80e8f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Fri, 30 Aug 2024 17:31:07 +0200 Subject: [PATCH 129/268] bareudp: Fix device stats updates. [ Upstream commit 4963d2343af81f493519f9c3ea9f2169eaa7353a ] Bareudp devices update their stats concurrently. Therefore they need proper atomic increments. 
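As a short illustration of the difference (generic sketch, not the driver code): the plain post-increment is a non-atomic read-modify-write on a shared counter, while DEV_STATS_INC() increments the same per-device field atomically, which is what "proper atomic increments" refers to here.

/* Racy under concurrent RX/TX: two CPUs can load the same value and
 * both store value + 1, losing one increment.
 */
dev->stats.rx_dropped++;

/* Safe: atomic increment of the same counter. */
DEV_STATS_INC(dev, rx_dropped);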
Fixes: 571912c69f0e ("net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc.") Signed-off-by: Guillaume Nault Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/04b7b9d0b480158eb3ab4366ec80aa2ab7e41fcb.1725031794.git.gnault@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/bareudp.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 683203f87ae2b2..277493e41b0723 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -82,7 +82,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, sizeof(ipversion))) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } ipversion >>= 4; @@ -92,7 +92,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) { @@ -106,7 +106,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ipv4_is_multicast(tunnel_hdr->daddr)) { proto = htons(ETH_P_MPLS_MC); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } else { @@ -122,7 +122,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) (addr_type & IPV6_ADDR_MULTICAST)) { proto = htons(ETH_P_MPLS_MC); } else { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } } @@ -134,12 +134,12 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) proto, !net_eq(bareudp->net, dev_net(bareudp->dev)))) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); if (!tun_dst) { - bareudp->dev->stats.rx_dropped++; + DEV_STATS_INC(bareudp->dev, rx_dropped); goto drop; } skb_dst_set(skb, &tun_dst->dst); @@ -165,8 +165,8 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) &((struct ipv6hdr *)oiph)->saddr); } if (err > 1) { - ++bareudp->dev->stats.rx_frame_errors; - ++bareudp->dev->stats.rx_errors; + DEV_STATS_INC(bareudp->dev, rx_frame_errors); + DEV_STATS_INC(bareudp->dev, rx_errors); goto drop; } } @@ -462,11 +462,11 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb(skb); if (err == -ELOOP) - dev->stats.collisions++; + DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) - dev->stats.tx_carrier_errors++; + DEV_STATS_INC(dev, tx_carrier_errors); - dev->stats.tx_errors++; + DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } From 1df42be305fe478ded1ee0c1d775f4ece713483b Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 2 Sep 2024 10:39:27 -0700 Subject: [PATCH 130/268] fou: Fix null-ptr-deref in GRO. [ Upstream commit 7e4196935069947d8b70b09c1660b67b067e75cb ] We observed a null-ptr-deref in fou_gro_receive() while shutting down a host. [0] The NULL pointer is sk->sk_user_data, and the offset 8 is of protocol in struct fou. When fou_release() is called due to netns dismantle or explicit tunnel teardown, udp_tunnel_sock_release() sets NULL to sk->sk_user_data. Then, the tunnel socket is destroyed after a single RCU grace period. 
So, in-flight udp4_gro_receive() could find the socket and execute the FOU GRO handler, where sk->sk_user_data could be NULL. Let's use rcu_dereference_sk_user_data() in fou_from_sock() and add NULL checks in FOU GRO handlers. [0]: BUG: kernel NULL pointer dereference, address: 0000000000000008 PF: supervisor read access in kernel mode PF: error_code(0x0000) - not-present page PGD 80000001032f4067 P4D 80000001032f4067 PUD 103240067 PMD 0 SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.216-204.855.amzn2.x86_64 #1 Hardware name: Amazon EC2 c5.large/, BIOS 1.0 10/16/2017 RIP: 0010:fou_gro_receive (net/ipv4/fou.c:233) [fou] Code: 41 5f c3 cc cc cc cc e8 e7 2e 69 f4 0f 1f 80 00 00 00 00 0f 1f 44 00 00 49 89 f8 41 54 48 89 f7 48 89 d6 49 8b 80 88 02 00 00 <0f> b6 48 08 0f b7 42 4a 66 25 fd fd 80 cc 02 66 89 42 4a 0f b6 42 RSP: 0018:ffffa330c0003d08 EFLAGS: 00010297 RAX: 0000000000000000 RBX: ffff93d9e3a6b900 RCX: 0000000000000010 RDX: ffff93d9e3a6b900 RSI: ffff93d9e3a6b900 RDI: ffff93dac2e24d08 RBP: ffff93d9e3a6b900 R08: ffff93dacbce6400 R09: 0000000000000002 R10: 0000000000000000 R11: ffffffffb5f369b0 R12: ffff93dacbce6400 R13: ffff93dac2e24d08 R14: 0000000000000000 R15: ffffffffb4edd1c0 FS: 0000000000000000(0000) GS:ffff93daee800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000102140001 CR4: 00000000007706f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ? show_trace_log_lvl (arch/x86/kernel/dumpstack.c:259) ? __die_body.cold (arch/x86/kernel/dumpstack.c:478 arch/x86/kernel/dumpstack.c:420) ? no_context (arch/x86/mm/fault.c:752) ? exc_page_fault (arch/x86/include/asm/irqflags.h:49 arch/x86/include/asm/irqflags.h:89 arch/x86/mm/fault.c:1435 arch/x86/mm/fault.c:1483) ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:571) ? 
fou_gro_receive (net/ipv4/fou.c:233) [fou] udp_gro_receive (include/linux/netdevice.h:2552 net/ipv4/udp_offload.c:559) udp4_gro_receive (net/ipv4/udp_offload.c:604) inet_gro_receive (net/ipv4/af_inet.c:1549 (discriminator 7)) dev_gro_receive (net/core/dev.c:6035 (discriminator 4)) napi_gro_receive (net/core/dev.c:6170) ena_clean_rx_irq (drivers/amazon/net/ena/ena_netdev.c:1558) [ena] ena_io_poll (drivers/amazon/net/ena/ena_netdev.c:1742) [ena] napi_poll (net/core/dev.c:6847) net_rx_action (net/core/dev.c:6917) __do_softirq (arch/x86/include/asm/jump_label.h:25 include/linux/jump_label.h:200 include/trace/events/irq.h:142 kernel/softirq.c:299) asm_call_irq_on_stack (arch/x86/entry/entry_64.S:809) do_softirq_own_stack (arch/x86/include/asm/irq_stack.h:27 arch/x86/include/asm/irq_stack.h:77 arch/x86/kernel/irq_64.c:77) irq_exit_rcu (kernel/softirq.c:393 kernel/softirq.c:423 kernel/softirq.c:435) common_interrupt (arch/x86/kernel/irq.c:239) asm_common_interrupt (arch/x86/include/asm/idtentry.h:626) RIP: 0010:acpi_idle_do_entry (arch/x86/include/asm/irqflags.h:49 arch/x86/include/asm/irqflags.h:89 drivers/acpi/processor_idle.c:114 drivers/acpi/processor_idle.c:575) Code: 8b 15 d1 3c c4 02 ed c3 cc cc cc cc 65 48 8b 04 25 40 ef 01 00 48 8b 00 a8 08 75 eb 0f 1f 44 00 00 0f 00 2d d5 09 55 00 fb f4 c3 cc cc cc cc e9 be fc ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 RSP: 0018:ffffffffb5603e58 EFLAGS: 00000246 RAX: 0000000000004000 RBX: ffff93dac0929c00 RCX: ffff93daee833900 RDX: ffff93daee800000 RSI: ffff93daee87dc00 RDI: ffff93daee87dc64 RBP: 0000000000000001 R08: ffffffffb5e7b6c0 R09: 0000000000000044 R10: ffff93daee831b04 R11: 00000000000001cd R12: 0000000000000001 R13: ffffffffb5e7b740 R14: 0000000000000001 R15: 0000000000000000 ? sched_clock_cpu (kernel/sched/clock.c:371) acpi_idle_enter (drivers/acpi/processor_idle.c:712 (discriminator 3)) cpuidle_enter_state (drivers/cpuidle/cpuidle.c:237) cpuidle_enter (drivers/cpuidle/cpuidle.c:353) cpuidle_idle_call (kernel/sched/idle.c:158 kernel/sched/idle.c:239) do_idle (kernel/sched/idle.c:302) cpu_startup_entry (kernel/sched/idle.c:395 (discriminator 1)) start_kernel (init/main.c:1048) secondary_startup_64_no_verify (arch/x86/kernel/head_64.S:310) Modules linked in: udp_diag tcp_diag inet_diag nft_nat ipip tunnel4 dummy fou ip_tunnel nft_masq nft_chain_nat nf_nat wireguard nft_ct curve25519_x86_64 libcurve25519_generic nf_conntrack libchacha20poly1305 nf_defrag_ipv6 nf_defrag_ipv4 nft_objref chacha_x86_64 nft_counter nf_tables nfnetlink poly1305_x86_64 ip6_udp_tunnel udp_tunnel libchacha crc32_pclmul ghash_clmulni_intel aesni_intel crypto_simd cryptd glue_helper mousedev psmouse button ena ptp pps_core crc32c_intel CR2: 0000000000000008 Fixes: d92283e338f6 ("fou: change to use UDP socket GRO") Reported-by: Alphonse Kurian Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20240902173927.62706-1-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/fou_core.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index b38b82ae903de0..e0b8d6b17a34dc 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -50,7 +50,7 @@ struct fou_net { static inline struct fou *fou_from_sock(struct sock *sk) { - return sk->sk_user_data; + return rcu_dereference_sk_user_data(sk); } static int fou_recv_pull(struct sk_buff *skb, struct fou *fou, size_t len) @@ -233,9 +233,15 @@ static struct sk_buff *fou_gro_receive(struct sock *sk, struct 
sk_buff *skb) { const struct net_offload __rcu **offloads; - u8 proto = fou_from_sock(sk)->protocol; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; struct sk_buff *pp = NULL; + u8 proto; + + if (!fou) + goto out; + + proto = fou->protocol; /* We can clear the encap_mark for FOU as we are essentially doing * one of two possible things. We are either adding an L4 tunnel @@ -263,14 +269,24 @@ static int fou_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { const struct net_offload __rcu **offloads; - u8 proto = fou_from_sock(sk)->protocol; + struct fou *fou = fou_from_sock(sk); const struct net_offload *ops; - int err = -ENOSYS; + u8 proto; + int err; + + if (!fou) { + err = -ENOENT; + goto out; + } + + proto = fou->protocol; offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads; ops = rcu_dereference(offloads[proto]); - if (WARN_ON(!ops || !ops->callbacks.gro_complete)) + if (WARN_ON(!ops || !ops->callbacks.gro_complete)) { + err = -ENOSYS; goto out; + } err = ops->callbacks.gro_complete(skb, nhoff); @@ -320,6 +336,9 @@ static struct sk_buff *gue_gro_receive(struct sock *sk, struct gro_remcsum grc; u8 proto; + if (!fou) + goto out; + skb_gro_remcsum_init(&grc); off = skb_gro_offset(skb); From 565eb51b3d801ad400b6a39ebfbec9c0e675bc84 Mon Sep 17 00:00:00 2001 From: Hayes Wang Date: Tue, 3 Sep 2024 14:33:33 +0800 Subject: [PATCH 131/268] r8152: fix the firmware doesn't work [ Upstream commit 8487b4af59d4d7feda4b119dc2d92c67ca25c27e ] generic_ocp_write() asks the parameter "size" must be 4 bytes align. Therefore, write the bp would fail, if the mac->bp_num is odd. Align the size to 4 for fixing it. The way may write an extra bp, but the rtl8152_is_fw_mac_ok() makes sure the value must be 0 for the bp whose index is more than mac->bp_num. That is, there is no influence for the firmware. Besides, I check the return value of generic_ocp_write() to make sure everything is correct. Fixes: e5c266a61186 ("r8152: set bp in bulk") Signed-off-by: Hayes Wang Link: https://patch.msgid.link/20240903063333.4502-1-hayeswang@realtek.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/usb/r8152.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 127b34dcc5b373..ce19ebd180f126 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -5143,14 +5143,23 @@ static void rtl8152_fw_mac_apply(struct r8152 *tp, struct fw_mac *mac) data = (u8 *)mac; data += __le16_to_cpu(mac->fw_offset); - generic_ocp_write(tp, __le16_to_cpu(mac->fw_reg), 0xff, length, data, - type); + if (generic_ocp_write(tp, __le16_to_cpu(mac->fw_reg), 0xff, length, + data, type) < 0) { + dev_err(&tp->intf->dev, "Write %s fw fail\n", + type ? "PLA" : "USB"); + return; + } ocp_write_word(tp, type, __le16_to_cpu(mac->bp_ba_addr), __le16_to_cpu(mac->bp_ba_value)); - generic_ocp_write(tp, __le16_to_cpu(mac->bp_start), BYTE_EN_DWORD, - __le16_to_cpu(mac->bp_num) << 1, mac->bp, type); + if (generic_ocp_write(tp, __le16_to_cpu(mac->bp_start), BYTE_EN_DWORD, + ALIGN(__le16_to_cpu(mac->bp_num) << 1, 4), + mac->bp, type) < 0) { + dev_err(&tp->intf->dev, "Write %s bp fail\n", + type ? 
"PLA" : "USB"); + return; + } bp_en_addr = __le16_to_cpu(mac->bp_en_addr); if (bp_en_addr) From c6c535a444690f68d30324b70ec3d1e387d25b5b Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Tue, 3 Sep 2024 10:19:57 +0200 Subject: [PATCH 132/268] net: bridge: br_fdb_external_learn_add(): always set EXT_LEARN [ Upstream commit bee2ef946d3184e99077be526567d791c473036f ] When userspace wants to take over a fdb entry by setting it as EXTERN_LEARNED, we set both flags BR_FDB_ADDED_BY_EXT_LEARN and BR_FDB_ADDED_BY_USER in br_fdb_external_learn_add(). If the bridge updates the entry later because its port changed, we clear the BR_FDB_ADDED_BY_EXT_LEARN flag, but leave the BR_FDB_ADDED_BY_USER flag set. If userspace then wants to take over the entry again, br_fdb_external_learn_add() sees that BR_FDB_ADDED_BY_USER and skips setting the BR_FDB_ADDED_BY_EXT_LEARN flags, thus silently ignores the update. Fix this by always allowing to set BR_FDB_ADDED_BY_EXT_LEARN regardless if this was a user fdb entry or not. Fixes: 710ae7287737 ("net: bridge: Mark FDB entries that were added by user as such") Signed-off-by: Jonas Gorski Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/20240903081958.29951-1-jonas.gorski@bisdn.de Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/bridge/br_fdb.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e69a872bfc1d70..a6d8cd9a58078f 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -1425,12 +1425,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, modified = true; } - if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { + if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { /* Refresh entry */ fdb->used = jiffies; - } else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) { - /* Take over SW learned entry */ - set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags); + } else { modified = true; } From 97d6274615046d870d1bfa3497fef95790164537 Mon Sep 17 00:00:00 2001 From: Pawel Dembicki Date: Tue, 3 Sep 2024 22:33:41 +0200 Subject: [PATCH 133/268] net: dsa: vsc73xx: fix possible subblocks range of CAPT block [ Upstream commit 8e69c96df771ab469cec278edb47009351de4da6 ] CAPT block (CPU Capture Buffer) have 7 sublocks: 0-3, 4, 6, 7. Function 'vsc73xx_is_addr_valid' allows to use only block 0 at this moment. This patch fix it. 
Fixes: 05bd97fc559d ("net: dsa: Add Vitesse VSC73xx DSA router driver") Signed-off-by: Pawel Dembicki Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/20240903203340.1518789-1-paweldembicki@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/dsa/vitesse-vsc73xx-core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index 23bd8b3f899310..a28bf5433ea727 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -34,7 +34,7 @@ #define VSC73XX_BLOCK_ANALYZER 0x2 /* Only subblock 0 */ #define VSC73XX_BLOCK_MII 0x3 /* Subblocks 0 and 1 */ #define VSC73XX_BLOCK_MEMINIT 0x3 /* Only subblock 2 */ -#define VSC73XX_BLOCK_CAPTURE 0x4 /* Only subblock 2 */ +#define VSC73XX_BLOCK_CAPTURE 0x4 /* Subblocks 0-4, 6, 7 */ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ @@ -370,13 +370,19 @@ int vsc73xx_is_addr_valid(u8 block, u8 subblock) break; case VSC73XX_BLOCK_MII: - case VSC73XX_BLOCK_CAPTURE: case VSC73XX_BLOCK_ARBITER: switch (subblock) { case 0 ... 1: return 1; } break; + case VSC73XX_BLOCK_CAPTURE: + switch (subblock) { + case 0 ... 4: + case 6 ... 7: + return 1; + } + break; } return 0; From 2b110cce19c8e1c04b8de388f531f3bed6e19283 Mon Sep 17 00:00:00 2001 From: Jamie Bainbridge Date: Wed, 4 Sep 2024 16:12:26 +1000 Subject: [PATCH 134/268] selftests: net: enable bind tests [ Upstream commit e4af74a53b7aa865e7fcc104630ebb7a9129b71f ] bind_wildcard is compiled but not run, bind_timewait is not compiled. These two tests complete in a very short time, use the test harness properly, and seem reasonable to enable. The author of the tests confirmed via email that these were intended to be run. Enable these two tests. Fixes: 13715acf8ab5 ("selftest: Add test for bind() conflicts.") Fixes: 2c042e8e54ef ("tcp: Add selftest for bind() and TIME_WAIT.") Signed-off-by: Jamie Bainbridge Reviewed-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/5a009b26cf5fb1ad1512d89c61b37e2fac702323.1725430322.git.jamie.bainbridge@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- tools/testing/selftests/net/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index d417de10512339..91a48efb140bef 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -85,7 +85,8 @@ TEST_GEN_FILES += csum TEST_GEN_FILES += nat6to4.o TEST_GEN_FILES += xdp_dummy.o TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard +TEST_GEN_PROGS += bind_wildcard +TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh From e997b357b13a7d95de31681fc54fcc34235fa527 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 18 Jun 2024 15:12:29 +0530 Subject: [PATCH 135/268] xen: privcmd: Fix possible access to a freed kirqfd instance [ Upstream commit 611ff1b1ae989a7bcce3e2a8e132ee30e968c557 ] Nothing prevents simultaneous ioctl calls to privcmd_irqfd_assign() and privcmd_irqfd_deassign(). 
If that happens, it is possible that a kirqfd created and added to the irqfds_list by privcmd_irqfd_assign() may get removed by another thread executing privcmd_irqfd_deassign(), while the former is still using it after dropping the locks. This can lead to a situation where an already freed kirqfd instance may be accessed and cause kernel oops. Use SRCU locking to prevent the same, as is done for the KVM implementation for irqfds. Reported-by: Al Viro Suggested-by: Paolo Bonzini Signed-off-by: Viresh Kumar Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/9e884af1f1f842eacbb7afc5672c8feb4dea7f3f.1718703669.git.viresh.kumar@linaro.org Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin --- drivers/xen/privcmd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 923f064c7e3e92..61aaded483e1d3 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -842,6 +843,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, /* Irqfd support */ static struct workqueue_struct *irqfd_cleanup_wq; static DEFINE_SPINLOCK(irqfds_lock); +DEFINE_STATIC_SRCU(irqfds_srcu); static LIST_HEAD(irqfds_list); struct privcmd_kernel_irqfd { @@ -869,6 +871,9 @@ static void irqfd_shutdown(struct work_struct *work) container_of(work, struct privcmd_kernel_irqfd, shutdown); u64 cnt; + /* Make sure irqfd has been initialized in assign path */ + synchronize_srcu(&irqfds_srcu); + eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt); eventfd_ctx_put(kirqfd->eventfd); kfree(kirqfd); @@ -931,7 +936,7 @@ static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd) __poll_t events; struct fd f; void *dm_op; - int ret; + int ret, idx; kirqfd = kzalloc(sizeof(*kirqfd) + irqfd->size, GFP_KERNEL); if (!kirqfd) @@ -977,6 +982,7 @@ static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd) } } + idx = srcu_read_lock(&irqfds_srcu); list_add_tail(&kirqfd->list, &irqfds_list); spin_unlock_irqrestore(&irqfds_lock, flags); @@ -988,6 +994,8 @@ static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd) if (events & EPOLLIN) irqfd_inject(kirqfd); + srcu_read_unlock(&irqfds_srcu, idx); + /* * Do not drop the file until the kirqfd is fully initialized, otherwise * we might race against the EPOLLHUP. From 441e6f5829beb77a8c8e11e9693cf0154b88b0aa Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 2 Jul 2024 12:08:09 +0100 Subject: [PATCH 136/268] firmware: cs_dsp: Don't allow writes to read-only controls [ Upstream commit 62412a9357b16a4e39dc582deb2e2a682b92524c ] Add a check to cs_dsp_coeff_write_ctrl() to abort if the control is not writeable. The cs_dsp code originated as an ASoC driver (wm_adsp) where all controls were exported as ALSA controls. It relied on ALSA to enforce the read-only permission. Now that the code has been separated from ALSA/ASoC it must perform its own permission check. This isn't currently causing any problems so there shouldn't be any need to backport this. If the client of cs_dsp exposes the control as an ALSA control, it should set permissions on that ALSA control to protect it. The few uses of cs_dsp_coeff_write_ctrl() inside drivers are for writable controls. 
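If a client does export such a control through ALSA, the advice above can be followed by deriving the kcontrol access mask from the same firmware flag that cs_dsp_coeff_write_ctrl() now checks. A minimal sketch, with the kcontrol plumbing omitted and the local variable name (kctl_new) chosen only for illustration:

	/* Mirror the DSP control permissions in the exported ALSA control.
	 * Controls with no flags at all remain writable, matching the check
	 * added to cs_dsp_coeff_write_ctrl(). */
	if (!ctl->flags || (ctl->flags & WMFW_CTL_FLAG_WRITEABLE))
		kctl_new.access = SNDRV_CTL_ELEM_ACCESS_READWRITE;
	else
		kctl_new.access = SNDRV_CTL_ELEM_ACCESS_READ;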
Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20240702110809.16836-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/firmware/cirrus/cs_dsp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/firmware/cirrus/cs_dsp.c b/drivers/firmware/cirrus/cs_dsp.c index a1da7581adb038..e62ffffe5fb8d4 100644 --- a/drivers/firmware/cirrus/cs_dsp.c +++ b/drivers/firmware/cirrus/cs_dsp.c @@ -796,6 +796,9 @@ int cs_dsp_coeff_write_ctrl(struct cs_dsp_coeff_ctl *ctl, lockdep_assert_held(&ctl->dsp->pwr_lock); + if (ctl->flags && !(ctl->flags & WMFW_CTL_FLAG_WRITEABLE)) + return -EPERM; + if (len + off * sizeof(u32) > ctl->len) return -EINVAL; From 8bdbc44c6d2638bccbe33d075ad80a4f8f701893 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Fri, 28 Jun 2024 16:55:39 -0400 Subject: [PATCH 137/268] phy: zynqmp: Take the phy mutex in xlate [ Upstream commit d79c6840917097285e03a49f709321f5fb972750 ] Take the phy mutex in xlate to protect against concurrent modification/access to gtr_phy. This does not typically cause any issues, since in most systems the phys are only xlated once and thereafter accessed with the phy API (which takes the locks). However, we are about to allow userspace to access phys for debugging, so it's important to avoid any data races. Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20240628205540.3098010-5-sean.anderson@linux.dev Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/xilinx/phy-zynqmp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c index 8c8b1ca31e4c46..c72b52955a8678 100644 --- a/drivers/phy/xilinx/phy-zynqmp.c +++ b/drivers/phy/xilinx/phy-zynqmp.c @@ -846,6 +846,7 @@ static struct phy *xpsgtr_xlate(struct device *dev, phy_type = args->args[1]; phy_instance = args->args[2]; + guard(mutex)(>r_phy->phy->mutex); ret = xpsgtr_set_lane_type(gtr_phy, phy_type, phy_instance); if (ret < 0) { dev_err(gtr_dev->dev, "Invalid PHY type and/or instance\n"); From 67786b291e77f28864b5f7465d3c444d8723d02d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?= Date: Thu, 27 Jun 2024 12:18:40 +0200 Subject: [PATCH 138/268] ASoC: topology: Properly initialize soc_enum values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8ec2a2643544ce352f012ad3d248163199d05dfc ] soc_tplg_denum_create_values() should properly set its values field. 
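Both fields matter at runtime: value-mapped enums are resolved by scanning se->values over se->items entries, so leaving either unset makes the kcontrol report wrong items or fall back to raw register values. A simplified sketch of such a consumer (approximate, not the exact soc-ops code):

	/* Simplified consumer of a value-mapped soc_enum. */
	static unsigned int enum_val_to_item(struct soc_enum *e, unsigned int val)
	{
		unsigned int i;

		if (!e->values)		/* no mapping table configured */
			return val;

		for (i = 0; i < e->items; i++)
			if (val == e->values[i])
				return i;

		return 0;
	}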
Signed-off-by: Amadeusz Sławiński Link: https://patch.msgid.link/20240627101850.2191513-4-amadeuszx.slawinski@linux.intel.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 7e8fca0b066280..a643ef654b9d74 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -851,6 +851,8 @@ static int soc_tplg_denum_create_values(struct soc_tplg *tplg, struct soc_enum * se->dobj.control.dvalues[i] = le32_to_cpu(ec->values[i]); } + se->items = le32_to_cpu(ec->items); + se->values = (const unsigned int *)se->dobj.control.dvalues; return 0; } From 8b3267428333916c15e48fd8b05db7dde6493935 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 2 Jul 2024 12:13:24 +0200 Subject: [PATCH 139/268] dm init: Handle minors larger than 255 [ Upstream commit 140ce37fd78a629105377e17842465258a5459ef ] dm_parse_device_entry() simply copies the minor number into dmi.dev, but the dev_t format splits the minor number between the lowest 8 bytes and highest 12 bytes. If the minor number is larger than 255, part of it will end up getting treated as the major number Fix this by checking that the minor number is valid and then encoding it as a dev_t. Signed-off-by: Benjamin Marzinski Signed-off-by: Mikulas Patocka Signed-off-by: Sasha Levin --- drivers/md/dm-init.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c index 2a71bcdba92d14..b37bbe76250034 100644 --- a/drivers/md/dm-init.c +++ b/drivers/md/dm-init.c @@ -212,8 +212,10 @@ static char __init *dm_parse_device_entry(struct dm_device *dev, char *str) strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid)); /* minor */ if (strlen(field[2])) { - if (kstrtoull(field[2], 0, &dev->dmi.dev)) + if (kstrtoull(field[2], 0, &dev->dmi.dev) || + dev->dmi.dev >= (1 << MINORBITS)) return ERR_PTR(-EINVAL); + dev->dmi.dev = huge_encode_dev((dev_t)dev->dmi.dev); dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG; } /* flags */ From 7cfa7abb240c04b5c33e5c0c34f957829b36dbc1 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Tue, 2 Jul 2024 21:08:33 +0800 Subject: [PATCH 140/268] iommu/vt-d: Handle volatile descriptor status read [ Upstream commit b5e86a95541cea737394a1da967df4cd4d8f7182 ] Queued invalidation wait descriptor status is volatile in that IOMMU hardware writes the data upon completion. Use READ_ONCE() to prevent compiler optimizations which ensures memory reads every time. As a side effect, READ_ONCE() also enforces strict types and may add an extra instruction. But it should not have negative performance impact since we use cpu_relax anyway and the extra time(by adding an instruction) may allow IOMMU HW request cacheline ownership easier. e.g. 
gcc 12.3 BEFORE: 81 38 ad de 00 00 cmpl $0x2,(%rax) AFTER (with READ_ONCE()) 772f: 8b 00 mov (%rax),%eax 7731: 3d ad de 00 00 cmp $0x2,%eax //status data is 32 bit Signed-off-by: Jacob Pan Reviewed-by: Kevin Tian Reviewed-by: Yi Liu Link: https://lore.kernel.org/r/20240607173817.3914600-1-jacob.jun.pan@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20240702130839.108139-2-baolu.lu@linux.intel.com Signed-off-by: Will Deacon Signed-off-by: Sasha Levin --- drivers/iommu/intel/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9ab4..84f0459e503cfd 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1422,7 +1422,7 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc, */ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); - while (qi->desc_status[wait_index] != QI_DONE) { + while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) { /* * We will leave the interrupts disabled, to prevent interrupt * context to queue another cmd while a cmd is already submitted From 84a6b76b285906264270ef0db5163d73b8d0f74f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 3 Jul 2024 14:52:29 -0400 Subject: [PATCH 141/268] cgroup: Protect css->cgroup write under css_set_lock [ Upstream commit 57b56d16800e8961278ecff0dc755d46c4575092 ] The writing of css->cgroup associated with the cgroup root in rebind_subsystems() is currently protected only by cgroup_mutex. However, the reading of css->cgroup in both proc_cpuset_show() and proc_cgroup_show() is protected just by css_set_lock. That makes the readers susceptible to racing problems like data tearing or caching. It is also a problem that can be reported by KCSAN. This can be fixed by using READ_ONCE() and WRITE_ONCE() to access css->cgroup. Alternatively, the writing of css->cgroup can be moved under css_set_lock as well which is done by this patch. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/cgroup/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 5eca6281d1aa68..660817c125e73d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1829,9 +1829,9 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); rcu_assign_pointer(dcgrp->subsys[ssid], css); ss->root = dst_root; - css->cgroup = dcgrp; spin_lock_irq(&css_set_lock); + css->cgroup = dcgrp; WARN_ON(!list_empty(&dcgrp->e_csets[ss->id])); list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id], e_cset_node[ss->id]) { From ec5b47a370177d79ae7773858042c107e21f8ecc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Jul 2024 17:22:36 +0200 Subject: [PATCH 142/268] um: line: always fill *error_out in setup_one_line() [ Upstream commit 824ac4a5edd3f7494ab1996826c4f47f8ef0f63d ] The pointer isn't initialized by callers, but I have encountered cases where it's still printed; initialize it in all possible cases in setup_one_line(). 
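A condensed caller pattern shows why every exit path has to fill the pointer before returning an error; the string is printed without any initialization on the caller side. This is a sketch with simplified names, not the exact UML call site:

	char *error;	/* never pre-initialized by the caller */

	if (setup_one_line(lines, n, init, opts, &error) < 0)
		printk(KERN_ERR "Failed to set up line %d: %s\n", n, error);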
Link: https://patch.msgid.link/20240703172235.ad863568b55f.Iaa1eba4db8265d7715ba71d5f6bb8c7ff63d27e9@changeid Acked-By: Anton Ivanov Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- arch/um/drivers/line.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 375200e9aba9a7..2ba4e0d4e26b06 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -383,6 +383,7 @@ int setup_one_line(struct line *lines, int n, char *init, parse_chan_pair(NULL, line, n, opts, error_out); err = 0; } + *error_out = "configured as 'none'"; } else { char *new = kstrdup(init, GFP_KERNEL); if (!new) { @@ -406,6 +407,7 @@ int setup_one_line(struct line *lines, int n, char *init, } } if (err) { + *error_out = "failed to parse channel pair"; line->init_str = NULL; line->valid = 0; kfree(new); From 72377cee3f46cc4aa5d5f445023c6b7f4e1b2d9e Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Tue, 2 Jul 2024 22:51:52 +0800 Subject: [PATCH 143/268] devres: Initialize an uninitialized struct member [ Upstream commit 56a20ad349b5c51909cf8810f7c79b288864ad33 ] Initialize an uninitialized struct member for driver API devres_open_group(). Signed-off-by: Zijun Hu Link: https://lore.kernel.org/r/1719931914-19035-4-git-send-email-quic_zijuhu@quicinc.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/base/devres.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index 8d709dbd4e0c1d..e9b0d94aeabd90 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -567,6 +567,7 @@ void * devres_open_group(struct device *dev, void *id, gfp_t gfp) grp->id = grp; if (id) grp->id = id; + grp->color = 0; spin_lock_irqsave(&dev->devres_lock, flags); add_dr(dev, &grp->node[0]); From b82d4d5c736f4fd2ed224c35f554f50d1953d21e Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Mon, 1 Jul 2024 13:15:06 +0530 Subject: [PATCH 144/268] pci/hotplug/pnv_php: Fix hotplug driver crash on Powernv [ Upstream commit 335e35b748527f0c06ded9eebb65387f60647fda ] The hotplug driver for powerpc (pci/hotplug/pnv_php.c) causes a kernel crash when we try to hot-unplug/disable the PCIe switch/bridge from the PHB. The crash occurs because although the MSI data structure has been released during disable/hot-unplug path and it has been assigned with NULL, still during unregistration the code was again trying to explicitly disable the MSI which causes the NULL pointer dereference and kernel crash. The patch fixes the check during unregistration path to prevent invoking pci_disable_msi/msix() since its data structure is already freed. 
Reported-by: Timothy Pearson Closes: https://lore.kernel.org/all/1981605666.2142272.1703742465927.JavaMail.zimbra@raptorengineeringinc.com/ Acked-by: Bjorn Helgaas Tested-by: Shawn Anastasio Signed-off-by: Krishna Kumar Signed-off-by: Michael Ellerman Link: https://msgid.link/20240701074513.94873-2-krishnak@linux.ibm.com Signed-off-by: Sasha Levin --- drivers/pci/hotplug/pnv_php.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 881d420637bf1e..092c9ac0d26d27 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -39,7 +39,6 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, bool disable_device) { struct pci_dev *pdev = php_slot->pdev; - int irq = php_slot->irq; u16 ctrl; if (php_slot->irq > 0) { @@ -58,7 +57,7 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, php_slot->wq = NULL; } - if (disable_device || irq > 0) { + if (disable_device) { if (pdev->msix_enabled) pci_disable_msix(pdev); else if (pdev->msi_enabled) From c5b30148ef3b2fc6825228c4d9f716114c232932 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 6 Jun 2024 19:13:45 +0800 Subject: [PATCH 145/268] virtio_ring: fix KMSAN error for premapped mode [ Upstream commit 840b2d39a2dc1b96deb3f5c7fef76c9b24f08f51 ] Add kmsan for virtqueue_dma_map_single_attrs to fix: BUG: KMSAN: uninit-value in receive_buf+0x45ca/0x6990 receive_buf+0x45ca/0x6990 virtnet_poll+0x17e0/0x3130 net_rx_action+0x832/0x26e0 handle_softirqs+0x330/0x10f0 [...] Uninit was created at: __alloc_pages_noprof+0x62a/0xe60 alloc_pages_noprof+0x392/0x830 skb_page_frag_refill+0x21a/0x5c0 virtnet_rq_alloc+0x50/0x1500 try_fill_recv+0x372/0x54c0 virtnet_open+0x210/0xbe0 __dev_open+0x56e/0x920 __dev_change_flags+0x39c/0x2000 dev_change_flags+0xaa/0x200 do_setlink+0x197a/0x7420 rtnl_setlink+0x77c/0x860 [...] Signed-off-by: Xuan Zhuo Tested-by: Alexander Potapenko Message-Id: <20240606111345.93600-1-xuanzhuo@linux.alibaba.com> Signed-off-by: Michael S. Tsirkin Tested-by: Ilya Leoshkevich # s390x Acked-by: Jason Wang Signed-off-by: Sasha Levin --- drivers/virtio/virtio_ring.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6f7e5010a6735d..80669e05bf0ee4 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -3126,8 +3126,10 @@ dma_addr_t virtqueue_dma_map_single_attrs(struct virtqueue *_vq, void *ptr, { struct vring_virtqueue *vq = to_vvq(_vq); - if (!vq->use_dma_api) + if (!vq->use_dma_api) { + kmsan_handle_dma(virt_to_page(ptr), offset_in_page(ptr), size, dir); return (dma_addr_t)virt_to_phys(ptr); + } return dma_map_single_attrs(vring_dma_dev(vq), ptr, size, dir, attrs); } From c83d464b82a8ad62ec9077637f75d73fe955635a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20=C5=9Alusarz?= Date: Tue, 28 May 2024 13:02:46 +0200 Subject: [PATCH 146/268] wifi: rtw88: usb: schedule rx work after everything is set up MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit adc539784c98a7cc602cbf557debfc2e7b9be8b3 ] Right now it's possible to hit NULL pointer dereference in rtw_rx_fill_rx_status on hw object and/or its fields because initialization routine can start getting USB replies before rtw_dev is fully setup. The stack trace looks like this: rtw_rx_fill_rx_status rtw8821c_query_rx_desc rtw_usb_rx_handler ... queue_work rtw_usb_read_port_complete ... 
usb_submit_urb rtw_usb_rx_resubmit rtw_usb_init_rx rtw_usb_probe So while we do the async stuff rtw_usb_probe continues and calls rtw_register_hw, which does all kinds of initialization (e.g. via ieee80211_register_hw) that rtw_rx_fill_rx_status relies on. Fix this by moving the first usb_submit_urb after everything is set up. For me, this bug manifested as: [ 8.893177] rtw_8821cu 1-1:1.2: band wrong, packet dropped [ 8.910904] rtw_8821cu 1-1:1.2: hw->conf.chandef.chan NULL in rtw_rx_fill_rx_status because I'm using Larry's backport of rtw88 driver with the NULL checks in rtw_rx_fill_rx_status. Link: https://lore.kernel.org/linux-wireless/CA+shoWQ7P49jhQasofDcTdQhiuarPTjYEDa--NiVVx494WcuQw@mail.gmail.com/ Signed-off-by: Marcin Ślusarz Cc: Tim K Cc: Ping-Ke Shih Cc: Larry Finger Cc: Kalle Valo Cc: linux-wireless@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20240528110246.477321-1-marcin.slusarz@gmail.com Signed-off-by: Sasha Levin --- drivers/net/wireless/realtek/rtw88/usb.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index efd0c2915a0512..04a64afcbf8a2d 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -742,7 +742,6 @@ static struct rtw_hci_ops rtw_usb_ops = { static int rtw_usb_init_rx(struct rtw_dev *rtwdev) { struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); - int i; rtwusb->rxwq = create_singlethread_workqueue("rtw88_usb: rx wq"); if (!rtwusb->rxwq) { @@ -754,13 +753,19 @@ static int rtw_usb_init_rx(struct rtw_dev *rtwdev) INIT_WORK(&rtwusb->rx_work, rtw_usb_rx_handler); + return 0; +} + +static void rtw_usb_setup_rx(struct rtw_dev *rtwdev) +{ + struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); + int i; + for (i = 0; i < RTW_USB_RXCB_NUM; i++) { struct rx_usb_ctrl_block *rxcb = &rtwusb->rx_cb[i]; rtw_usb_rx_resubmit(rtwusb, rxcb); } - - return 0; } static void rtw_usb_deinit_rx(struct rtw_dev *rtwdev) @@ -897,6 +902,8 @@ int rtw_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) goto err_destroy_rxwq; } + rtw_usb_setup_rx(rtwdev); + return 0; err_destroy_rxwq: From 2f49e05d6b58d660f035a75ff96b77071b4bd5ed Mon Sep 17 00:00:00 2001 From: Kyoungrul Kim Date: Thu, 27 Jun 2024 17:51:04 +0900 Subject: [PATCH 147/268] scsi: ufs: core: Remove SCSI host only if added [ Upstream commit 7cbff570dbe8907e23bba06f6414899a0fbb2fcc ] If host tries to remove ufshcd driver from a UFS device it would cause a kernel panic if ufshcd_async_scan fails during ufshcd_probe_hba before adding a SCSI host with scsi_add_host and MCQ is enabled since SCSI host has been defered after MCQ configuration introduced by commit 0cab4023ec7b ("scsi: ufs: core: Defer adding host to SCSI if MCQ is supported"). To guarantee that SCSI host is removed only if it has been added, set the scsi_host_added flag to true after adding a SCSI host and check whether it is set or not before removing it. Signed-off-by: Kyoungrul Kim Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240627085104epcms2p5897a3870ea5c6416aa44f94df6c543d7@epcms2p5 Reviewed-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/ufs/core/ufshcd.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index ad0ef5b6b8cf9c..ed59d2367a4e72 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -10130,7 +10130,8 @@ void ufshcd_remove(struct ufs_hba *hba) blk_mq_destroy_queue(hba->tmf_queue); blk_put_queue(hba->tmf_queue); blk_mq_free_tag_set(&hba->tmf_tag_set); - scsi_remove_host(hba->host); + if (hba->scsi_host_added) + scsi_remove_host(hba->host); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); ufshcd_hba_stop(hba); @@ -10408,6 +10409,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) dev_err(hba->dev, "scsi_add_host failed\n"); goto out_disable; } + hba->scsi_host_added = true; } hba->tmf_tag_set = (struct blk_mq_tag_set) { @@ -10489,7 +10491,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) free_tmf_tag_set: blk_mq_free_tag_set(&hba->tmf_tag_set); out_remove_scsi_host: - scsi_remove_host(hba->host); + if (hba->scsi_host_added) + scsi_remove_host(hba->host); out_disable: hba->is_irq_enabled = false; ufshcd_hba_exit(hba); From 7b1d779647afaea9185fa2f150b1721e7c1aae89 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Thu, 27 Jun 2024 15:59:23 +0000 Subject: [PATCH 148/268] scsi: pm80xx: Set phy->enable_completion only when we wait for it [ Upstream commit e4f949ef1516c0d74745ee54a0f4882c1f6c7aea ] pm8001_phy_control() populates the enable_completion pointer with a stack address, sends a PHY_LINK_RESET / PHY_HARD_RESET, waits 300 ms, and returns. The problem arises when a phy control response comes late. After 300 ms the pm8001_phy_control() function returns and the passed enable_completion stack address is no longer valid. Late phy control response invokes complete() on a dangling enable_completion pointer which leads to a kernel crash. Signed-off-by: Igor Pylypiv Signed-off-by: Terrence Adams Link: https://lore.kernel.org/r/20240627155924.2361370-2-tadamsjr@google.com Acked-by: Jack Wang Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin --- drivers/scsi/pm8001/pm8001_sas.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index a5a31dfa451228..ee2da8e49d4cfb 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -166,7 +166,6 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, unsigned long flags; pm8001_ha = sas_phy->ha->lldd_ha; phy = &pm8001_ha->phy[phy_id]; - pm8001_ha->phy[phy_id].enable_completion = &completion; if (PM8001_CHIP_DISP->fatal_errors(pm8001_ha)) { /* @@ -190,6 +189,7 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, rates->maximum_linkrate; } if (pm8001_ha->phy[phy_id].phy_state == PHY_LINK_DISABLE) { + pm8001_ha->phy[phy_id].enable_completion = &completion; PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); wait_for_completion(&completion); } @@ -198,6 +198,7 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, break; case PHY_FUNC_HARD_RESET: if (pm8001_ha->phy[phy_id].phy_state == PHY_LINK_DISABLE) { + pm8001_ha->phy[phy_id].enable_completion = &completion; PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); wait_for_completion(&completion); } @@ -206,6 +207,7 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, break; case PHY_FUNC_LINK_RESET: if (pm8001_ha->phy[phy_id].phy_state == PHY_LINK_DISABLE) { + pm8001_ha->phy[phy_id].enable_completion = &completion; PM8001_CHIP_DISP->phy_start_req(pm8001_ha, phy_id); wait_for_completion(&completion); } From 3a986d134402dee9bdbbece33f2f9ea0024d0916 Mon Sep 17 00:00:00 2001 From: Hareshx Sankar Raj Date: Tue, 25 Jun 2024 15:41:19 +0100 Subject: [PATCH 149/268] crypto: qat - fix unintentional re-enabling of error interrupts [ Upstream commit f0622894c59458fceb33c4197462bc2006f3fc6b ] The logic that detects pending VF2PF interrupts unintentionally clears the section of the error mask register(s) not related to VF2PF. This might cause interrupts unrelated to VF2PF, reported through errsou3 and errsou5, to be reported again after the execution of the function disable_pending_vf2pf_interrupts() in dh895xcc and GEN2 devices. Fix by updating only section of errmsk3 and errmsk5 related to VF2PF. 
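The underlying pattern is a masked read-modify-write: only the VF2PF field of the error-mask register may be recomputed, while every other bit has to be written back unchanged. A generic sketch of the idiom (the helper name is chosen here for illustration; the masks are the ones used in the driver):

	/* Update only the bits selected by field_mask; preserve the rest. */
	static inline u32 masked_update(u32 reg, u32 field_mask, u32 new_bits)
	{
		return (reg & ~field_mask) | (new_bits & field_mask);
	}

	/* e.g. errmsk3 = masked_update(errmsk3,
	 *			       ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK),
	 *			       ADF_GEN2_ERR_MSK_VF2PF(sources | disabled));
	 */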
Signed-off-by: Hareshx Sankar Raj Reviewed-by: Damian Muszynski Signed-off-by: Giovanni Cabiddu Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.c | 4 +++- .../crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.c index 70ef1196393814..43af81fcab8686 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_pfvf.c @@ -100,7 +100,9 @@ static u32 adf_gen2_disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); - errmsk3 &= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled); + /* Update only section of errmsk3 related to VF2PF */ + errmsk3 &= ~ADF_GEN2_ERR_MSK_VF2PF(ADF_GEN2_VF_MSK); + errmsk3 |= ADF_GEN2_ERR_MSK_VF2PF(sources | disabled); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); /* Return the sources of the (new) interrupt(s) */ diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 09551f94912653..0e40897cc983a8 100644 --- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -191,8 +191,12 @@ static u32 disable_pending_vf2pf_interrupts(void __iomem *pmisc_addr) ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); - errmsk3 &= ADF_DH895XCC_ERR_MSK_VF2PF_L(sources | disabled); - errmsk5 &= ADF_DH895XCC_ERR_MSK_VF2PF_U(sources | disabled); + /* Update only section of errmsk3 and errmsk5 related to VF2PF */ + errmsk3 &= ~ADF_DH895XCC_ERR_MSK_VF2PF_L(ADF_DH895XCC_VF_MSK); + errmsk5 &= ~ADF_DH895XCC_ERR_MSK_VF2PF_U(ADF_DH895XCC_VF_MSK); + + errmsk3 |= ADF_DH895XCC_ERR_MSK_VF2PF_L(sources | disabled); + errmsk5 |= ADF_DH895XCC_ERR_MSK_VF2PF_U(sources | disabled); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK3, errmsk3); ADF_CSR_WR(pmisc_addr, ADF_GEN2_ERRMSK5, errmsk5); From 6891b11a0c6227ca7ed15786928a07b1c0e4d4af Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Jul 2024 23:43:04 -0700 Subject: [PATCH 150/268] hwmon: (adc128d818) Fix underflows seen when writing limit attributes [ Upstream commit 8cad724c8537fe3e0da8004646abc00290adae40 ] DIV_ROUND_CLOSEST() after kstrtol() results in an underflow if a large negative number such as -9223372036854775808 is provided by the user. Fix it by reordering clamp_val() and DIV_ROUND_CLOSEST() operations. Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/adc128d818.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/adc128d818.c b/drivers/hwmon/adc128d818.c index 46e3c8c5076573..73fd967998472c 100644 --- a/drivers/hwmon/adc128d818.c +++ b/drivers/hwmon/adc128d818.c @@ -176,7 +176,7 @@ static ssize_t adc128_in_store(struct device *dev, mutex_lock(&data->update_lock); /* 10 mV LSB on limit registers */ - regval = clamp_val(DIV_ROUND_CLOSEST(val, 10), 0, 255); + regval = DIV_ROUND_CLOSEST(clamp_val(val, 0, 2550), 10); data->in[index][nr] = regval << 4; reg = index == 1 ? 
ADC128_REG_IN_MIN(nr) : ADC128_REG_IN_MAX(nr); i2c_smbus_write_byte_data(data->client, reg, regval); @@ -214,7 +214,7 @@ static ssize_t adc128_temp_store(struct device *dev, return err; mutex_lock(&data->update_lock); - regval = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -128, 127); + regval = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); data->temp[index] = regval << 1; i2c_smbus_write_byte_data(data->client, index == 1 ? ADC128_REG_TEMP_MAX From 46e4fd338d5bdbaf60e41cda625b24949d2af201 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Jul 2024 23:48:42 -0700 Subject: [PATCH 151/268] hwmon: (lm95234) Fix underflows seen when writing limit attributes [ Upstream commit af64e3e1537896337405f880c1e9ac1f8c0c6198 ] DIV_ROUND_CLOSEST() after kstrtol() results in an underflow if a large negative number such as -9223372036854775808 is provided by the user. Fix it by reordering clamp_val() and DIV_ROUND_CLOSEST() operations. Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/lm95234.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/lm95234.c b/drivers/hwmon/lm95234.c index 67b9d7636ee427..37e8e9679aeb6b 100644 --- a/drivers/hwmon/lm95234.c +++ b/drivers/hwmon/lm95234.c @@ -301,7 +301,8 @@ static ssize_t tcrit2_store(struct device *dev, struct device_attribute *attr, if (ret < 0) return ret; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), 0, index ? 255 : 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, 0, (index ? 255 : 127) * 1000), + 1000); mutex_lock(&data->update_lock); data->tcrit2[index] = val; @@ -350,7 +351,7 @@ static ssize_t tcrit1_store(struct device *dev, struct device_attribute *attr, if (ret < 0) return ret; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), 0, 255); + val = DIV_ROUND_CLOSEST(clamp_val(val, 0, 255000), 1000); mutex_lock(&data->update_lock); data->tcrit1[index] = val; @@ -391,7 +392,7 @@ static ssize_t tcrit1_hyst_store(struct device *dev, if (ret < 0) return ret; - val = DIV_ROUND_CLOSEST(val, 1000); + val = DIV_ROUND_CLOSEST(clamp_val(val, -255000, 255000), 1000); val = clamp_val((int)data->tcrit1[index] - val, 0, 31); mutex_lock(&data->update_lock); @@ -431,7 +432,7 @@ static ssize_t offset_store(struct device *dev, struct device_attribute *attr, return ret; /* Accuracy is 1/2 degrees C */ - val = clamp_val(DIV_ROUND_CLOSEST(val, 500), -128, 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, -64000, 63500), 500); mutex_lock(&data->update_lock); data->toffset[index] = val; From 2f695544084a559f181cafdfd3f864c5ff9dd1db Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Jul 2024 23:50:08 -0700 Subject: [PATCH 152/268] hwmon: (nct6775-core) Fix underflows seen when writing limit attributes [ Upstream commit 0403e10bf0824bf0ec2bb135d4cf1c0cc3bf4bf0 ] DIV_ROUND_CLOSEST() after kstrtol() results in an underflow if a large negative number such as -9223372036854775808 is provided by the user. Fix it by reordering clamp_val() and DIV_ROUND_CLOSEST() operations. 
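A short worked example of why the ordering matters, using the temperature-offset limits handled here (a sketch assuming 64-bit long; the same reasoning applies to the other limit attributes in this series):

	long v = -9223372036854775807L - 1;	/* LONG_MIN, as accepted by kstrtol() */
	long bad, good;

	/* Old order: for negative v the macro expands to (v - 1000/2) / 1000,
	 * and v - 500 underflows LONG_MIN -> undefined behaviour. */
	bad = clamp_val(DIV_ROUND_CLOSEST(v, 1000), -128, 127);

	/* New order: clamp to the supported millidegree range first, so the
	 * macro's intermediate value can no longer leave the range of long. */
	good = DIV_ROUND_CLOSEST(clamp_val(v, -128000, 127000), 1000);	/* -128 */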
Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/nct6775-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index f3bf2e4701c387..8da7aa1614d7d1 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -2262,7 +2262,7 @@ store_temp_offset(struct device *dev, struct device_attribute *attr, if (err < 0) return err; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), -128, 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, -128000, 127000), 1000); mutex_lock(&data->update_lock); data->temp_offset[nr] = val; From 8fecb75bff1b7d87a071c32a37aa0700f2be379d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 6 Jul 2024 23:51:34 -0700 Subject: [PATCH 153/268] hwmon: (w83627ehf) Fix underflows seen when writing limit attributes [ Upstream commit 5c1de37969b7bc0abcb20b86e91e70caebbd4f89 ] DIV_ROUND_CLOSEST() after kstrtol() results in an underflow if a large negative number such as -9223372036854775808 is provided by the user. Fix it by reordering clamp_val() and DIV_ROUND_CLOSEST() operations. Signed-off-by: Guenter Roeck Signed-off-by: Sasha Levin --- drivers/hwmon/w83627ehf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/w83627ehf.c b/drivers/hwmon/w83627ehf.c index fe960c0a624f77..7d7d70afde6552 100644 --- a/drivers/hwmon/w83627ehf.c +++ b/drivers/hwmon/w83627ehf.c @@ -895,7 +895,7 @@ store_target_temp(struct device *dev, struct device_attribute *attr, if (err < 0) return err; - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), 0, 127); + val = DIV_ROUND_CLOSEST(clamp_val(val, 0, 127000), 1000); mutex_lock(&data->update_lock); data->target_temp[nr] = val; @@ -920,7 +920,7 @@ store_tolerance(struct device *dev, struct device_attribute *attr, return err; /* Limit the temp to 0C - 15C */ - val = clamp_val(DIV_ROUND_CLOSEST(val, 1000), 0, 15); + val = DIV_ROUND_CLOSEST(clamp_val(val, 0, 15000), 1000); mutex_lock(&data->update_lock); reg = w83627ehf_read_value(data, W83627EHF_REG_TOLERANCE[nr]); From 4b83b207f00670ff3f9f3ea04943f406ab432c1f Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Sun, 7 Jul 2024 16:30:07 +0800 Subject: [PATCH 154/268] ASoc: TAS2781: replace beXX_to_cpup with get_unaligned_beXX for potentially broken alignment [ Upstream commit 1cc509edbe23b61e8c245611bd15d88edb635a38 ] Use get_unaligned_be16 instead of be16_to_cpup and get_unaligned_be32 instead of be32_to_cpup for potentially broken alignment. 
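The two accessors only differ when the computed offset is not naturally aligned, and a firmware image gives no guarantee about that. A minimal sketch of the difference, with buf/offset as used in the parser:

	const unsigned char *buf = (unsigned char *)fmw->data;

	/* Old: casts an arbitrary byte offset to __be32 * and dereferences it;
	 * if &buf[offset] is not 4-byte aligned this is undefined behaviour and
	 * can fault on architectures without unaligned load support. */
	u32 v = be32_to_cpup((__be32 *)&buf[offset]);

	/* New: reads the four bytes in a way that is valid for any alignment. */
	u32 w = get_unaligned_be32(&buf[offset]);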
Signed-off-by: Shenghao Ding Link: https://patch.msgid.link/20240707083011.98-1-shenghao-ding@ti.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/tas2781-fmwlib.c | 71 +++++++++++++++---------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 41ad82a4291634..3639dcd0bbb2bc 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -21,7 +21,7 @@ #include #include #include - +#include #define ERROR_PRAM_CRCCHK 0x0000000 #define ERROR_YRAM_CRCCHK 0x0000001 @@ -125,8 +125,7 @@ static struct tasdevice_config_info *tasdevice_add_config( /* convert data[offset], data[offset + 1], data[offset + 2] and * data[offset + 3] into host */ - cfg_info->nblocks = - be32_to_cpup((__be32 *)&config_data[config_offset]); + cfg_info->nblocks = get_unaligned_be32(&config_data[config_offset]); config_offset += 4; /* Several kinds of dsp/algorithm firmwares can run on tas2781, @@ -170,14 +169,14 @@ static struct tasdevice_config_info *tasdevice_add_config( } bk_da[i]->yram_checksum = - be16_to_cpup((__be16 *)&config_data[config_offset]); + get_unaligned_be16(&config_data[config_offset]); config_offset += 2; bk_da[i]->block_size = - be32_to_cpup((__be32 *)&config_data[config_offset]); + get_unaligned_be32(&config_data[config_offset]); config_offset += 4; bk_da[i]->n_subblks = - be32_to_cpup((__be32 *)&config_data[config_offset]); + get_unaligned_be32(&config_data[config_offset]); config_offset += 4; @@ -227,7 +226,7 @@ int tasdevice_rca_parser(void *context, const struct firmware *fmw) } buf = (unsigned char *)fmw->data; - fw_hdr->img_sz = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->img_sz = get_unaligned_be32(&buf[offset]); offset += 4; if (fw_hdr->img_sz != fmw->size) { dev_err(tas_priv->dev, @@ -238,9 +237,9 @@ int tasdevice_rca_parser(void *context, const struct firmware *fmw) goto out; } - fw_hdr->checksum = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->checksum = get_unaligned_be32(&buf[offset]); offset += 4; - fw_hdr->binary_version_num = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->binary_version_num = get_unaligned_be32(&buf[offset]); if (fw_hdr->binary_version_num < 0x103) { dev_err(tas_priv->dev, "File version 0x%04x is too low", fw_hdr->binary_version_num); @@ -249,7 +248,7 @@ int tasdevice_rca_parser(void *context, const struct firmware *fmw) goto out; } offset += 4; - fw_hdr->drv_fw_version = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->drv_fw_version = get_unaligned_be32(&buf[offset]); offset += 8; fw_hdr->plat_type = buf[offset]; offset += 1; @@ -277,11 +276,11 @@ int tasdevice_rca_parser(void *context, const struct firmware *fmw) for (i = 0; i < TASDEVICE_DEVICE_SUM; i++, offset++) fw_hdr->devs[i] = buf[offset]; - fw_hdr->nconfig = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->nconfig = get_unaligned_be32(&buf[offset]); offset += 4; for (i = 0; i < TASDEVICE_CONFIG_SUM; i++) { - fw_hdr->config_size[i] = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->config_size[i] = get_unaligned_be32(&buf[offset]); offset += 4; total_config_sz += fw_hdr->config_size[i]; } @@ -330,7 +329,7 @@ static int fw_parse_block_data_kernel(struct tasdevice_fw *tas_fmw, /* convert data[offset], data[offset + 1], data[offset + 2] and * data[offset + 3] into host */ - block->type = be32_to_cpup((__be32 *)&data[offset]); + block->type = get_unaligned_be32(&data[offset]); offset += 4; block->is_pchksum_present = data[offset]; @@ -345,10 +344,10 @@ static int 
fw_parse_block_data_kernel(struct tasdevice_fw *tas_fmw, block->ychksum = data[offset]; offset++; - block->blk_size = be32_to_cpup((__be32 *)&data[offset]); + block->blk_size = get_unaligned_be32(&data[offset]); offset += 4; - block->nr_subblocks = be32_to_cpup((__be32 *)&data[offset]); + block->nr_subblocks = get_unaligned_be32(&data[offset]); offset += 4; if (offset + block->blk_size > fmw->size) { @@ -381,7 +380,7 @@ static int fw_parse_data_kernel(struct tasdevice_fw *tas_fmw, offset = -EINVAL; goto out; } - img_data->nr_blk = be32_to_cpup((__be32 *)&data[offset]); + img_data->nr_blk = get_unaligned_be32(&data[offset]); offset += 4; img_data->dev_blks = kcalloc(img_data->nr_blk, @@ -477,14 +476,14 @@ static int fw_parse_variable_header_kernel( offset = -EINVAL; goto out; } - fw_hdr->device_family = be16_to_cpup((__be16 *)&buf[offset]); + fw_hdr->device_family = get_unaligned_be16(&buf[offset]); if (fw_hdr->device_family != 0) { dev_err(tas_priv->dev, "%s:not TAS device\n", __func__); offset = -EINVAL; goto out; } offset += 2; - fw_hdr->device = be16_to_cpup((__be16 *)&buf[offset]); + fw_hdr->device = get_unaligned_be16(&buf[offset]); if (fw_hdr->device >= TASDEVICE_DSP_TAS_MAX_DEVICE || fw_hdr->device == 6) { dev_err(tas_priv->dev, "Unsupported dev %d\n", fw_hdr->device); @@ -502,7 +501,7 @@ static int fw_parse_variable_header_kernel( goto out; } - tas_fmw->nr_programs = be32_to_cpup((__be32 *)&buf[offset]); + tas_fmw->nr_programs = get_unaligned_be32(&buf[offset]); offset += 4; if (tas_fmw->nr_programs == 0 || tas_fmw->nr_programs > @@ -521,14 +520,14 @@ static int fw_parse_variable_header_kernel( for (i = 0; i < tas_fmw->nr_programs; i++) { program = &(tas_fmw->programs[i]); - program->prog_size = be32_to_cpup((__be32 *)&buf[offset]); + program->prog_size = get_unaligned_be32(&buf[offset]); offset += 4; } /* Skip the unused prog_size */ offset += 4 * (TASDEVICE_MAXPROGRAM_NUM_KERNEL - tas_fmw->nr_programs); - tas_fmw->nr_configurations = be32_to_cpup((__be32 *)&buf[offset]); + tas_fmw->nr_configurations = get_unaligned_be32(&buf[offset]); offset += 4; /* The max number of config in firmware greater than 4 pieces of @@ -560,7 +559,7 @@ static int fw_parse_variable_header_kernel( for (i = 0; i < tas_fmw->nr_programs; i++) { config = &(tas_fmw->configs[i]); - config->cfg_size = be32_to_cpup((__be32 *)&buf[offset]); + config->cfg_size = get_unaligned_be32(&buf[offset]); offset += 4; } @@ -598,7 +597,7 @@ static int tasdevice_process_block(void *context, unsigned char *data, switch (subblk_typ) { case TASDEVICE_CMD_SING_W: { int i; - unsigned short len = be16_to_cpup((__be16 *)&data[2]); + unsigned short len = get_unaligned_be16(&data[2]); subblk_offset += 2; if (subblk_offset + 4 * len > sublocksize) { @@ -624,7 +623,7 @@ static int tasdevice_process_block(void *context, unsigned char *data, } break; case TASDEVICE_CMD_BURST: { - unsigned short len = be16_to_cpup((__be16 *)&data[2]); + unsigned short len = get_unaligned_be16(&data[2]); subblk_offset += 2; if (subblk_offset + 4 + len > sublocksize) { @@ -665,7 +664,7 @@ static int tasdevice_process_block(void *context, unsigned char *data, is_err = true; break; } - sleep_time = be16_to_cpup((__be16 *)&data[2]) * 1000; + sleep_time = get_unaligned_be16(&data[2]) * 1000; usleep_range(sleep_time, sleep_time + 50); subblk_offset += 2; } @@ -940,7 +939,7 @@ static int fw_parse_variable_hdr(struct tasdevice_priv offset += len; - fw_hdr->device_family = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->device_family = 
get_unaligned_be32(&buf[offset]); if (fw_hdr->device_family != 0) { dev_err(tas_priv->dev, "%s: not TAS device\n", __func__); offset = -EINVAL; @@ -948,7 +947,7 @@ static int fw_parse_variable_hdr(struct tasdevice_priv } offset += 4; - fw_hdr->device = be32_to_cpup((__be32 *)&buf[offset]); + fw_hdr->device = get_unaligned_be32(&buf[offset]); if (fw_hdr->device >= TASDEVICE_DSP_TAS_MAX_DEVICE || fw_hdr->device == 6) { dev_err(tas_priv->dev, "Unsupported dev %d\n", fw_hdr->device); @@ -993,7 +992,7 @@ static int fw_parse_block_data(struct tasdevice_fw *tas_fmw, offset = -EINVAL; goto out; } - block->type = be32_to_cpup((__be32 *)&data[offset]); + block->type = get_unaligned_be32(&data[offset]); offset += 4; if (tas_fmw->fw_hdr.fixed_hdr.drv_ver >= PPC_DRIVER_CRCCHK) { @@ -1018,7 +1017,7 @@ static int fw_parse_block_data(struct tasdevice_fw *tas_fmw, block->is_ychksum_present = 0; } - block->nr_cmds = be32_to_cpup((__be32 *)&data[offset]); + block->nr_cmds = get_unaligned_be32(&data[offset]); offset += 4; n = block->nr_cmds * 4; @@ -1069,7 +1068,7 @@ static int fw_parse_data(struct tasdevice_fw *tas_fmw, goto out; } offset += n; - img_data->nr_blk = be16_to_cpup((__be16 *)&data[offset]); + img_data->nr_blk = get_unaligned_be16(&data[offset]); offset += 2; img_data->dev_blks = kcalloc(img_data->nr_blk, @@ -1106,7 +1105,7 @@ static int fw_parse_program_data(struct tasdevice_priv *tas_priv, offset = -EINVAL; goto out; } - tas_fmw->nr_programs = be16_to_cpup((__be16 *)&buf[offset]); + tas_fmw->nr_programs = get_unaligned_be16(&buf[offset]); offset += 2; if (tas_fmw->nr_programs == 0) { @@ -1173,7 +1172,7 @@ static int fw_parse_configuration_data( offset = -EINVAL; goto out; } - tas_fmw->nr_configurations = be16_to_cpup((__be16 *)&data[offset]); + tas_fmw->nr_configurations = get_unaligned_be16(&data[offset]); offset += 2; if (tas_fmw->nr_configurations == 0) { @@ -1805,7 +1804,7 @@ static int fw_parse_header(struct tasdevice_priv *tas_priv, /* Convert data[offset], data[offset + 1], data[offset + 2] and * data[offset + 3] into host */ - fw_fixed_hdr->fwsize = be32_to_cpup((__be32 *)&buf[offset]); + fw_fixed_hdr->fwsize = get_unaligned_be32(&buf[offset]); offset += 4; if (fw_fixed_hdr->fwsize != fmw->size) { dev_err(tas_priv->dev, "File size not match, %lu %u", @@ -1814,9 +1813,9 @@ static int fw_parse_header(struct tasdevice_priv *tas_priv, goto out; } offset += 4; - fw_fixed_hdr->ppcver = be32_to_cpup((__be32 *)&buf[offset]); + fw_fixed_hdr->ppcver = get_unaligned_be32(&buf[offset]); offset += 8; - fw_fixed_hdr->drv_ver = be32_to_cpup((__be32 *)&buf[offset]); + fw_fixed_hdr->drv_ver = get_unaligned_be32(&buf[offset]); offset += 72; out: @@ -1858,7 +1857,7 @@ static int fw_parse_calibration_data(struct tasdevice_priv *tas_priv, offset = -EINVAL; goto out; } - tas_fmw->nr_calibrations = be16_to_cpup((__be16 *)&data[offset]); + tas_fmw->nr_calibrations = get_unaligned_be16(&data[offset]); offset += 2; if (tas_fmw->nr_calibrations != 1) { From 030958c2d05e2ed523b8c0fc1ae76a45be76a029 Mon Sep 17 00:00:00 2001 From: Andreas Ziegler Date: Wed, 3 Jul 2024 10:34:36 +0200 Subject: [PATCH 155/268] libbpf: Add NULL checks to bpf_object__{prev_map,next_map} [ Upstream commit cedc12c5b57f7efa6dbebfb2b140e8675f5a2616 ] In the current state, an erroneous call to bpf_object__find_map_by_name(NULL, ...) leads to a segmentation fault through the following call chain: bpf_object__find_map_by_name(obj = NULL, ...) 
-> bpf_object__for_each_map(pos, obj = NULL) -> bpf_object__next_map((obj = NULL), NULL) -> return (obj = NULL)->maps While calling bpf_object__find_map_by_name with obj = NULL is obviously incorrect, this should not lead to a segmentation fault but rather be handled gracefully. As __bpf_map__iter already handles this situation correctly, we can delegate the check for the regular case there and only add a check in case the prev or next parameter is NULL. Signed-off-by: Andreas Ziegler Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240703083436.505124-1-ziegler.andreas@siemens.com Signed-off-by: Sasha Levin --- tools/lib/bpf/libbpf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index de35b9a21dad7d..ceed16a10285ab 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9753,7 +9753,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) struct bpf_map * bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) { - if (prev == NULL) + if (prev == NULL && obj != NULL) return obj->maps; return __bpf_map__iter(prev, obj, 1); @@ -9762,7 +9762,7 @@ bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev) struct bpf_map * bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next) { - if (next == NULL) { + if (next == NULL && obj != NULL) { if (!obj->nr_maps) return NULL; return obj->maps + obj->nr_maps - 1; From 077c7e5fee4b4b3fea29fd3a951a6b01f2802d9e Mon Sep 17 00:00:00 2001 From: Yifan Zha Date: Thu, 27 Jun 2024 15:06:23 +0800 Subject: [PATCH 156/268] drm/amdgpu: Set no_hw_access when VF request full GPU fails [ Upstream commit 33f23fc3155b13c4a96d94a0a22dc26db767440b ] [Why] If the VF requests full GPU access and the request fails, the VF driver can get stuck accessing registers for an extended period during the unload of KMS. [How] Set the no_hw_access flag when the VF request for full GPU access fails. This prevents further hardware access attempts, avoiding the prolonged stuck state. Signed-off-by: Yifan Zha Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index d9dc675b46aed5..22575422ca7ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -137,8 +137,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init) if (virt->ops && virt->ops->req_full_gpu) { r = virt->ops->req_full_gpu(adev, init); - if (r) + if (r) { + adev->no_hw_access = true; return r; + } adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; } From 3236afd1a2fa35c60a9b3a2bcf23b6699f909387 Mon Sep 17 00:00:00 2001 From: "Luis Henriques (SUSE)" Date: Wed, 29 May 2024 10:20:30 +0100 Subject: [PATCH 157/268] ext4: fix possible tid_t sequence overflows [ Upstream commit 63469662cc45d41705f14b4648481d5d29cf5999 ] In the fast commit code there are a few places where tid_t variables are being compared without taking into account the fact that these sequence numbers may wrap. Fix this issue by using the helper functions tid_gt() and tid_geq().
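For context, tid_gt() and tid_geq() are the wrap-safe comparison helpers defined in include/linux/jbd2.h; they let the unsigned subtraction wrap and then interpret the difference as a signed value, the same idiom time_after() uses for jiffies. A minimal sketch of the idea follows (tid_t is an unsigned 32-bit sequence number; the kernel's own definitions are authoritative):

	/* Wrap-safe ordering of transaction IDs: the unsigned difference
	 * is reinterpreted as signed, so the comparison stays correct
	 * after the 32-bit counter wraps around. */
	static inline int tid_gt(tid_t x, tid_t y)
	{
		int difference = (x - y);

		return difference > 0;
	}

	static inline int tid_geq(tid_t x, tid_t y)
	{
		int difference = (x - y);

		return difference >= 0;
	}

This is why the plain "<" and ">=" comparisons on tid_t values, removed in the hunks below, can silently misorder transactions once the sequence counter wraps.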
Signed-off-by: Luis Henriques (SUSE) Reviewed-by: Jan Kara Reviewed-by: Harshad Shirwadkar Link: https://patch.msgid.link/20240529092030.9557-3-luis.henriques@linux.dev Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/fast_commit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5d473e50598f99..f32a91d7c05d90 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -353,7 +353,7 @@ void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handl read_unlock(&sbi->s_journal->j_state_lock); } spin_lock(&sbi->s_fc_lock); - if (sbi->s_fc_ineligible_tid < tid) + if (tid_gt(tid, sbi->s_fc_ineligible_tid)) sbi->s_fc_ineligible_tid = tid; spin_unlock(&sbi->s_fc_lock); WARN_ON(reason >= EXT4_FC_REASON_MAX); @@ -1213,7 +1213,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) if (ret == -EALREADY) { /* There was an ongoing commit, check if we need to restart */ if (atomic_read(&sbi->s_fc_subtid) <= subtid && - commit_tid > journal->j_commit_sequence) + tid_gt(commit_tid, journal->j_commit_sequence)) goto restart_fc; ext4_fc_update_stats(sb, EXT4_FC_STATUS_SKIPPED, 0, 0, commit_tid); @@ -1288,7 +1288,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) list_del_init(&iter->i_fc_list); ext4_clear_inode_state(&iter->vfs_inode, EXT4_STATE_FC_COMMITTING); - if (iter->i_sync_tid <= tid) + if (tid_geq(tid, iter->i_sync_tid)) ext4_fc_reset_inode(&iter->vfs_inode); /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */ smp_mb(); @@ -1319,7 +1319,7 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid) list_splice_init(&sbi->s_fc_q[FC_Q_STAGING], &sbi->s_fc_q[FC_Q_MAIN]); - if (tid >= sbi->s_fc_ineligible_tid) { + if (tid_geq(tid, sbi->s_fc_ineligible_tid)) { sbi->s_fc_ineligible_tid = 0; ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); } From e16c4c245150d7ded5f07a4acf33f26a67914260 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 20 Jun 2024 15:24:05 +0800 Subject: [PATCH 158/268] jbd2: avoid mount failed when commit block is partial submitted [ Upstream commit 0bab8db4152c4a2185a1367db09cc402bdc62d5e ] We encountered a problem where the file system could not be mounted in the power-off scenario. The analysis of the file system mirror shows that only part of the data is written to the last commit block. The valid data of the commit block is concentrated in the first sector. However, the data of the entire block is involved in the checksum calculation. For different hardware, the minimum atomic unit may be different. If the checksum of a committed block is incorrect, clear the data except the 'commit_header' and then calculate the checksum. If the checksum is correct, it is considered that the block is partially committed, then continue to replay the journal.
Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://patch.msgid.link/20240620072405.3533701-1-yebin@huaweicloud.com Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/jbd2/recovery.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 5b771a3d8d9aeb..421c0d360836e0 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -448,6 +448,27 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) return provided == cpu_to_be32(calculated); } +static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf) +{ + struct commit_header *h; + __be32 provided; + __u32 calculated; + void *tmpbuf; + + tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL); + if (!tmpbuf) + return false; + + memcpy(tmpbuf, buf, sizeof(struct commit_header)); + h = tmpbuf; + provided = h->h_chksum[0]; + h->h_chksum[0] = 0; + calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize); + kfree(tmpbuf); + + return provided == cpu_to_be32(calculated); +} + static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, journal_block_tag3_t *tag3, void *buf, __u32 sequence) @@ -814,6 +835,13 @@ static int do_one_pass(journal_t *journal, if (pass == PASS_SCAN && !jbd2_commit_block_csum_verify(journal, bh->b_data)) { + if (jbd2_commit_block_csum_verify_partial( + journal, + bh->b_data)) { + pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n", + next_commit_ID, next_log_block); + goto chksum_ok; + } chksum_error: if (commit_time < last_trans_commit_time) goto ignore_crc_mismatch; @@ -828,6 +856,7 @@ static int do_one_pass(journal_t *journal, } } if (pass == PASS_SCAN) { + chksum_ok: last_trans_commit_time = commit_time; head_block = next_log_block; } @@ -847,6 +876,7 @@ static int do_one_pass(journal_t *journal, next_log_block); need_check_commit_time = true; } + /* If we aren't in the REVOKE pass, then we can * just skip over this block. */ if (pass != PASS_REVOKE) { From abc8b81b6fc7f19ac78bbeb9adf23b95d6b1d441 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Thu, 20 Jun 2024 17:28:55 +0800 Subject: [PATCH 159/268] dma-mapping: benchmark: Don't starve others when doing the test [ Upstream commit 54624acf8843375a6de3717ac18df3b5104c39c5 ] The test thread will start N benchmark kthreads and then schedule out until the test time finished and notify the benchmark kthreads to stop. The benchmark kthreads will keep running until notified to stop. There's a problem with current implementation when the benchmark kthreads number is equal to the CPUs on a non-preemptible kernel: since the scheduler will balance the kthreads across the CPUs and when the test time's out the test thread won't get a chance to be scheduled on any CPU then cannot notify the benchmark kthreads to stop. This can be easily reproduced on a VM (simulated with 16 CPUs) with PREEMPT_VOLUNTARY: estuary:/mnt$ ./dma_map_benchmark -t 16 -s 1 rcu: INFO: rcu_sched self-detected stall on CPU rcu: 10-...!: (5221 ticks this GP) idle=ed24/1/0x4000000000000000 softirq=142/142 fqs=0 rcu: (t=5254 jiffies g=-559 q=45 ncpus=16) rcu: rcu_sched kthread starved for 5255 jiffies! g-559 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x0 ->cpu=12 rcu: Unless rcu_sched kthread gets sufficient CPU time, OOM is now expected behavior. 
rcu: RCU grace-period kthread stack dump: task:rcu_sched state:R running task stack:0 pid:16 tgid:16 ppid:2 flags:0x00000008 Call trace __switch_to+0xec/0x138 __schedule+0x2f8/0x1080 schedule+0x30/0x130 schedule_timeout+0xa0/0x188 rcu_gp_fqs_loop+0x128/0x528 rcu_gp_kthread+0x1c8/0x208 kthread+0xec/0xf8 ret_from_fork+0x10/0x20 Sending NMI from CPU 10 to CPUs 0: NMI backtrace for cpu 0 CPU: 0 PID: 332 Comm: dma-map-benchma Not tainted 6.10.0-rc1-vanilla-LSE #8 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : arm_smmu_cmdq_issue_cmdlist+0x218/0x730 lr : arm_smmu_cmdq_issue_cmdlist+0x488/0x730 sp : ffff80008748b630 x29: ffff80008748b630 x28: 0000000000000000 x27: ffff80008748b780 x26: 0000000000000000 x25: 000000000000bc70 x24: 000000000001bc70 x23: ffff0000c12af080 x22: 0000000000010000 x21: 000000000000ffff x20: ffff80008748b700 x19: ffff0000c12af0c0 x18: 0000000000010000 x17: 0000000000000001 x16: 0000000000000040 x15: ffffffffffffffff x14: 0001ffffffffffff x13: 000000000000ffff x12: 00000000000002f1 x11: 000000000001ffff x10: 0000000000000031 x9 : ffff800080b6b0b8 x8 : ffff0000c2a48000 x7 : 000000000001bc71 x6 : 0001800000000000 x5 : 00000000000002f1 x4 : 01ffffffffffffff x3 : 000000000009aaf1 x2 : 0000000000000018 x1 : 000000000000000f x0 : ffff0000c12af18c Call trace: arm_smmu_cmdq_issue_cmdlist+0x218/0x730 __arm_smmu_tlb_inv_range+0xe0/0x1a8 arm_smmu_iotlb_sync+0xc0/0x128 __iommu_dma_unmap+0x248/0x320 iommu_dma_unmap_page+0x5c/0xe8 dma_unmap_page_attrs+0x38/0x1d0 map_benchmark_thread+0x118/0x2c0 kthread+0xec/0xf8 ret_from_fork+0x10/0x20 Solve this by adding scheduling point in the kthread loop, so if there're other threads in the system they may have a chance to run, especially the thread to notify the test end. However this may degrade the test concurrency so it's recommended to run this on an idle system. Signed-off-by: Yicong Yang Acked-by: Barry Song Signed-off-by: Christoph Hellwig Signed-off-by: Sasha Levin --- kernel/dma/map_benchmark.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index 4950e0b622b1f3..cc19a3efea8960 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -89,6 +89,22 @@ static int map_benchmark_thread(void *data) atomic64_add(map_sq, &map->sum_sq_map); atomic64_add(unmap_sq, &map->sum_sq_unmap); atomic64_inc(&map->loops); + + /* + * We may test for a long time so periodically check whether + * we need to schedule to avoid starving the others. Otherwise + * we may hangup the kernel in a non-preemptible kernel when + * the test kthreads number >= CPU number, the test kthreads + * will run endless on every CPU since the thread resposible + * for notifying the kthread stop (in do_map_benchmark()) + * could not be scheduled. + * + * Note this may degrade the test concurrency since the test + * threads may need to share the CPU time with other load + * in the system. So it's recommended to run this benchmark + * on an idle system. + */ + cond_resched(); } out: From c2618dcb26c7211342b54520b5b148c0d3471c8a Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 3 Jul 2024 09:24:09 +0200 Subject: [PATCH 160/268] wifi: mwifiex: Do not return unused priv in mwifiex_get_priv_by_id() [ Upstream commit c145eea2f75ff7949392aebecf7ef0a81c1f6c14 ] mwifiex_get_priv_by_id() returns the priv pointer corresponding to the bss_num and bss_type, but without checking if the priv is actually currently in use. 
Unused priv pointers do not have a wiphy attached to them which can lead to NULL pointer dereferences further down the callstack. Fix this by returning only used priv pointers which have priv->bss_mode set to something else than NL80211_IFTYPE_UNSPECIFIED. Said NULL pointer dereference happened when an Accesspoint was started with wpa_supplicant -i mlan0 with this config: network={ ssid="somessid" mode=2 frequency=2412 key_mgmt=WPA-PSK WPA-PSK-SHA256 proto=RSN group=CCMP pairwise=CCMP psk="12345678" } When waiting for the AP to be established, interrupting wpa_supplicant with and starting it again this happens: | Unable to handle kernel NULL pointer dereference at virtual address 0000000000000140 | Mem abort info: | ESR = 0x0000000096000004 | EC = 0x25: DABT (current EL), IL = 32 bits | SET = 0, FnV = 0 | EA = 0, S1PTW = 0 | FSC = 0x04: level 0 translation fault | Data abort info: | ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 | CM = 0, WnR = 0, TnD = 0, TagAccess = 0 | GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 | user pgtable: 4k pages, 48-bit VAs, pgdp=0000000046d96000 | [0000000000000140] pgd=0000000000000000, p4d=0000000000000000 | Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP | Modules linked in: caam_jr caamhash_desc spidev caamalg_desc crypto_engine authenc libdes mwifiex_sdio +mwifiex crct10dif_ce cdc_acm onboard_usb_hub fsl_imx8_ddr_perf imx8m_ddrc rtc_ds1307 lm75 rtc_snvs +imx_sdma caam imx8mm_thermal spi_imx error imx_cpufreq_dt fuse ip_tables x_tables ipv6 | CPU: 0 PID: 8 Comm: kworker/0:1 Not tainted 6.9.0-00007-g937242013fce-dirty #18 | Hardware name: somemachine (DT) | Workqueue: events sdio_irq_work | pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : mwifiex_get_cfp+0xd8/0x15c [mwifiex] | lr : mwifiex_get_cfp+0x34/0x15c [mwifiex] | sp : ffff8000818b3a70 | x29: ffff8000818b3a70 x28: ffff000006bfd8a5 x27: 0000000000000004 | x26: 000000000000002c x25: 0000000000001511 x24: 0000000002e86bc9 | x23: ffff000006bfd996 x22: 0000000000000004 x21: ffff000007bec000 | x20: 000000000000002c x19: 0000000000000000 x18: 0000000000000000 | x17: 000000040044ffff x16: 00500072b5503510 x15: ccc283740681e517 | x14: 0201000101006d15 x13: 0000000002e8ff43 x12: 002c01000000ffb1 | x11: 0100000000000000 x10: 02e8ff43002c0100 x9 : 0000ffb100100157 | x8 : ffff000003d20000 x7 : 00000000000002f1 x6 : 00000000ffffe124 | x5 : 0000000000000001 x4 : 0000000000000003 x3 : 0000000000000000 | x2 : 0000000000000000 x1 : 0001000000011001 x0 : 0000000000000000 | Call trace: | mwifiex_get_cfp+0xd8/0x15c [mwifiex] | mwifiex_parse_single_response_buf+0x1d0/0x504 [mwifiex] | mwifiex_handle_event_ext_scan_report+0x19c/0x2f8 [mwifiex] | mwifiex_process_sta_event+0x298/0xf0c [mwifiex] | mwifiex_process_event+0x110/0x238 [mwifiex] | mwifiex_main_process+0x428/0xa44 [mwifiex] | mwifiex_sdio_interrupt+0x64/0x12c [mwifiex_sdio] | process_sdio_pending_irqs+0x64/0x1b8 | sdio_irq_work+0x4c/0x7c | process_one_work+0x148/0x2a0 | worker_thread+0x2fc/0x40c | kthread+0x110/0x114 | ret_from_fork+0x10/0x20 | Code: a94153f3 a8c37bfd d50323bf d65f03c0 (f940a000) | ---[ end trace 0000000000000000 ]--- Signed-off-by: Sascha Hauer Acked-by: Brian Norris Reviewed-by: Francesco Dolcini Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20240703072409.556618-1-s.hauer@pengutronix.de Signed-off-by: Sasha Levin --- drivers/net/wireless/marvell/mwifiex/main.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 
7bdec6c622481d..dc6b4cf616bea6 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -1290,6 +1290,9 @@ mwifiex_get_priv_by_id(struct mwifiex_adapter *adapter, for (i = 0; i < adapter->priv_num; i++) { if (adapter->priv[i]) { + if (adapter->priv[i]->bss_mode == NL80211_IFTYPE_UNSPECIFIED) + continue; + if ((adapter->priv[i]->bss_num == bss_num) && (adapter->priv[i]->bss_type == bss_type)) break; From 6922ab2932622dbc638620aae0e2f6b8eb22940c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 19 Jan 2024 14:57:29 +0100 Subject: [PATCH 161/268] drm/amdgpu: reject gang submit on reserved VMIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 320debca1ba3a81c87247eac84eff976ead09ee0 ] A gang submit won't work if the VMID is reserved and we can't flush out VM changes from multiple engines at the same time. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 15 ++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 1 + 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 61668a784315fc..e361dc37a0890b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1096,6 +1096,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) unsigned int i; int r; + /* + * We can't use gang submit on with reserved VMIDs when the VM changes + * can't be invalidated by more than one engine at the same time. + */ + if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { + for (i = 0; i < p->gang_size; ++i) { + struct drm_sched_entity *entity = p->entities[i]; + struct drm_gpu_scheduler *sched = entity->rq->sched; + struct amdgpu_ring *ring = to_amdgpu_ring(sched); + + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + return -EINVAL; + } + } + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ff1ea99292fbf0..69dfc699d78b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -459,6 +459,19 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, return r; } +/* + * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @vm: the VM to check + * @vmhub: the VMHUB which will be used + * + * Returns: True if the VM will use a reserved VMID. 
+ */ +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +{ + return vm->reserved_vmid[vmhub] || + (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); +} + int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index fa8c42c83d5d26..240fa675126029 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, From 6b1b0a86d9c14e07f9f703eb1da2e4cb88ff3a6c Mon Sep 17 00:00:00 2001 From: Zqiang Date: Thu, 4 Jul 2024 14:52:13 +0800 Subject: [PATCH 162/268] smp: Add missing destroy_work_on_stack() call in smp_call_on_cpu() [ Upstream commit 77aeb1b685f9db73d276bad4bb30d48505a6fd23 ] For CONFIG_DEBUG_OBJECTS_WORK=y kernels sscs.work defined by INIT_WORK_ONSTACK() is initialized by debug_object_init_on_stack() for the debug check in __init_work() to work correctly. But this lacks the counterpart to remove the tracked object from debug objects again, which will cause a debug object warning once the stack is freed. Add the missing destroy_work_on_stack() invocation to cure that. [ tglx: Massaged changelog ] Signed-off-by: Zqiang Signed-off-by: Thomas Gleixner Tested-by: Paul E. McKenney Link: https://lore.kernel.org/r/20240704065213.13559-1-qiang.zhang1211@gmail.com Signed-off-by: Sasha Levin --- kernel/smp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/smp.c b/kernel/smp.c index 695eb13a276d26..3eeffeaf5450c6 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -1119,6 +1119,7 @@ int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys) queue_work_on(cpu, system_wq, &sscs.work); wait_for_completion(&sscs.done); + destroy_work_on_stack(&sscs.work); return sscs.ret; } From 951b696db1885a5ffeb2ea00b300f12ce09893a0 Mon Sep 17 00:00:00 2001 From: Konstantin Komarov Date: Mon, 17 Jun 2024 14:53:57 +0300 Subject: [PATCH 163/268] fs/ntfs3: Check more cases when directory is corrupted [ Upstream commit 744375343662058cbfda96d871786e5a5cbe1947 ] Mark ntfs dirty in this case. Rename ntfs_filldir to ntfs_dir_emit. Signed-off-by: Konstantin Komarov Signed-off-by: Sasha Levin --- fs/ntfs3/dir.c | 52 +++++++++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 9d0a09f00b3845..e1b856ecce61d0 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -272,9 +272,12 @@ struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, return err == -ENOENT ? NULL : err ? 
ERR_PTR(err) : inode; } -static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, - const struct NTFS_DE *e, u8 *name, - struct dir_context *ctx) +/* + * returns false if 'ctx' if full + */ +static inline bool ntfs_dir_emit(struct ntfs_sb_info *sbi, + struct ntfs_inode *ni, const struct NTFS_DE *e, + u8 *name, struct dir_context *ctx) { const struct ATTR_FILE_NAME *fname; unsigned long ino; @@ -284,29 +287,29 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, fname = Add2Ptr(e, sizeof(struct NTFS_DE)); if (fname->type == FILE_NAME_DOS) - return 0; + return true; if (!mi_is_ref(&ni->mi, &fname->home)) - return 0; + return true; ino = ino_get(&e->ref); if (ino == MFT_REC_ROOT) - return 0; + return true; /* Skip meta files. Unless option to show metafiles is set. */ if (!sbi->options->showmeta && ntfs_is_meta_file(sbi, ino)) - return 0; + return true; if (sbi->options->nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN)) - return 0; + return true; name_len = ntfs_utf16_to_nls(sbi, fname->name, fname->name_len, name, PATH_MAX); if (name_len <= 0) { ntfs_warn(sbi->sb, "failed to convert name for inode %lx.", ino); - return 0; + return true; } /* @@ -336,17 +339,20 @@ static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, } } - return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); + return dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); } /* * ntfs_read_hdr - Helper function for ntfs_readdir(). + * + * returns 0 if ok. + * returns -EINVAL if directory is corrupted. + * returns +1 if 'ctx' is full. */ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, const struct INDEX_HDR *hdr, u64 vbo, u64 pos, u8 *name, struct dir_context *ctx) { - int err; const struct NTFS_DE *e; u32 e_size; u32 end = le32_to_cpu(hdr->used); @@ -354,12 +360,12 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, for (;; off += e_size) { if (off + sizeof(struct NTFS_DE) > end) - return -1; + return -EINVAL; e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) - return -1; + return -EINVAL; if (de_is_last(e)) return 0; @@ -369,14 +375,15 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, continue; if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME) - return -1; + return -EINVAL; ctx->pos = vbo + off; /* Submit the name to the filldir callback. */ - err = ntfs_filldir(sbi, ni, e, name, ctx); - if (err) - return err; + if (!ntfs_dir_emit(sbi, ni, e, name, ctx)) { + /* ctx is full. */ + return +1; + } } } @@ -475,8 +482,6 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) vbo = (u64)bit << index_bits; if (vbo >= i_size) { - ntfs_inode_err(dir, "Looks like your dir is corrupt"); - ctx->pos = eod; err = -EINVAL; goto out; } @@ -499,9 +504,16 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) __putname(name); put_indx_node(node); - if (err == -ENOENT) { + if (err == 1) { + /* 'ctx' is full. 
*/ + err = 0; + } else if (err == -ENOENT) { err = 0; ctx->pos = pos; + } else if (err < 0) { + if (err == -EINVAL) + ntfs_inode_err(dir, "directory corrupted"); + ctx->pos = eod; } return err; From e7469c65b3c54a52fe0c63016497f17f9fd6fa22 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 May 2024 14:12:12 -0400 Subject: [PATCH 164/268] btrfs: replace BUG_ON with ASSERT in walk_down_proc() [ Upstream commit 1f9d44c0a12730a24f8bb75c5e1102207413cc9b ] We have a couple of areas where we check to make sure the tree block is locked before looking up or messing with references. This is old code so it has this as BUG_ON(). Convert this to ASSERT() for developers. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6ecfd05e1db96..2d39481fb3cecf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5144,7 +5144,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, if (lookup_info && ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { - BUG_ON(!path->locks[level]); + ASSERT(path->locks[level]); ret = btrfs_lookup_extent_info(trans, fs_info, eb->start, level, 1, &wc->refs[level], @@ -5168,7 +5168,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, /* wc->stage == UPDATE_BACKREF */ if (!(wc->flags[level] & flag)) { - BUG_ON(!path->locks[level]); + ASSERT(path->locks[level]); ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); /* -ENOMEM */ ret = btrfs_dec_ref(trans, root, eb, 0); From 7d1df13bf078ffebfedd361d714ff6cee1ff01b9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 May 2024 14:12:13 -0400 Subject: [PATCH 165/268] btrfs: clean up our handling of refs == 0 in snapshot delete [ Upstream commit b8ccef048354074a548f108e51d0557d6adfd3a3 ] In reada we BUG_ON(refs == 0), which could be unkind since we aren't holding a lock on the extent leaf and thus could get a transient incorrect answer. In walk_down_proc we also BUG_ON(refs == 0), which could happen if we have extent tree corruption. Change that to return -EUCLEAN. In do_walk_down() we catch this case and handle it correctly, however we return -EIO, which -EUCLEAN is a more appropriate error code. Finally in walk_up_proc we have the same BUG_ON(refs == 0), so convert that to proper error handling. Also adjust the error message so we can actually do something with the information. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/extent-tree.c | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2d39481fb3cecf..72851adc1feeb5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5085,7 +5085,15 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, /* We don't care about errors in readahead. */ if (ret < 0) continue; - BUG_ON(refs == 0); + + /* + * This could be racey, it's conceivable that we raced and end + * up with a bogus refs count, if that's the case just skip, if + * we are actually corrupt we will notice when we look up + * everything again with our locks. 
+ */ + if (refs == 0) + continue; if (wc->stage == DROP_REFERENCE) { if (refs == 1) @@ -5152,7 +5160,11 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, BUG_ON(ret == -ENOMEM); if (ret) return ret; - BUG_ON(wc->refs[level] == 0); + if (unlikely(wc->refs[level] == 0)) { + btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0", + eb->start); + return -EUCLEAN; + } } if (wc->stage == DROP_REFERENCE) { @@ -5286,8 +5298,9 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, goto out_unlock; if (unlikely(wc->refs[level - 1] == 0)) { - btrfs_err(fs_info, "Missing references."); - ret = -EIO; + btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0", + bytenr); + ret = -EUCLEAN; goto out_unlock; } *lookup_info = 0; @@ -5487,7 +5500,12 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, path->locks[level] = 0; return ret; } - BUG_ON(wc->refs[level] == 0); + if (unlikely(wc->refs[level] == 0)) { + btrfs_tree_unlock_rw(eb, path->locks[level]); + btrfs_err(fs_info, "bytenr %llu has 0 references, expect > 0", + eb->start); + return -EUCLEAN; + } if (wc->refs[level] == 1) { btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; From 41a0f85e268d72fe04f731b8ceea4748c2d65491 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 18 Jun 2024 15:55:16 +0100 Subject: [PATCH 166/268] btrfs: replace BUG_ON() with error handling at update_ref_for_cow() [ Upstream commit b56329a782314fde5b61058e2a25097af7ccb675 ] Instead of a BUG_ON() just return an error, log an error message and abort the transaction in case we find an extent buffer belonging to the relocation tree that doesn't have the full backref flag set. This is unexpected and should never happen (save for bugs or a potential bad memory). Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/ctree.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 118ad4d2cbbe26..2eb4e03080ac9b 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -451,8 +451,16 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, } owner = btrfs_header_owner(buf); - BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && - !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + if (unlikely(owner == BTRFS_TREE_RELOC_OBJECTID && + !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))) { + btrfs_crit(fs_info, +"found tree block at bytenr %llu level %d root %llu refs %llu flags %llx without full backref flag set", + buf->start, btrfs_header_level(buf), + btrfs_root_id(root), refs, flags); + ret = -EUCLEAN; + btrfs_abort_transaction(trans, ret); + return ret; + } if (refs > 1) { if ((owner == root->root_key.objectid || From 124451bbc2d3aae70f0c54c02129312237d01871 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 2 Jul 2024 22:29:51 -0700 Subject: [PATCH 167/268] cxl/region: Verify target positions using the ordered target list [ Upstream commit 82a3e3a235633aa0575fac9507d648dd80f3437f ] When a root decoder is configured the interleave target list is read from the BIOS populated CFMWS structure. Per the CXL spec 3.1 Table 9-22 the target list is in interleave order. 
The CXL driver populates its decoder target list in the same order and stores it in 'struct cxl_switch_decoder' field "@target: active ordered target list in current decoder configuration" Given the promise of an ordered list, the driver can stop duplicating the work of BIOS and simply check target positions against the ordered list during region configuration. The simplified check against the ordered list is presented here. A follow-on patch will remove the unused code. For Modulo arithmetic this is not a fix, only a simplification. For XOR arithmetic this is a fix for HB IW of 3,6,12. Fixes: f9db85bfec0d ("cxl/acpi: Support CXL XOR Interleave Math (CXIMS)") Signed-off-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/35d08d3aba08fee0f9b86ab1cef0c25116ca8a55.1719980933.git.alison.schofield@intel.com Signed-off-by: Dave Jiang Signed-off-by: Sasha Levin --- drivers/cxl/core/region.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index bc5a43897d5783..5060d9802795ee 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1528,10 +1528,13 @@ static int cxl_region_attach_position(struct cxl_region *cxlr, const struct cxl_dport *dport, int pos) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd; + struct cxl_decoder *cxld = &cxlsd->cxld; + int iw = cxld->interleave_ways; struct cxl_port *iter; int rc; - if (cxlrd->calc_hb(cxlrd, pos) != dport) { + if (dport != cxlrd->cxlsd.target[pos % iw]) { dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), dev_name(&cxlrd->cxlsd.cxld.dev)); From ce2e63804abf4e1f3a1a13ff09c6540f587dc711 Mon Sep 17 00:00:00 2001 From: "yang.zhang" Date: Wed, 8 May 2024 10:24:45 +0800 Subject: [PATCH 168/268] riscv: set trap vector earlier [ Upstream commit 6ad8735994b854b23c824dd6b1dd2126e893a3b4 ] The exception vector of the booting hart is not set before enabling the mmu and then still points to the value of the previous firmware, typically _start. That makes it hard to debug setup_vm() when bad things happen. So fix that by setting the exception vector earlier. Reviewed-by: Alexandre Ghiti Signed-off-by: yang.zhang Link: https://lore.kernel.org/r/20240508022445.6131-1-gaoshanliukou@163.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/head.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 0097c145385f67..9691fa8f2faa7b 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -305,6 +305,9 @@ clear_bss_done: #else mv a0, a1 #endif /* CONFIG_BUILTIN_DTB */ + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 call setup_vm #ifdef CONFIG_MMU la a0, early_pg_dir From 78c6e39fef5c428960aff742149bba302dd46f5a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 May 2024 18:04:35 -0700 Subject: [PATCH 169/268] PCI: Add missing bridge lock to pci_bus_lock() [ Upstream commit a4e772898f8bf2e7e1cf661a12c60a5612c4afab ] One of the true positives that the cfg_access_lock lockdep effort identified is this sequence: WARNING: CPU: 14 PID: 1 at drivers/pci/pci.c:4886 pci_bridge_secondary_bus_reset+0x5d/0x70 RIP: 0010:pci_bridge_secondary_bus_reset+0x5d/0x70 Call Trace: ? __warn+0x8c/0x190 ? pci_bridge_secondary_bus_reset+0x5d/0x70 ? report_bug+0x1f8/0x200 ? 
handle_bug+0x3c/0x70 ? exc_invalid_op+0x18/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? pci_bridge_secondary_bus_reset+0x5d/0x70 pci_reset_bus+0x1d8/0x270 vmd_probe+0x778/0xa10 pci_device_probe+0x95/0x120 Where pci_reset_bus() users are triggering unlocked secondary bus resets. Ironically pci_bus_reset(), several calls down from pci_reset_bus(), uses pci_bus_lock() before issuing the reset which locks everything *but* the bridge itself. For the same motivation as adding: bridge = pci_upstream_bridge(dev); if (bridge) pci_dev_lock(bridge); to pci_reset_function() for the "bus" and "cxl_bus" reset cases, add pci_dev_lock() for @bus->self to pci_bus_lock(). Link: https://lore.kernel.org/r/171711747501.1628941.15217746952476635316.stgit@dwillia2-xfh.jf.intel.com Reported-by: Imre Deak Closes: http://lore.kernel.org/r/6657833b3b5ae_14984b29437@dwillia2-xfh.jf.intel.com.notmuch Signed-off-by: Dan Williams Signed-off-by: Keith Busch [bhelgaas: squash in recursive locking deadlock fix from Keith Busch: https://lore.kernel.org/r/20240711193650.701834-1-kbusch@meta.com] Signed-off-by: Bjorn Helgaas Tested-by: Hans de Goede Tested-by: Kalle Valo Reviewed-by: Dave Jiang Signed-off-by: Sasha Levin --- drivers/pci/pci.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a0f961a380fa95..53e9e9788bd54d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5718,10 +5718,12 @@ static void pci_bus_lock(struct pci_bus *bus) { struct pci_dev *dev; + pci_dev_lock(bus->self); list_for_each_entry(dev, &bus->devices, bus_list) { - pci_dev_lock(dev); if (dev->subordinate) pci_bus_lock(dev->subordinate); + else + pci_dev_lock(dev); } } @@ -5733,8 +5735,10 @@ static void pci_bus_unlock(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } + pci_dev_unlock(bus->self); } /* Return 1 on successful lock, 0 on contention */ @@ -5742,15 +5746,15 @@ static int pci_bus_trylock(struct pci_bus *bus) { struct pci_dev *dev; + if (!pci_dev_trylock(bus->self)) + return 0; + list_for_each_entry(dev, &bus->devices, bus_list) { - if (!pci_dev_trylock(dev)) - goto unlock; if (dev->subordinate) { - if (!pci_bus_trylock(dev->subordinate)) { - pci_dev_unlock(dev); + if (!pci_bus_trylock(dev->subordinate)) goto unlock; - } - } + } else if (!pci_dev_trylock(dev)) + goto unlock; } return 1; @@ -5758,8 +5762,10 @@ static int pci_bus_trylock(struct pci_bus *bus) list_for_each_entry_continue_reverse(dev, &bus->devices, bus_list) { if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } + pci_dev_unlock(bus->self); return 0; } @@ -5791,9 +5797,10 @@ static void pci_slot_lock(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - pci_dev_lock(dev); if (dev->subordinate) pci_bus_lock(dev->subordinate); + else + pci_dev_lock(dev); } } @@ -5819,14 +5826,13 @@ static int pci_slot_trylock(struct pci_slot *slot) list_for_each_entry(dev, &slot->bus->devices, bus_list) { if (!dev->slot || dev->slot != slot) continue; - if (!pci_dev_trylock(dev)) - goto unlock; if (dev->subordinate) { if (!pci_bus_trylock(dev->subordinate)) { pci_dev_unlock(dev); goto unlock; } - } + } else if (!pci_dev_trylock(dev)) + goto unlock; } return 1; @@ -5837,7 +5843,8 @@ static int pci_slot_trylock(struct pci_slot *slot) 
continue; if (dev->subordinate) pci_bus_unlock(dev->subordinate); - pci_dev_unlock(dev); + else + pci_dev_unlock(dev); } return 0; } From 9fd29738377c10749cb292510ebc202988ea0a31 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Wed, 10 Jul 2024 10:12:45 -0700 Subject: [PATCH 170/268] tcp: Don't drop SYN+ACK for simultaneous connect(). [ Upstream commit 23e89e8ee7be73e21200947885a6d3a109a2c58d ] RFC 9293 states that in the case of simultaneous connect(), the connection gets established when SYN+ACK is received. [0] TCP Peer A TCP Peer B 1. CLOSED CLOSED 2. SYN-SENT --> ... 3. SYN-RECEIVED <-- <-- SYN-SENT 4. ... --> SYN-RECEIVED 5. SYN-RECEIVED --> ... 6. ESTABLISHED <-- <-- SYN-RECEIVED 7. ... --> ESTABLISHED However, since commit 0c24604b68fc ("tcp: implement RFC 5961 4.2"), such a SYN+ACK is dropped in tcp_validate_incoming() and responded with Challenge ACK. For example, the write() syscall in the following packetdrill script fails with -EAGAIN, and wrong SNMP stats get incremented. 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0 > S 0:0(0) +0 < S 0:0(0) win 1000 +0 > S. 0:0(0) ack 1 +0 < S. 0:0(0) ack 1 win 1000 +0 write(3, ..., 100) = 100 +0 > P. 1:101(100) ack 1 -- # packetdrill cross-synack.pkt cross-synack.pkt:13: runtime error in write call: Expected result 100 but got -1 with errno 11 (Resource temporarily unavailable) # nstat ... TcpExtTCPChallengeACK 1 0.0 TcpExtTCPSYNChallenge 1 0.0 The problem is that bpf_skops_established() is triggered by the Challenge ACK instead of SYN+ACK. This causes the bpf prog to miss the chance to check if the peer supports a TCP option that is expected to be exchanged in SYN and SYN+ACK. Let's accept a bare SYN+ACK for active-open TCP_SYN_RECV sockets to avoid such a situation. Note that tcp_ack_snd_check() in tcp_rcv_state_process() is skipped not to send an unnecessary ACK, but this could be a bit risky for net.git, so this targets for net-next. 
Link: https://www.rfc-editor.org/rfc/rfc9293.html#section-3.5-7 [0] Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240710171246.87533-2-kuniyu@amazon.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/ipv4/tcp_input.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 24c7c955dc955d..336bc97e86d5ee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5880,6 +5880,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, * RFC 5961 4.2 : Send a challenge ack */ if (th->syn) { + if (sk->sk_state == TCP_SYN_RECV && sk->sk_socket && th->ack && + TCP_SKB_CB(skb)->seq + 1 == TCP_SKB_CB(skb)->end_seq && + TCP_SKB_CB(skb)->seq + 1 == tp->rcv_nxt && + TCP_SKB_CB(skb)->ack_seq == tp->snd_nxt) + goto pass; syn_challenge: if (syn_inerr) TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); @@ -5889,6 +5894,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, goto discard; } +pass: bpf_skops_parse_hdr(sk, skb); return true; @@ -6673,6 +6679,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_fast_path_on(tp); if (sk->sk_shutdown & SEND_SHUTDOWN) tcp_shutdown(sk, SEND_SHUTDOWN); + + if (sk->sk_socket) + goto consume; break; case TCP_FIN_WAIT1: { From 013dae4735d2010544d1f2121bdeb8e6c9ea171e Mon Sep 17 00:00:00 2001 From: Neeraj Sanjay Kale Date: Wed, 15 May 2024 12:36:55 +0530 Subject: [PATCH 171/268] Bluetooth: btnxpuart: Fix Null pointer dereference in btnxpuart_flush() [ Upstream commit c68bbf5e334b35b36ac5b9f0419f1f93f796bad1 ] This adds a check before freeing the rx->skb in flush and close functions to handle the kernel crash seen while removing driver after FW download fails or before FW download completes. 
dmesg log: [ 54.634586] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000080 [ 54.643398] Mem abort info: [ 54.646204] ESR = 0x0000000096000004 [ 54.649964] EC = 0x25: DABT (current EL), IL = 32 bits [ 54.655286] SET = 0, FnV = 0 [ 54.658348] EA = 0, S1PTW = 0 [ 54.661498] FSC = 0x04: level 0 translation fault [ 54.666391] Data abort info: [ 54.669273] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000 [ 54.674768] CM = 0, WnR = 0, TnD = 0, TagAccess = 0 [ 54.674771] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0 [ 54.674775] user pgtable: 4k pages, 48-bit VAs, pgdp=0000000048860000 [ 54.674780] [0000000000000080] pgd=0000000000000000, p4d=0000000000000000 [ 54.703880] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP [ 54.710152] Modules linked in: btnxpuart(-) overlay fsl_jr_uio caam_jr caamkeyblob_desc caamhash_desc caamalg_desc crypto_engine authenc libdes crct10dif_ce polyval_ce polyval_generic snd_soc_imx_spdif snd_soc_imx_card snd_soc_ak5558 snd_soc_ak4458 caam secvio error snd_soc_fsl_micfil snd_soc_fsl_spdif snd_soc_fsl_sai snd_soc_fsl_utils imx_pcm_dma gpio_ir_recv rc_core sch_fq_codel fuse [ 54.744357] CPU: 3 PID: 72 Comm: kworker/u9:0 Not tainted 6.6.3-otbr-g128004619037 #2 [ 54.744364] Hardware name: FSL i.MX8MM EVK board (DT) [ 54.744368] Workqueue: hci0 hci_power_on [ 54.757244] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) [ 54.757249] pc : kfree_skb_reason+0x18/0xb0 [ 54.772299] lr : btnxpuart_flush+0x40/0x58 [btnxpuart] [ 54.782921] sp : ffff8000805ebca0 [ 54.782923] x29: ffff8000805ebca0 x28: ffffa5c6cf1869c0 x27: ffffa5c6cf186000 [ 54.782931] x26: ffff377b84852400 x25: ffff377b848523c0 x24: ffff377b845e7230 [ 54.782938] x23: ffffa5c6ce8dbe08 x22: ffffa5c6ceb65410 x21: 00000000ffffff92 [ 54.782945] x20: ffffa5c6ce8dbe98 x19: ffffffffffffffac x18: ffffffffffffffff [ 54.807651] x17: 0000000000000000 x16: ffffa5c6ce2824ec x15: ffff8001005eb857 [ 54.821917] x14: 0000000000000000 x13: ffffa5c6cf1a02e0 x12: 0000000000000642 [ 54.821924] x11: 0000000000000040 x10: ffffa5c6cf19d690 x9 : ffffa5c6cf19d688 [ 54.821931] x8 : ffff377b86000028 x7 : 0000000000000000 x6 : 0000000000000000 [ 54.821938] x5 : ffff377b86000000 x4 : 0000000000000000 x3 : 0000000000000000 [ 54.843331] x2 : 0000000000000000 x1 : 0000000000000002 x0 : ffffffffffffffac [ 54.857599] Call trace: [ 54.857601] kfree_skb_reason+0x18/0xb0 [ 54.863878] btnxpuart_flush+0x40/0x58 [btnxpuart] [ 54.863888] hci_dev_open_sync+0x3a8/0xa04 [ 54.872773] hci_power_on+0x54/0x2e4 [ 54.881832] process_one_work+0x138/0x260 [ 54.881842] worker_thread+0x32c/0x438 [ 54.881847] kthread+0x118/0x11c [ 54.881853] ret_from_fork+0x10/0x20 [ 54.896406] Code: a9be7bfd 910003fd f9000bf3 aa0003f3 (b940d400) [ 54.896410] ---[ end trace 0000000000000000 ]--- Signed-off-by: Neeraj Sanjay Kale Tested-by: Guillaume Legoupil Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Sasha Levin --- drivers/bluetooth/btnxpuart.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c index 814dd966b1a450..5ee9a8b8dcfdb8 100644 --- a/drivers/bluetooth/btnxpuart.c +++ b/drivers/bluetooth/btnxpuart.c @@ -1326,8 +1326,10 @@ static int btnxpuart_close(struct hci_dev *hdev) serdev_device_close(nxpdev->serdev); skb_queue_purge(&nxpdev->txq); - kfree_skb(nxpdev->rx_skb); - nxpdev->rx_skb = NULL; + if (!IS_ERR_OR_NULL(nxpdev->rx_skb)) { + kfree_skb(nxpdev->rx_skb); + nxpdev->rx_skb = NULL; + } clear_bit(BTNXPUART_SERDEV_OPEN, 
&nxpdev->tx_state); return 0; } @@ -1342,8 +1344,10 @@ static int btnxpuart_flush(struct hci_dev *hdev) cancel_work_sync(&nxpdev->tx_work); - kfree_skb(nxpdev->rx_skb); - nxpdev->rx_skb = NULL; + if (!IS_ERR_OR_NULL(nxpdev->rx_skb)) { + kfree_skb(nxpdev->rx_skb); + nxpdev->rx_skb = NULL; + } return 0; } From 938acd8e3aa04b53843d03e274ac2f93501a8bea Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 14 Jul 2024 01:53:32 +0300 Subject: [PATCH 172/268] net: dpaa: avoid on-stack arrays of NR_CPUS elements [ Upstream commit 555a05d84ca2c587e2d4777006e2c2fb3dfbd91d ] The dpaa-eth driver is written for PowerPC and Arm SoCs which have 1-24 CPUs. It depends on CONFIG_NR_CPUS having a reasonably small value in Kconfig. Otherwise, there are 2 functions which allocate on-stack arrays of NR_CPUS elements, and these can quickly explode in size, leading to warnings such as: drivers/net/ethernet/freescale/dpaa/dpaa_eth.c:3280:12: warning: stack frame size (16664) exceeds limit (2048) in 'dpaa_eth_probe' [-Wframe-larger-than] The problem is twofold: - Reducing the array size to the boot-time num_possible_cpus() (rather than the compile-time NR_CPUS) creates a variable-length array, which should be avoided in the Linux kernel. - Using NR_CPUS as an array size makes the driver blow up in stack consumption with generic, as opposed to hand-crafted, .config files. A simple solution is to use dynamic allocation for num_possible_cpus() elements (aka a small number determined at runtime). Link: https://lore.kernel.org/all/202406261920.l5pzM1rj-lkp@intel.com/ Signed-off-by: Vladimir Oltean Reviewed-by: Breno Leitao Acked-by: Madalin Bucur Link: https://patch.msgid.link/20240713225336.1746343-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 20 ++++++++++++++----- .../ethernet/freescale/dpaa/dpaa_ethtool.c | 10 +++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index dcbc598b11c6c8..c6a3eefd83bff9 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -931,14 +931,18 @@ static inline void dpaa_setup_egress(const struct dpaa_priv *priv, } } -static void dpaa_fq_setup(struct dpaa_priv *priv, - const struct dpaa_fq_cbs *fq_cbs, - struct fman_port *tx_port) +static int dpaa_fq_setup(struct dpaa_priv *priv, + const struct dpaa_fq_cbs *fq_cbs, + struct fman_port *tx_port) { int egress_cnt = 0, conf_cnt = 0, num_portals = 0, portal_cnt = 0, cpu; const cpumask_t *affine_cpus = qman_affine_cpus(); - u16 channels[NR_CPUS]; struct dpaa_fq *fq; + u16 *channels; + + channels = kcalloc(num_possible_cpus(), sizeof(u16), GFP_KERNEL); + if (!channels) + return -ENOMEM; for_each_cpu_and(cpu, affine_cpus, cpu_online_mask) channels[num_portals++] = qman_affine_channel(cpu); @@ -997,6 +1001,10 @@ static void dpaa_fq_setup(struct dpaa_priv *priv, break; } } + + kfree(channels); + + return 0; } static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv, @@ -3416,7 +3424,9 @@ static int dpaa_eth_probe(struct platform_device *pdev) */ dpaa_eth_add_channel(priv->channel, &pdev->dev); - dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]); + err = dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]); + if (err) + goto free_dpaa_bps; /* Create a congestion group for this netdev, with * dynamically-allocated CGR ID. 
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c index 5bd0b36d1feb58..3f8cd4a7d84576 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c @@ -457,12 +457,16 @@ static int dpaa_set_coalesce(struct net_device *dev, struct netlink_ext_ack *extack) { const cpumask_t *cpus = qman_affine_cpus(); - bool needs_revert[NR_CPUS] = {false}; struct qman_portal *portal; u32 period, prev_period; u8 thresh, prev_thresh; + bool *needs_revert; int cpu, res; + needs_revert = kcalloc(num_possible_cpus(), sizeof(bool), GFP_KERNEL); + if (!needs_revert) + return -ENOMEM; + period = c->rx_coalesce_usecs; thresh = c->rx_max_coalesced_frames; @@ -485,6 +489,8 @@ static int dpaa_set_coalesce(struct net_device *dev, needs_revert[cpu] = true; } + kfree(needs_revert); + return 0; revert_values: @@ -498,6 +504,8 @@ static int dpaa_set_coalesce(struct net_device *dev, qman_dqrr_set_ithresh(portal, prev_thresh); } + kfree(needs_revert); + return res; } From fd8e14122374f62cb850d44838ff8d2e5aee28ab Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 20 Jul 2024 22:41:07 +0800 Subject: [PATCH 173/268] LoongArch: Use correct API to map cmdline in relocate_kernel() [ Upstream commit 0124fbb4c6dba23dbdf80c829be68adbccde2722 ] fw_arg1 is in memory space rather than I/O space, so we should use early_memremap_ro() instead of early_ioremap() to map the cmdline. Moreover, we should unmap it after using. Suggested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Signed-off-by: Huacai Chen Signed-off-by: Sasha Levin --- arch/loongarch/kernel/relocate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 1acfa704c8d09b..0eddd4a66b8745 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -170,7 +171,7 @@ unsigned long __init relocate_kernel(void) unsigned long kernel_length; unsigned long random_offset = 0; void *location_new = _text; /* Default to original kernel start */ - char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */ + char *cmdline = early_memremap_ro(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */ strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE); @@ -182,6 +183,7 @@ unsigned long __init relocate_kernel(void) random_offset = (unsigned long)location_new - (unsigned long)(_text); #endif reloc_offset = (unsigned long)_text - VMLINUX_LOAD_ADDRESS; + early_memunmap(cmdline, COMMAND_LINE_SIZE); if (random_offset) { kernel_length = (long)(_end) - (long)(_text); From e42ea96d6d36a16526cb82b8aa2e5422814c3250 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 19 Jul 2024 12:40:24 +0200 Subject: [PATCH 174/268] regmap: maple: work around gcc-14.1 false-positive warning [ Upstream commit 542440fd7b30983cae23e32bd22f69a076ec7ef4 ] With gcc-14.1, there is a false-postive -Wuninitialized warning in regcache_maple_drop: drivers/base/regmap/regcache-maple.c: In function 'regcache_maple_drop': drivers/base/regmap/regcache-maple.c:113:23: error: 'lower_index' is used uninitialized [-Werror=uninitialized] 113 | unsigned long lower_index, lower_last; | ^~~~~~~~~~~ drivers/base/regmap/regcache-maple.c:113:36: error: 'lower_last' is used uninitialized [-Werror=uninitialized] 113 | unsigned long lower_index, lower_last; | ^~~~~~~~~~ I've 
created a reduced test case to see if this needs to be reported as a gcc, but it appears that the gcc-14.x branch already has a change that turns this into a more sensible -Wmaybe-uninitialized warning, so I ended up not reporting it so far. The reduced test case also produces a warning for gcc-13 and gcc-12 but I don't see that with the version in the kernel. Link: https://godbolt.org/z/oKbohKqd3 Link: https://lore.kernel.org/all/CAMuHMdWj=FLmkazPbYKPevDrcym2_HDb_U7Mb9YE9ovrP0jJfA@mail.gmail.com/ Signed-off-by: Arnd Bergmann Link: https://patch.msgid.link/20240719104030.1382465-1-arnd@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/base/regmap/regcache-maple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/base/regmap/regcache-maple.c b/drivers/base/regmap/regcache-maple.c index 55999a50ccc0b8..0b6c2277128b42 100644 --- a/drivers/base/regmap/regcache-maple.c +++ b/drivers/base/regmap/regcache-maple.c @@ -110,7 +110,8 @@ static int regcache_maple_drop(struct regmap *map, unsigned int min, struct maple_tree *mt = map->cache; MA_STATE(mas, mt, min, max); unsigned long *entry, *lower, *upper; - unsigned long lower_index, lower_last; + /* initialized to work around false-positive -Wuninitialized warning */ + unsigned long lower_index = 0, lower_last = 0; unsigned long upper_index, upper_last; int ret = 0; From c03185f4a23e7f89d84c9981091770e876e64480 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 23 Jul 2024 09:59:54 +0100 Subject: [PATCH 175/268] vfs: Fix potential circular locking through setxattr() and removexattr() [ Upstream commit c3a5e3e872f3688ae0dc57bb78ca633921d96a91 ] When using cachefiles, lockdep may emit something similar to the circular locking dependency notice below. The problem appears to stem from the following: (1) Cachefiles manipulates xattrs on the files in its cache when called from ->writepages(). (2) The setxattr() and removexattr() system call handlers get the name (and value) from userspace after taking the sb_writers lock, putting accesses of the vma->vm_lock and mm->mmap_lock inside of that. (3) The afs filesystem uses a per-inode lock to prevent multiple revalidation RPCs and in writeback vs truncate to prevent parallel operations from deadlocking against the server on one side and local page locks on the other. Fix this by moving the getting of the name and value in {get,remove}xattr() outside of the sb_writers lock. This also has the minor benefits that we don't need to reget these in the event of a retry and we never try to take the sb_writers lock in the event we can't pull the name and value into the kernel. Alternative approaches that might fix this include moving the dispatch of a write to the cache off to a workqueue or trying to do without the validation lock in afs. Note that this might also affect other filesystems that use netfslib and/or cachefiles. ====================================================== WARNING: possible circular locking dependency detected 6.10.0-build2+ #956 Not tainted ------------------------------------------------------ fsstress/6050 is trying to acquire lock: ffff888138fd82f0 (mapping.invalidate_lock#3){++++}-{3:3}, at: filemap_fault+0x26e/0x8b0 but task is already holding lock: ffff888113f26d18 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x165/0x250 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #4 (&vma->vm_lock->lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_write+0x3b/0x50 vma_start_write+0x6b/0xa0 vma_link+0xcc/0x140 insert_vm_struct+0xb7/0xf0 alloc_bprm+0x2c1/0x390 kernel_execve+0x65/0x1a0 call_usermodehelper_exec_async+0x14d/0x190 ret_from_fork+0x24/0x40 ret_from_fork_asm+0x1a/0x30 -> #3 (&mm->mmap_lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 __might_fault+0x7c/0xb0 strncpy_from_user+0x25/0x160 removexattr+0x7f/0x100 __do_sys_fremovexattr+0x7e/0xb0 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #2 (sb_writers#14){.+.+}-{0:0}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 percpu_down_read+0x3c/0x90 vfs_iocb_iter_write+0xe9/0x1d0 __cachefiles_write+0x367/0x430 cachefiles_issue_write+0x299/0x2f0 netfs_advance_write+0x117/0x140 netfs_write_folio.isra.0+0x5ca/0x6e0 netfs_writepages+0x230/0x2f0 afs_writepages+0x4d/0x70 do_writepages+0x1e8/0x3e0 filemap_fdatawrite_wbc+0x84/0xa0 __filemap_fdatawrite_range+0xa8/0xf0 file_write_and_wait_range+0x59/0x90 afs_release+0x10f/0x270 __fput+0x25f/0x3d0 __do_sys_close+0x43/0x70 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #1 (&vnode->validate_lock){++++}-{3:3}: __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_read+0x95/0x200 afs_writepages+0x37/0x70 do_writepages+0x1e8/0x3e0 filemap_fdatawrite_wbc+0x84/0xa0 filemap_invalidate_inode+0x167/0x1e0 netfs_unbuffered_write_iter+0x1bd/0x2d0 vfs_write+0x22e/0x320 ksys_write+0xbc/0x130 do_syscall_64+0x9f/0x100 entry_SYSCALL_64_after_hwframe+0x76/0x7e -> #0 (mapping.invalidate_lock#3){++++}-{3:3}: check_noncircular+0x119/0x160 check_prev_add+0x195/0x430 __lock_acquire+0xaf0/0xd80 lock_acquire.part.0+0x103/0x280 down_read+0x95/0x200 filemap_fault+0x26e/0x8b0 __do_fault+0x57/0xd0 do_pte_missing+0x23b/0x320 __handle_mm_fault+0x2d4/0x320 handle_mm_fault+0x14f/0x260 do_user_addr_fault+0x2a2/0x500 exc_page_fault+0x71/0x90 asm_exc_page_fault+0x22/0x30 other info that might help us debug this: Chain exists of: mapping.invalidate_lock#3 --> &mm->mmap_lock --> &vma->vm_lock->lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- rlock(&vma->vm_lock->lock); lock(&mm->mmap_lock); lock(&vma->vm_lock->lock); rlock(mapping.invalidate_lock#3); *** DEADLOCK *** 1 lock held by fsstress/6050: #0: ffff888113f26d18 (&vma->vm_lock->lock){++++}-{3:3}, at: lock_vma_under_rcu+0x165/0x250 stack backtrace: CPU: 0 PID: 6050 Comm: fsstress Not tainted 6.10.0-build2+ #956 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Call Trace: dump_stack_lvl+0x57/0x80 check_noncircular+0x119/0x160 ? queued_spin_lock_slowpath+0x4be/0x510 ? __pfx_check_noncircular+0x10/0x10 ? __pfx_queued_spin_lock_slowpath+0x10/0x10 ? mark_lock+0x47/0x160 ? init_chain_block+0x9c/0xc0 ? add_chain_block+0x84/0xf0 check_prev_add+0x195/0x430 __lock_acquire+0xaf0/0xd80 ? __pfx___lock_acquire+0x10/0x10 ? __lock_release.isra.0+0x13b/0x230 lock_acquire.part.0+0x103/0x280 ? filemap_fault+0x26e/0x8b0 ? __pfx_lock_acquire.part.0+0x10/0x10 ? rcu_is_watching+0x34/0x60 ? lock_acquire+0xd7/0x120 down_read+0x95/0x200 ? filemap_fault+0x26e/0x8b0 ? __pfx_down_read+0x10/0x10 ? __filemap_get_folio+0x25/0x1a0 filemap_fault+0x26e/0x8b0 ? __pfx_filemap_fault+0x10/0x10 ? find_held_lock+0x7c/0x90 ? __pfx___lock_release.isra.0+0x10/0x10 ? __pte_offset_map+0x99/0x110 __do_fault+0x57/0xd0 do_pte_missing+0x23b/0x320 __handle_mm_fault+0x2d4/0x320 ? 
__pfx___handle_mm_fault+0x10/0x10 handle_mm_fault+0x14f/0x260 do_user_addr_fault+0x2a2/0x500 exc_page_fault+0x71/0x90 asm_exc_page_fault+0x22/0x30 Signed-off-by: David Howells Link: https://lore.kernel.org/r/2136178.1721725194@warthog.procyon.org.uk cc: Alexander Viro cc: Christian Brauner cc: Jan Kara cc: Jeff Layton cc: Gao Xiang cc: Matthew Wilcox cc: netfs@lists.linux.dev cc: linux-erofs@lists.ozlabs.org cc: linux-fsdevel@vger.kernel.org [brauner: fix minor issues] Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/xattr.c | 91 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 48 insertions(+), 43 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index efd4736bc94b09..c20046548f218e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -631,10 +631,9 @@ int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, ctx->kvalue, ctx->size, ctx->flags); } -static long -setxattr(struct mnt_idmap *idmap, struct dentry *d, - const char __user *name, const void __user *value, size_t size, - int flags) +static int path_setxattr(const char __user *pathname, + const char __user *name, const void __user *value, + size_t size, int flags, unsigned int lookup_flags) { struct xattr_name kname; struct xattr_ctx ctx = { @@ -644,33 +643,20 @@ setxattr(struct mnt_idmap *idmap, struct dentry *d, .kname = &kname, .flags = flags, }; + struct path path; int error; error = setxattr_copy(name, &ctx); if (error) return error; - error = do_setxattr(idmap, d, &ctx); - - kvfree(ctx.kvalue); - return error; -} - -static int path_setxattr(const char __user *pathname, - const char __user *name, const void __user *value, - size_t size, int flags, unsigned int lookup_flags) -{ - struct path path; - int error; - retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) - return error; + goto out; error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(mnt_idmap(path.mnt), path.dentry, name, - value, size, flags); + error = do_setxattr(mnt_idmap(path.mnt), path.dentry, &ctx); mnt_drop_write(path.mnt); } path_put(&path); @@ -678,6 +664,9 @@ static int path_setxattr(const char __user *pathname, lookup_flags |= LOOKUP_REVAL; goto retry; } + +out: + kvfree(ctx.kvalue); return error; } @@ -698,20 +687,32 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { - struct fd f = fdget(fd); - int error = -EBADF; + struct xattr_name kname; + struct xattr_ctx ctx = { + .cvalue = value, + .kvalue = NULL, + .size = size, + .kname = &kname, + .flags = flags, + }; + int error; + CLASS(fd, f)(fd); if (!f.file) - return error; + return -EBADF; + audit_file(f.file); + error = setxattr_copy(name, &ctx); + if (error) + return error; + error = mnt_want_write_file(f.file); if (!error) { - error = setxattr(file_mnt_idmap(f.file), - f.file->f_path.dentry, name, - value, size, flags); + error = do_setxattr(file_mnt_idmap(f.file), + f.file->f_path.dentry, &ctx); mnt_drop_write_file(f.file); } - fdput(f); + kvfree(ctx.kvalue); return error; } @@ -900,9 +901,17 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) * Extended attribute REMOVE operations */ static long -removexattr(struct mnt_idmap *idmap, struct dentry *d, - const char __user *name) +removexattr(struct mnt_idmap *idmap, struct dentry *d, const char *name) { + if (is_posix_acl_xattr(name)) + return vfs_remove_acl(idmap, d, name); + return vfs_removexattr(idmap, d, name); +} + 
+static int path_removexattr(const char __user *pathname, + const char __user *name, unsigned int lookup_flags) +{ + struct path path; int error; char kname[XATTR_NAME_MAX + 1]; @@ -911,25 +920,13 @@ removexattr(struct mnt_idmap *idmap, struct dentry *d, error = -ERANGE; if (error < 0) return error; - - if (is_posix_acl_xattr(kname)) - return vfs_remove_acl(idmap, d, kname); - - return vfs_removexattr(idmap, d, kname); -} - -static int path_removexattr(const char __user *pathname, - const char __user *name, unsigned int lookup_flags) -{ - struct path path; - int error; retry: error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path); if (error) return error; error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(mnt_idmap(path.mnt), path.dentry, name); + error = removexattr(mnt_idmap(path.mnt), path.dentry, kname); mnt_drop_write(path.mnt); } path_put(&path); @@ -955,15 +952,23 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct fd f = fdget(fd); + char kname[XATTR_NAME_MAX + 1]; int error = -EBADF; if (!f.file) return error; audit_file(f.file); + + error = strncpy_from_user(kname, name, sizeof(kname)); + if (error == 0 || error == sizeof(kname)) + error = -ERANGE; + if (error < 0) + return error; + error = mnt_want_write_file(f.file); if (!error) { error = removexattr(file_mnt_idmap(f.file), - f.file->f_path.dentry, name); + f.file->f_path.dentry, kname); mnt_drop_write_file(f.file); } fdput(f); From 1f489656d506c9c27ef4b245c96334e923141ffd Mon Sep 17 00:00:00 2001 From: Frank Li Date: Mon, 3 Jun 2024 11:15:27 -0400 Subject: [PATCH 176/268] i3c: master: svc: resend target address when get NACK [ Upstream commit 9bc7501b0b90f4d0c34b97c14ff1f708ce7ad8f3 ] According to I3C Spec 1.1.1, 11-Jun-2021, section: 5.1.2.2.3: If the Controller chooses to start an I3C Message with an I3C Dynamic Address, then special provisions shall be made because that same I3C Target may be initiating an IBI or a Controller Role Request. So, one of three things may happen: (skip 1, 2) 3. The Addresses match and the RnW bits also match, and so neither Controller nor Target will ACK since both are expecting the other side to provide ACK. As a result, each side might think it had "won" arbitration, but neither side would continue, as each would subsequently see that the other did not provide ACK. ... For either value of RnW: Due to the NACK, the Controller shall defer the Private Write or Private Read, and should typically transmit the Target ^^^^^^^^^^^^^^^^^^^ Address again after a Repeated START (i.e., the next one or any one prior ^^^^^^^^^^^^^ to a STOP in the Frame). Since the Address Header following a Repeated START is not arbitrated, the Controller will always win (see Section 5.1.2.2.4). Resend target address again if address is not 7E and controller get NACK. 
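In outline, the retry that the quoted spec passage calls for looks like the sketch below (the helper names are placeholders, not driver functions; the real register-level sequence is in the diff that follows): issue the address, and on a NACK that may only be arbitration noise, clear the error flag and emit the address once more after a repeated START, unless the address is 0x7E or the retry budget is spent.

    /* Sketch only: one retry of a private transfer after an arbitration NACK. */
    int retry = 2;

    while (retry--) {
            emit_start_with_addr(master, addr, rnw);   /* (repeated) START + target address */
            if (!addr_was_nacked(master))
                    break;                             /* ACKed, carry on with the transfer */
            if (retry && addr != 0x7e)
                    clear_nack_flag(master);           /* defer, resend after a repeated START */
            else
                    return -ENXIO;                     /* genuine NACK, give up */
    }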
Reviewed-by: Miquel Raynal Signed-off-by: Frank Li Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/i3c/master/svc-i3c-master.c | 58 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/drivers/i3c/master/svc-i3c-master.c b/drivers/i3c/master/svc-i3c-master.c index ccd0c4680be290..acc937275c184c 100644 --- a/drivers/i3c/master/svc-i3c-master.c +++ b/drivers/i3c/master/svc-i3c-master.c @@ -1037,29 +1037,59 @@ static int svc_i3c_master_xfer(struct svc_i3c_master *master, u8 *in, const u8 *out, unsigned int xfer_len, unsigned int *actual_len, bool continued) { + int retry = 2; u32 reg; int ret; /* clean SVC_I3C_MINT_IBIWON w1c bits */ writel(SVC_I3C_MINT_IBIWON, master->regs + SVC_I3C_MSTATUS); - writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | - xfer_type | - SVC_I3C_MCTRL_IBIRESP_NACK | - SVC_I3C_MCTRL_DIR(rnw) | - SVC_I3C_MCTRL_ADDR(addr) | - SVC_I3C_MCTRL_RDTERM(*actual_len), - master->regs + SVC_I3C_MCTRL); - ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, + while (retry--) { + writel(SVC_I3C_MCTRL_REQUEST_START_ADDR | + xfer_type | + SVC_I3C_MCTRL_IBIRESP_NACK | + SVC_I3C_MCTRL_DIR(rnw) | + SVC_I3C_MCTRL_ADDR(addr) | + SVC_I3C_MCTRL_RDTERM(*actual_len), + master->regs + SVC_I3C_MCTRL); + + ret = readl_poll_timeout(master->regs + SVC_I3C_MSTATUS, reg, SVC_I3C_MSTATUS_MCTRLDONE(reg), 0, 1000); - if (ret) - goto emit_stop; + if (ret) + goto emit_stop; - if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { - ret = -ENXIO; - *actual_len = 0; - goto emit_stop; + if (readl(master->regs + SVC_I3C_MERRWARN) & SVC_I3C_MERRWARN_NACK) { + /* + * According to I3C Spec 1.1.1, 11-Jun-2021, section: 5.1.2.2.3. + * If the Controller chooses to start an I3C Message with an I3C Dynamic + * Address, then special provisions shall be made because that same I3C + * Target may be initiating an IBI or a Controller Role Request. So, one of + * three things may happen: (skip 1, 2) + * + * 3. The Addresses match and the RnW bits also match, and so neither + * Controller nor Target will ACK since both are expecting the other side to + * provide ACK. As a result, each side might think it had "won" arbitration, + * but neither side would continue, as each would subsequently see that the + * other did not provide ACK. + * ... + * For either value of RnW: Due to the NACK, the Controller shall defer the + * Private Write or Private Read, and should typically transmit the Target + * Address again after a Repeated START (i.e., the next one or any one prior + * to a STOP in the Frame). Since the Address Header following a Repeated + * START is not arbitrated, the Controller will always win (see Section + * 5.1.2.2.4). + */ + if (retry && addr != 0x7e) { + writel(SVC_I3C_MERRWARN_NACK, master->regs + SVC_I3C_MERRWARN); + } else { + ret = -ENXIO; + *actual_len = 0; + goto emit_stop; + } + } else { + break; + } } /* From 5a022269abb22809f2a174b90f200fc4b9526058 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 28 Jun 2024 16:15:58 +0300 Subject: [PATCH 177/268] i3c: mipi-i3c-hci: Error out instead on BUG_ON() in IBI DMA setup [ Upstream commit 8a2be2f1db268ec735419e53ef04ca039fc027dc ] Definitely condition dma_get_cache_alignment * defined value > 256 during driver initialization is not reason to BUG_ON(). Turn that to graceful error out with -EINVAL. 
Signed-off-by: Jarkko Nikula Link: https://lore.kernel.org/r/20240628131559.502822-3-jarkko.nikula@linux.intel.com Signed-off-by: Alexandre Belloni Signed-off-by: Sasha Levin --- drivers/i3c/master/mipi-i3c-hci/dma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/i3c/master/mipi-i3c-hci/dma.c b/drivers/i3c/master/mipi-i3c-hci/dma.c index 337c95d43f3f64..edc3a69bfe31fa 100644 --- a/drivers/i3c/master/mipi-i3c-hci/dma.c +++ b/drivers/i3c/master/mipi-i3c-hci/dma.c @@ -291,7 +291,10 @@ static int hci_dma_init(struct i3c_hci *hci) rh->ibi_chunk_sz = dma_get_cache_alignment(); rh->ibi_chunk_sz *= IBI_CHUNK_CACHELINES; - BUG_ON(rh->ibi_chunk_sz > 256); + if (rh->ibi_chunk_sz > 256) { + ret = -EINVAL; + goto err_out; + } ibi_status_ring_sz = rh->ibi_status_sz * rh->ibi_status_entries; ibi_data_ring_sz = rh->ibi_chunk_sz * rh->ibi_chunks_total; From d43fde5ebfe7ea1b8189c67dcf37ceb5371158b8 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 29 Jul 2024 10:46:04 +0800 Subject: [PATCH 178/268] kselftests: dmabuf-heaps: Ensure the driver name is null-terminated [ Upstream commit 291e4baf70019f17a81b7b47aeb186b27d222159 ] Even if a vgem device is configured in, we will skip the import_vgem_fd() test almost every time. TAP version 13 1..11 # Testing heap: system # ======================================= # Testing allocation and importing: ok 1 # SKIP Could not open vgem -1 The problem is that we use the DRM_IOCTL_VERSION ioctl to query the driver version information but leave the name field a non-null-terminated string. Terminate it properly to actually test against the vgem device. While at it, let's check the length of the driver name is exactly 4 bytes and return early otherwise (in case there is a name like "vgemfoo" that gets converted to "vgem\0" unexpectedly). Signed-off-by: Zenghui Yu Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240729024604.2046-1-yuzenghui@huawei.com Signed-off-by: Sasha Levin --- tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 890a8236a8ba73..2809f9a25c4335 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -28,9 +28,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } From 16ccaf581da4fcf1e4d66086cf37263f9a656d43 Mon Sep 17 00:00:00 2001 From: Devyn Liu Date: Tue, 30 Jul 2024 11:20:40 +0800 Subject: [PATCH 179/268] spi: hisi-kunpeng: Add verification for the max_frequency provided by the firmware [ Upstream commit 5127c42c77de18651aa9e8e0a3ced190103b449c ] If the value of max_speed_hz is 0, it may cause a division by zero error in hisi_calc_effective_speed(). The value of max_speed_hz is provided by firmware. Firmware is generally considered as a trusted domain. However, as division by zero errors can cause system failure, for defense measure, the value of max_speed is validated here. So 0 is regarded as invalid and an error code is returned. 
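The failure mode being avoided is plain integer division by a firmware-supplied zero. As a rough sketch of the kind of divider math involved (the exact expression in hisi_calc_effective_speed() may differ; this only shows the shape of the hazard):

    /* speed_hz comes from firmware; if it is 0 the first division faults. */
    static u32 effective_speed(u32 clk_rate, u32 speed_hz)
    {
            u32 div = DIV_ROUND_UP(clk_rate, speed_hz);

            return clk_rate / div;
    }

Rejecting max_speed_hz == 0 at probe time keeps any such expression from ever seeing a zero divisor.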
Signed-off-by: Devyn Liu Reviewed-by: Jay Fang Link: https://patch.msgid.link/20240730032040.3156393-3-liudingyuan@huawei.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-hisi-kunpeng.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 6910b4d4c427bf..16054695bdb04a 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -481,6 +481,9 @@ static int hisi_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (host->max_speed_hz == 0) + return dev_err_probe(dev, -EINVAL, "spi-max-frequency can't be 0\n"); + ret = device_property_read_u16(dev, "num-cs", &host->num_chipselect); if (ret) From f1eb69aa8562d3b3186128f00394bad498f474cc Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 29 Jul 2024 21:59:24 +0200 Subject: [PATCH 180/268] btrfs: initialize location to fix -Wmaybe-uninitialized in btrfs_lookup_dentry() [ Upstream commit b8e947e9f64cac9df85a07672b658df5b2bcff07 ] Some arch + compiler combinations report a potentially unused variable location in btrfs_lookup_dentry(). This is a false alert as the variable is passed by value and always valid or there's an error. The compilers cannot probably reason about that although btrfs_inode_by_name() is in the same file. > + /kisskb/src/fs/btrfs/inode.c: error: 'location.objectid' may be used +uninitialized in this function [-Werror=maybe-uninitialized]: => 5603:9 > + /kisskb/src/fs/btrfs/inode.c: error: 'location.type' may be used +uninitialized in this function [-Werror=maybe-uninitialized]: => 5674:5 m68k-gcc8/m68k-allmodconfig mips-gcc8/mips-allmodconfig powerpc-gcc5/powerpc-all{mod,yes}config powerpc-gcc5/ppc64_defconfig Initialize it to zero, this should fix the warnings and won't change the behaviour as btrfs_inode_by_name() accepts only a root or inode item types, otherwise returns an error. 
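The fix is the stock pattern for an out-parameter the compiler cannot prove is written on every success path: zero-initialise it at declaration, which is cheap here and silences the warning without changing behaviour. Roughly (call shape approximated; see the hunk below for the real context):

    struct btrfs_key location = { 0 };  /* provably initialised as far as the compiler is concerned */
    u8 di_type = 0;
    int ret;

    ret = btrfs_inode_by_name(dir, dentry, &location, &di_type);  /* fills location on success */
    if (ret < 0)
            return ERR_PTR(ret);        /* location is never read on this path */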
Reported-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/linux-btrfs/bd4e9928-17b3-9257-8ba7-6b7f9bbb639a@linux-m68k.org/ Reviewed-by: Qu Wenruo Signed-off-by: David Sterba Signed-off-by: Sasha Levin --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 18ce5353092d71..a4223821188788 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5668,7 +5668,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; From fc9fabeee107eecc1485455c8aa7d17c5b3bc28b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 29 Jul 2024 13:06:43 +0200 Subject: [PATCH 181/268] s390/vmlinux.lds.S: Move ro_after_init section behind rodata section [ Upstream commit 75c10d5377d8821efafed32e4d72068d9c1f8ec0 ] The .data.rel.ro and .got section were added between the rodata and ro_after_init data section, which adds an RW mapping in between all RO mapping of the kernel image: ---[ Kernel Image Start ]--- 0x000003ffe0000000-0x000003ffe0e00000 14M PMD RO X 0x000003ffe0e00000-0x000003ffe0ec7000 796K PTE RO X 0x000003ffe0ec7000-0x000003ffe0f00000 228K PTE RO NX 0x000003ffe0f00000-0x000003ffe1300000 4M PMD RO NX 0x000003ffe1300000-0x000003ffe1331000 196K PTE RO NX 0x000003ffe1331000-0x000003ffe13b3000 520K PTE RW NX <--- 0x000003ffe13b3000-0x000003ffe13d5000 136K PTE RO NX 0x000003ffe13d5000-0x000003ffe1400000 172K PTE RW NX 0x000003ffe1400000-0x000003ffe1500000 1M PMD RW NX 0x000003ffe1500000-0x000003ffe1700000 2M PTE RW NX 0x000003ffe1700000-0x000003ffe1800000 1M PMD RW NX 0x000003ffe1800000-0x000003ffe187e000 504K PTE RW NX ---[ Kernel Image End ]--- Move the ro_after_init data section again right behind the rodata section to prevent interleaving RO and RW mappings: ---[ Kernel Image Start ]--- 0x000003ffe0000000-0x000003ffe0e00000 14M PMD RO X 0x000003ffe0e00000-0x000003ffe0ec7000 796K PTE RO X 0x000003ffe0ec7000-0x000003ffe0f00000 228K PTE RO NX 0x000003ffe0f00000-0x000003ffe1300000 4M PMD RO NX 0x000003ffe1300000-0x000003ffe1353000 332K PTE RO NX 0x000003ffe1353000-0x000003ffe1400000 692K PTE RW NX 0x000003ffe1400000-0x000003ffe1500000 1M PMD RW NX 0x000003ffe1500000-0x000003ffe1700000 2M PTE RW NX 0x000003ffe1700000-0x000003ffe1800000 1M PMD RW NX 0x000003ffe1800000-0x000003ffe187e000 504K PTE RW NX ---[ Kernel Image End ]--- Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Sasha Levin --- arch/s390/kernel/vmlinux.lds.S | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 2ae201ebf90b97..de5f9f623f5b27 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -71,6 +71,15 @@ SECTIONS . 
= ALIGN(PAGE_SIZE); __end_ro_after_init = .; + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) BOOT_DATA_PRESERVED From 30e9ce7cd5591be639b53595c95812f1a2afdfdc Mon Sep 17 00:00:00 2001 From: Camila Alvarez Date: Tue, 30 Jul 2024 19:42:43 -0400 Subject: [PATCH 182/268] HID: cougar: fix slab-out-of-bounds Read in cougar_report_fixup [ Upstream commit a6e9c391d45b5865b61e569146304cff72821a5d ] report_fixup for the Cougar 500k Gaming Keyboard was not verifying that the report descriptor size was correct before accessing it Reported-by: syzbot+24c0361074799d02c452@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=24c0361074799d02c452 Signed-off-by: Camila Alvarez Reviewed-by: Silvan Jegen Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-cougar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b51..0fa785f52707ac 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); From 60dc4ee0428d70bcbb41436b6729d29f1cbdfb89 Mon Sep 17 00:00:00 2001 From: Olivier Sobrie Date: Tue, 23 Jul 2024 10:44:35 +0200 Subject: [PATCH 183/268] HID: amd_sfh: free driver_data after destroying hid device [ Upstream commit 97155021ae17b86985121b33cf8098bcde00d497 ] HID driver callbacks aren't called anymore once hid_destroy_device() has been called. Hence, hid driver_data should be freed only after the hid_destroy_device() function returned as driver_data is used in several callbacks. I observed a crash with kernel 6.10.0 on my T14s Gen 3, after enabling KASAN to debug memory allocation, I got this output: [ 13.050438] ================================================================== [ 13.054060] BUG: KASAN: slab-use-after-free in amd_sfh_get_report+0x3ec/0x530 [amd_sfh] [ 13.054809] psmouse serio1: trackpoint: Synaptics TrackPoint firmware: 0x02, buttons: 3/3 [ 13.056432] Read of size 8 at addr ffff88813152f408 by task (udev-worker)/479 [ 13.060970] CPU: 5 PID: 479 Comm: (udev-worker) Not tainted 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.063978] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.067860] Call Trace: [ 13.069383] input: TPPS/2 Synaptics TrackPoint as /devices/platform/i8042/serio1/input/input8 [ 13.071486] [ 13.071492] dump_stack_lvl+0x5d/0x80 [ 13.074870] snd_hda_intel 0000:33:00.6: enabling device (0000 -> 0002) [ 13.078296] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.082199] print_report+0x174/0x505 [ 13.085776] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.089367] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.093255] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.097464] kasan_report+0xc8/0x150 [ 13.101461] ? 
amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.105802] amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.110303] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.114879] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.119450] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.124097] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.127404] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.131925] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.136455] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.140197] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.143602] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.147234] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.150446] ? __devm_add_action+0x167/0x1d0 [ 13.155061] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.158581] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.161814] platform_probe+0xa2/0x150 [ 13.165029] really_probe+0x1e3/0x8a0 [ 13.168243] __driver_probe_device+0x18c/0x370 [ 13.171500] driver_probe_device+0x4a/0x120 [ 13.175000] __driver_attach+0x190/0x4a0 [ 13.178521] ? __pfx___driver_attach+0x10/0x10 [ 13.181771] bus_for_each_dev+0x106/0x180 [ 13.185033] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.188229] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.191446] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.194382] bus_add_driver+0x29e/0x4d0 [ 13.197328] driver_register+0x1a5/0x360 [ 13.200283] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.203362] do_one_initcall+0xa7/0x380 [ 13.206432] ? __pfx_do_one_initcall+0x10/0x10 [ 13.210175] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.213211] ? kasan_unpoison+0x44/0x70 [ 13.216688] do_init_module+0x238/0x750 [ 13.219696] load_module+0x5011/0x6af0 [ 13.223096] ? kasan_save_stack+0x30/0x50 [ 13.226743] ? kasan_save_track+0x14/0x30 [ 13.230080] ? kasan_save_free_info+0x3b/0x60 [ 13.233323] ? poison_slab_object+0x109/0x180 [ 13.236778] ? __pfx_load_module+0x10/0x10 [ 13.239703] ? poison_slab_object+0x109/0x180 [ 13.243070] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.245924] ? init_module_from_file+0x13d/0x150 [ 13.248745] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.251503] ? init_module_from_file+0xdf/0x150 [ 13.254198] init_module_from_file+0xdf/0x150 [ 13.256826] ? __pfx_init_module_from_file+0x10/0x10 [ 13.259428] ? kasan_save_track+0x14/0x30 [ 13.261959] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.264471] ? kasan_save_free_info+0x3b/0x60 [ 13.267026] ? poison_slab_object+0x109/0x180 [ 13.269494] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.271949] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.274324] ? _raw_spin_lock+0x85/0xe0 [ 13.276671] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.278963] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.281193] idempotent_init_module+0x23b/0x650 [ 13.283420] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.285619] ? __pfx___seccomp_filter+0x10/0x10 [ 13.287714] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.289828] ? __fget_light+0x57/0x420 [ 13.291870] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.293880] ? security_capable+0x74/0xb0 [ 13.295820] __x64_sys_finit_module+0xbe/0x130 [ 13.297874] do_syscall_64+0x82/0x190 [ 13.299898] ? 
srso_alias_return_thunk+0x5/0xfbef5 [ 13.301905] ? irqtime_account_irq+0x3d/0x1f0 [ 13.303877] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.305753] ? __irq_exit_rcu+0x4e/0x130 [ 13.307577] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.309489] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.311371] RIP: 0033:0x7a21f96ade9d [ 13.313234] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.317051] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.319024] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.321100] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.323314] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.325505] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.327637] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.329737] [ 13.333945] Allocated by task 139: [ 13.336111] kasan_save_stack+0x30/0x50 [ 13.336121] kasan_save_track+0x14/0x30 [ 13.336125] __kasan_kmalloc+0xaa/0xb0 [ 13.336129] amdtp_hid_probe+0xb1/0x440 [amd_sfh] [ 13.336138] amd_sfh_hid_client_init+0xb8a/0x10f0 [amd_sfh] [ 13.336144] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.336150] process_one_work+0x673/0xeb0 [ 13.336155] worker_thread+0x795/0x1250 [ 13.336160] kthread+0x290/0x350 [ 13.336164] ret_from_fork+0x34/0x70 [ 13.336169] ret_from_fork_asm+0x1a/0x30 [ 13.338175] Freed by task 139: [ 13.340064] kasan_save_stack+0x30/0x50 [ 13.340072] kasan_save_track+0x14/0x30 [ 13.340076] kasan_save_free_info+0x3b/0x60 [ 13.340081] poison_slab_object+0x109/0x180 [ 13.340085] __kasan_slab_free+0x32/0x50 [ 13.340089] kfree+0xe5/0x310 [ 13.340094] amdtp_hid_remove+0xb2/0x160 [amd_sfh] [ 13.340102] amd_sfh_hid_client_deinit+0x324/0x640 [amd_sfh] [ 13.340107] amd_sfh_hid_client_init+0x94a/0x10f0 [amd_sfh] [ 13.340113] sfh_init_work+0x47/0x120 [amd_sfh] [ 13.340118] process_one_work+0x673/0xeb0 [ 13.340123] worker_thread+0x795/0x1250 [ 13.340127] kthread+0x290/0x350 [ 13.340132] ret_from_fork+0x34/0x70 [ 13.340136] ret_from_fork_asm+0x1a/0x30 [ 13.342482] The buggy address belongs to the object at ffff88813152f400 which belongs to the cache kmalloc-64 of size 64 [ 13.347357] The buggy address is located 8 bytes inside of freed 64-byte region [ffff88813152f400, ffff88813152f440) [ 13.347367] The buggy address belongs to the physical page: [ 13.355409] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x13152f [ 13.355416] anon flags: 0x2ffff8000000000(node=0|zone=2|lastcpupid=0x1ffff) [ 13.355423] page_type: 0xffffefff(slab) [ 13.355429] raw: 02ffff8000000000 ffff8881000428c0 ffffea0004c43a00 0000000000000005 [ 13.355435] raw: 0000000000000000 0000000000200020 00000001ffffefff 0000000000000000 [ 13.355439] page dumped because: kasan: bad access detected [ 13.357295] Memory state around the buggy address: [ 13.357299] ffff88813152f300: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357303] ffff88813152f380: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357306] >ffff88813152f400: fa fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 13.357309] ^ [ 13.357311] ffff88813152f480: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc [ 13.357315] ffff88813152f500: 00 00 00 00 00 00 00 06 fc fc fc fc fc fc fc fc [ 13.357318] ================================================================== [ 13.357405] Disabling lock debugging due to kernel taint 
[ 13.383534] Oops: general protection fault, probably for non-canonical address 0xe0a1bc4140000013: 0000 [#1] PREEMPT SMP KASAN NOPTI [ 13.383544] KASAN: maybe wild-memory-access in range [0x050e020a00000098-0x050e020a0000009f] [ 13.383551] CPU: 3 PID: 479 Comm: (udev-worker) Tainted: G B 6.10.0-arch1-2 #1 893bb55d7f0073f25c46adbb49eb3785fefd74b0 [ 13.383561] Hardware name: LENOVO 21CQCTO1WW/21CQCTO1WW, BIOS R22ET70W (1.40 ) 03/21/2024 [ 13.383565] RIP: 0010:amd_sfh_get_report+0x81/0x530 [amd_sfh] [ 13.383580] Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 78 03 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8b 63 08 49 8d 7c 24 10 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e 1a 03 00 00 45 8b 74 24 10 45 [ 13.383585] RSP: 0018:ffff8881261f7388 EFLAGS: 00010212 [ 13.383592] RAX: dffffc0000000000 RBX: ffff88813152f400 RCX: 0000000000000002 [ 13.383597] RDX: 00a1c04140000013 RSI: 0000000000000008 RDI: 050e020a0000009b [ 13.383600] RBP: ffff88814d010000 R08: 0000000000000002 R09: fffffbfff3ddb8c0 [ 13.383604] R10: ffffffff9eedc607 R11: ffff88810ce98000 R12: 050e020a0000008b [ 13.383607] R13: ffff88814d010000 R14: dffffc0000000000 R15: 0000000000000004 [ 13.383611] FS: 00007a21f94d0880(0000) GS:ffff8887e7d80000(0000) knlGS:0000000000000000 [ 13.383615] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 13.383618] CR2: 00007e0014c438f0 CR3: 000000012614c000 CR4: 0000000000f50ef0 [ 13.383622] PKRU: 55555554 [ 13.383625] Call Trace: [ 13.383629] [ 13.383632] ? __die_body.cold+0x19/0x27 [ 13.383644] ? die_addr+0x46/0x70 [ 13.383652] ? exc_general_protection+0x150/0x240 [ 13.383664] ? asm_exc_general_protection+0x26/0x30 [ 13.383674] ? amd_sfh_get_report+0x81/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383686] ? amd_sfh_get_report+0x3ec/0x530 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383697] amdtp_hid_request+0xb8/0x110 [amd_sfh 05f43221435b5205f734cd9da29399130f398a38] [ 13.383706] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383713] sensor_hub_get_feature+0x1d3/0x540 [hid_sensor_hub 3f13be3016ff415bea03008d45d99da837ee3082] [ 13.383727] hid_sensor_parse_common_attributes+0x4d0/0xad0 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383739] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383745] ? __pfx_hid_sensor_parse_common_attributes+0x10/0x10 [hid_sensor_iio_common c3a5cbe93969c28b122609768bbe23efe52eb8f5] [ 13.383753] ? _raw_spin_lock_irqsave+0x96/0xf0 [ 13.383762] ? __pfx__raw_spin_lock_irqsave+0x10/0x10 [ 13.383768] ? devm_iio_device_alloc+0x34/0x50 [industrialio 3d261d5e5765625d2b052be40e526d62b1d2123b] [ 13.383790] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383795] ? __devm_add_action+0x167/0x1d0 [ 13.383806] hid_gyro_3d_probe+0x120/0x7f0 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383818] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383826] platform_probe+0xa2/0x150 [ 13.383832] really_probe+0x1e3/0x8a0 [ 13.383838] __driver_probe_device+0x18c/0x370 [ 13.383844] driver_probe_device+0x4a/0x120 [ 13.383851] __driver_attach+0x190/0x4a0 [ 13.383857] ? __pfx___driver_attach+0x10/0x10 [ 13.383863] bus_for_each_dev+0x106/0x180 [ 13.383868] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.383874] ? __pfx_bus_for_each_dev+0x10/0x10 [ 13.383880] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383887] bus_add_driver+0x29e/0x4d0 [ 13.383895] driver_register+0x1a5/0x360 [ 13.383902] ? __pfx_hid_gyro_3d_platform_driver_init+0x10/0x10 [hid_sensor_gyro_3d 63da36a143b775846ab2dbb86c343b401b5e3172] [ 13.383910] do_one_initcall+0xa7/0x380 [ 13.383919] ? 
__pfx_do_one_initcall+0x10/0x10 [ 13.383927] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.383933] ? kasan_unpoison+0x44/0x70 [ 13.383943] do_init_module+0x238/0x750 [ 13.383955] load_module+0x5011/0x6af0 [ 13.383962] ? kasan_save_stack+0x30/0x50 [ 13.383968] ? kasan_save_track+0x14/0x30 [ 13.383973] ? kasan_save_free_info+0x3b/0x60 [ 13.383980] ? poison_slab_object+0x109/0x180 [ 13.383993] ? __pfx_load_module+0x10/0x10 [ 13.384007] ? poison_slab_object+0x109/0x180 [ 13.384012] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384018] ? init_module_from_file+0x13d/0x150 [ 13.384025] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384032] ? init_module_from_file+0xdf/0x150 [ 13.384037] init_module_from_file+0xdf/0x150 [ 13.384044] ? __pfx_init_module_from_file+0x10/0x10 [ 13.384050] ? kasan_save_track+0x14/0x30 [ 13.384055] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384060] ? kasan_save_free_info+0x3b/0x60 [ 13.384066] ? poison_slab_object+0x109/0x180 [ 13.384071] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384080] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384085] ? _raw_spin_lock+0x85/0xe0 [ 13.384091] ? __pfx__raw_spin_lock+0x10/0x10 [ 13.384096] ? __rseq_handle_notify_resume+0x1a6/0xad0 [ 13.384106] idempotent_init_module+0x23b/0x650 [ 13.384114] ? __pfx_idempotent_init_module+0x10/0x10 [ 13.384120] ? __pfx___seccomp_filter+0x10/0x10 [ 13.384129] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384135] ? __fget_light+0x57/0x420 [ 13.384142] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384147] ? security_capable+0x74/0xb0 [ 13.384157] __x64_sys_finit_module+0xbe/0x130 [ 13.384164] do_syscall_64+0x82/0x190 [ 13.384174] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384179] ? irqtime_account_irq+0x3d/0x1f0 [ 13.384188] ? srso_alias_return_thunk+0x5/0xfbef5 [ 13.384193] ? __irq_exit_rcu+0x4e/0x130 [ 13.384201] ? 
srso_alias_return_thunk+0x5/0xfbef5 [ 13.384206] entry_SYSCALL_64_after_hwframe+0x76/0x7e [ 13.384212] RIP: 0033:0x7a21f96ade9d [ 13.384263] Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 90 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 63 de 0c 00 f7 d8 64 89 01 48 [ 13.384267] RSP: 002b:00007ffeae934e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 13.384273] RAX: ffffffffffffffda RBX: 00005987276bfcf0 RCX: 00007a21f96ade9d [ 13.384277] RDX: 0000000000000004 RSI: 00007a21f8eda376 RDI: 000000000000001c [ 13.384280] RBP: 00007a21f8eda376 R08: 0000000000000001 R09: 00007ffeae934ec0 [ 13.384284] R10: 0000000000000050 R11: 0000000000000246 R12: 0000000000020000 [ 13.384288] R13: 00005987276c1250 R14: 0000000000000000 R15: 00005987276c4530 [ 13.384297] [ 13.384299] Modules linked in: soundwire_amd(+) hid_sensor_gyro_3d(+) hid_sensor_magn_3d hid_sensor_accel_3d soundwire_generic_allocation amdxcp hid_sensor_trigger drm_exec industrialio_triggered_buffer soundwire_bus gpu_sched kvm_amd kfifo_buf qmi_helpers joydev drm_buddy hid_sensor_iio_common mousedev snd_soc_core industrialio i2c_algo_bit mac80211 snd_compress drm_suballoc_helper kvm snd_hda_intel drm_ttm_helper ac97_bus snd_pcm_dmaengine snd_intel_dspcfg ttm thinkpad_acpi(+) snd_intel_sdw_acpi hid_sensor_hub snd_rpl_pci_acp6x drm_display_helper snd_hda_codec hid_multitouch libarc4 snd_acp_pci platform_profile think_lmi(+) hid_generic firmware_attributes_class wmi_bmof cec snd_acp_legacy_common sparse_keymap rapl snd_hda_core psmouse cfg80211 pcspkr snd_pci_acp6x snd_hwdep video snd_pcm snd_pci_acp5x snd_timer snd_rn_pci_acp3x ucsi_acpi snd_acp_config snd sp5100_tco rfkill snd_soc_acpi typec_ucsi thunderbolt amd_sfh k10temp mhi soundcore i2c_piix4 snd_pci_acp3x typec i2c_hid_acpi roles i2c_hid wmi acpi_tad amd_pmc [ 13.384454] mac_hid i2c_dev crypto_user loop nfnetlink zram ip_tables x_tables dm_crypt cbc encrypted_keys trusted asn1_encoder tee dm_mod crct10dif_pclmul crc32_pclmul polyval_clmulni polyval_generic gf128mul ghash_clmulni_intel serio_raw sha512_ssse3 atkbd sha256_ssse3 libps2 sha1_ssse3 vivaldi_fmap nvme aesni_intel crypto_simd nvme_core cryptd ccp xhci_pci i8042 nvme_auth xhci_pci_renesas serio vfat fat btrfs blake2b_generic libcrc32c crc32c_generic crc32c_intel xor raid6_pq [ 13.384552] ---[ end trace 0000000000000000 ]--- KASAN reports a use-after-free of hid->driver_data in function amd_sfh_get_report(). The backtrace indicates that the function is called by amdtp_hid_request() which is one of the callbacks of hid device. The current make sure that driver_data is freed only once hid_destroy_device() returned. Note that I observed the crash both on v6.9.9 and v6.10.0. The code seems to be as it was from the early days of the driver. 
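The ordering rule the patch enforces is generic: hid_destroy_device() is the point after which no HID callback can run, so per-device driver_data may only be freed once it has returned. In outline (mirroring the hunk below):

    struct amdtp_hid_data *hid_data = hdev->driver_data;

    hid_destroy_device(hdev);   /* after this, amdtp_hid_request() and friends can no longer run */
    kfree(hid_data);            /* now safe: nothing can reach driver_data any more */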
Signed-off-by: Olivier Sobrie Acked-by: Basavaraj Natikar Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/amd-sfh-hid/amd_sfh_hid.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b5233706845..81f3024b7b1b51 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ int amdtp_hid_probe(u32 cur_hid_dev, struct amdtp_cl_data *cli_data) void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } From a4858b00a1ec57043697fb935565fe267f161833 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 4 Aug 2024 17:50:25 -0700 Subject: [PATCH 184/268] Input: uinput - reject requests with unreasonable number of slots [ Upstream commit 206f533a0a7c683982af473079c4111f4a0f9f5e ] From: Dmitry Torokhov When exercising uinput interface syzkaller may try setting up device with a really large number of slots, which causes memory allocation failure in input_mt_init_slots(). While this allocation failure is handled properly and request is rejected, it results in syzkaller reports. Additionally, such request may put undue burden on the system which will try to free a lot of memory for a bogus request. Fix it by limiting allowed number of slots to 100. This can easily be extended if we see devices that can track more than 100 contacts. Reported-by: Tetsuo Handa Reported-by: syzbot Closes: https://syzkaller.appspot.com/bug?extid=0122fa359a69694395d5 Link: https://lore.kernel.org/r/Zqgi7NYEbpRsJfa2@google.com Signed-off-by: Dmitry Torokhov Signed-off-by: Sasha Levin --- drivers/input/misc/uinput.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index d98212d55108c7..2c973f15cab7da 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -417,6 +417,20 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } + /* + * Limit number of contacts to a reasonable value (100). This + * ensures that we need less than 2 pages for struct input_mt + * (we are not using in-kernel slot assignment so not going to + * allocate memory for the "red" table), and we should have no + * trouble getting this much memory. + */ + if (code == ABS_MT_SLOT && max > 99) { + printk(KERN_DEBUG + "%s: unreasonably large number of slots requested: %d\n", + UINPUT_NAME, max); + return -EINVAL; + } + return 0; } From 6604d76253e7d03ea916f955a2841df92be9e97d Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Tue, 6 Aug 2024 19:28:05 +0200 Subject: [PATCH 185/268] usbnet: ipheth: race between ipheth_close and error handling [ Upstream commit e5876b088ba03a62124266fa20d00e65533c7269 ] ipheth_sndbulk_callback() can submit carrier_work as a part of its error handling. That means that the driver must make sure that the work is cancelled after it has made sure that no more URB can terminate with an error condition. Hence the order of actions in ipheth_close() needs to be inverted. Signed-off-by: Oliver Neukum Signed-off-by: Foster Snowhill Tested-by: Georgi Valkov Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin --- drivers/net/usb/ipheth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 687d70cfc55635..6eeef10edadad1 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -475,8 +475,8 @@ static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); - cancel_delayed_work_sync(&dev->carrier_work); netif_stop_queue(net); + cancel_delayed_work_sync(&dev->carrier_work); return 0; } From c3af7e460a526007e4bed1ce3623274a1a6afe5e Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Mon, 12 Aug 2024 00:28:21 +0100 Subject: [PATCH 186/268] Squashfs: sanity check symbolic link size [ Upstream commit 810ee43d9cd245d138a2733d87a24858a23f577d ] Syzkiller reports a "KMSAN: uninit-value in pick_link" bug. This is caused by an uninitialised page, which is ultimately caused by a corrupted symbolic link size read from disk. The reason why the corrupted symlink size causes an uninitialised page is due to the following sequence of events: 1. squashfs_read_inode() is called to read the symbolic link from disk. This assigns the corrupted value 3875536935 to inode->i_size. 2. Later squashfs_symlink_read_folio() is called, which assigns this corrupted value to the length variable, which being a signed int, overflows producing a negative number. 3. The following loop that fills in the page contents checks that the copied bytes is less than length, which being negative means the loop is skipped, producing an uninitialised page. This patch adds a sanity check which checks that the symbolic link size is not larger than expected. -- Signed-off-by: Phillip Lougher Link: https://lore.kernel.org/r/20240811232821.13903-1-phillip@squashfs.org.uk Reported-by: Lizhi Xu Reported-by: syzbot+24ac24ff58dc5b0d26b9@syzkaller.appspotmail.com Closes: https://lore.kernel.org/all/000000000000a90e8c061e86a76b@google.com/ V2: fix spelling mistake. 
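Step 2 of the sequence above is an ordinary narrowing conversion going negative, and it reproduces outside the kernel (standalone demo, not squashfs code; the constant is the corrupted size quoted in the report, and the wrap-around is what happens on the usual 32-bit int ABIs):

    #include <stdio.h>

    int main(void)
    {
            long long i_size = 3875536935LL;        /* corrupted symlink size read from disk */
            int length = i_size;                    /* narrows and wraps negative */
            int copied = 0;

            printf("length = %d\n", length);
            if (copied < length)
                    printf("fill loop runs\n");
            else
                    printf("fill loop skipped, page stays uninitialised\n");
            return 0;
    }

With the added i_size > PAGE_SIZE check the corrupted inode is rejected before the page cache ever sees it.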
Signed-off-by: Christian Brauner Signed-off-by: Sasha Levin --- fs/squashfs/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 16bd693d0b3aa2..d5918eba27e371 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; From 7ead730af11ee7da107f16fc77995613c58d292d Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Mon, 12 Aug 2024 12:06:51 +0200 Subject: [PATCH 187/268] of/irq: Prevent device address out-of-bounds read in interrupt map walk [ Upstream commit b739dffa5d570b411d4bdf4bb9b8dfd6b7d72305 ] When of_irq_parse_raw() is invoked with a device address smaller than the interrupt parent node (from #address-cells property), KASAN detects the following out-of-bounds read when populating the initial match table (dyndbg="func of_irq_parse_* +p"): OF: of_irq_parse_one: dev=/soc@0/picasso/watchdog, index=0 OF: parent=/soc@0/pci@878000000000/gpio0@17,0, intsize=2 OF: intspec=4 OF: of_irq_parse_raw: ipar=/soc@0/pci@878000000000/gpio0@17,0, size=2 OF: -> addrsize=3 ================================================================== BUG: KASAN: slab-out-of-bounds in of_irq_parse_raw+0x2b8/0x8d0 Read of size 4 at addr ffffff81beca5608 by task bash/764 CPU: 1 PID: 764 Comm: bash Tainted: G O 6.1.67-484c613561-nokia_sm_arm64 #1 Hardware name: Unknown Unknown Product/Unknown Product, BIOS 2023.01-12.24.03-dirty 01/01/2023 Call trace: dump_backtrace+0xdc/0x130 show_stack+0x1c/0x30 dump_stack_lvl+0x6c/0x84 print_report+0x150/0x448 kasan_report+0x98/0x140 __asan_load4+0x78/0xa0 of_irq_parse_raw+0x2b8/0x8d0 of_irq_parse_one+0x24c/0x270 parse_interrupts+0xc0/0x120 of_fwnode_add_links+0x100/0x2d0 fw_devlink_parse_fwtree+0x64/0xc0 device_add+0xb38/0xc30 of_device_add+0x64/0x90 of_platform_device_create_pdata+0xd0/0x170 of_platform_bus_create+0x244/0x600 of_platform_notify+0x1b0/0x254 blocking_notifier_call_chain+0x9c/0xd0 __of_changeset_entry_notify+0x1b8/0x230 __of_changeset_apply_notify+0x54/0xe4 of_overlay_fdt_apply+0xc04/0xd94 ... 
The buggy address belongs to the object at ffffff81beca5600 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 8 bytes inside of 128-byte region [ffffff81beca5600, ffffff81beca5680) The buggy address belongs to the physical page: page:00000000230d3d03 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1beca4 head:00000000230d3d03 order:1 compound_mapcount:0 compound_pincount:0 flags: 0x8000000000010200(slab|head|zone=2) raw: 8000000000010200 0000000000000000 dead000000000122 ffffff810000c300 raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffffff81beca5500: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffff81beca5580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffff81beca5600: 00 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffff81beca5680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffff81beca5700: 00 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc ================================================================== OF: -> got it ! Prevent the out-of-bounds read by copying the device address into a buffer of sufficient size. Signed-off-by: Stefan Wiehler Link: https://lore.kernel.org/r/20240812100652.3800963-1-stefan.wiehler@nokia.com Signed-off-by: Rob Herring (Arm) Signed-off-by: Sasha Levin --- drivers/of/irq.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb32..8fd63100ba8f08 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; From d942e855324a60107025c116245095632476613e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 10 Aug 2024 21:04:35 -0400 Subject: [PATCH 188/268] lib/generic-radix-tree.c: Fix rare race in __genradix_ptr_alloc() [ Upstream commit b2f11c6f3e1fc60742673b8675c95b78447f3dae ] If we need to increase the tree depth, allocate a new node, and then race with another thread that increased the tree depth before us, we'll still have a preallocated node that might be used later. 
If we then use that node for a new non-root node, it'll still have a pointer to the old root instead of being zeroed - fix this by zeroing it in the cmpxchg failure path. Signed-off-by: Kent Overstreet Signed-off-by: Sasha Levin --- lib/generic-radix-tree.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index 7dfa88282b006a..78f081d695d0b7 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -131,6 +131,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } From 32ee0520159f1e8c2d6597c19690df452c528f30 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 13 Aug 2024 10:59:08 +0100 Subject: [PATCH 189/268] MIPS: cevt-r4k: Don't call get_c0_compare_int if timer irq is installed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 50f2b98dc83de7809a5c5bf0ccf9af2e75c37c13 ] This avoids warning: [ 0.118053] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:283 Caused by get_c0_compare_int on secondary CPU. We also skipped saving IRQ number to struct clock_event_device *cd as it's never used by clockevent core, as per comments it's only meant for "non CPU local devices". Reported-by: Serge Semin Closes: https://lore.kernel.org/linux-mips/6szkkqxpsw26zajwysdrwplpjvhl5abpnmxgu2xuj3dkzjnvsf@4daqrz4mf44k/ Signed-off-by: Jiaxun Yang Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Serge Semin Tested-by: Serge Semin Signed-off-by: Thomas Bogendoerfer Signed-off-by: Sasha Levin --- arch/mips/kernel/cevt-r4k.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 368e8475870f08..5f6e9e2ebbdbb8 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -303,13 +303,6 @@ int r4k_clockevent_init(void) if (!c0_compare_int_usable()) return -ENXIO; - /* - * With vectored interrupts things are getting platform specific. - * get_c0_compare_int is a hook to allow a platform to return the - * interrupt number of its liking. - */ - irq = get_c0_compare_int(); - cd = &per_cpu(mips_clockevent_device, cpu); cd->name = "MIPS"; @@ -320,7 +313,6 @@ int r4k_clockevent_init(void) min_delta = calculate_min_delta(); cd->rating = 300; - cd->irq = irq; cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->event_handler = mips_event_handler; @@ -332,6 +324,13 @@ int r4k_clockevent_init(void) cp0_timer_irq_installed = 1; + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of its liking. + */ + irq = get_c0_compare_int(); + if (request_irq(irq, c0_compare_interrupt, flags, "timer", c0_compare_interrupt)) pr_err("Failed to request irq %d (timer)\n", irq); From ff62110ec522a8660188801414291fc4599033aa Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Tue, 20 Aug 2024 15:06:58 +0800 Subject: [PATCH 190/268] spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register [ Upstream commit 783bf5d09f86b9736605f3e01a3472e55ef98ff8 ] Referring to the errata ERR051608 of I.MX93, LPSPI TCR[PRESCALE] can only be configured to be 0 or 1, other values are not valid and will cause LPSPI to not work. Add the prescale limitation for LPSPI in I.MX93. Other platforms are not affected. 
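The fix uses the usual per-SoC devtype-data pattern: the compatible string selects a prescale_max that then bounds the existing prescaler search. As a sketch of that search with the new bound (simplified from the driver; the return convention here is illustrative only):

    /* Pick the smallest allowed prescaler whose SCK divider fits the 8-bit field.
     * prescale_max is 1 on i.MX93 (ERR051608) and 8 on other SoCs, per the table below.
     */
    static int pick_prescale(unsigned int perclk_rate, unsigned int speed_hz,
                             unsigned int prescale_max)
    {
            unsigned int div = DIV_ROUND_UP(perclk_rate, speed_hz);
            unsigned int prescale, scldiv;

            for (prescale = 0; prescale < prescale_max; prescale++) {
                    scldiv = div / (1 << prescale) - 2;
                    if (scldiv < 256)
                            return prescale;
            }
            return -EINVAL;         /* requested speed not reachable with the allowed prescalers */
    }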
Signed-off-by: Carlos Song Link: https://patch.msgid.link/20240820070658.672127-1-carlos.song@nxp.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- drivers/spi/spi-fsl-lpspi.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 3c0f7dc9614d15..f02b2f74168137 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -82,6 +82,10 @@ #define TCR_RXMSK BIT(19) #define TCR_TXMSK BIT(18) +struct fsl_lpspi_devtype_data { + u8 prescale_max; +}; + struct lpspi_config { u8 bpw; u8 chip_select; @@ -119,10 +123,25 @@ struct fsl_lpspi_data { bool usedma; struct completion dma_rx_completion; struct completion dma_tx_completion; + + const struct fsl_lpspi_devtype_data *devtype_data; +}; + +/* + * ERR051608 fixed or not: + * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf + */ +static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { + .prescale_max = 1, +}; + +static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { + .prescale_max = 8, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { - { .compatible = "fsl,imx7ulp-spi", }, + { .compatible = "fsl,imx7ulp-spi", .data = &imx7ulp_lpspi_devtype_data,}, + { .compatible = "fsl,imx93-spi", .data = &imx93_lpspi_devtype_data,}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids); @@ -297,9 +316,11 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; unsigned int perclk_rate, scldiv, div; + u8 prescale_max; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); + prescale_max = fsl_lpspi->devtype_data->prescale_max; if (!config.speed_hz) { dev_err(fsl_lpspi->dev, @@ -315,7 +336,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale < 8; prescale++) { + for (prescale = 0; prescale < prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; @@ -822,6 +843,7 @@ static int fsl_lpspi_init_rpm(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_probe(struct platform_device *pdev) { + const struct fsl_lpspi_devtype_data *devtype_data; struct fsl_lpspi_data *fsl_lpspi; struct spi_controller *controller; struct resource *res; @@ -830,6 +852,10 @@ static int fsl_lpspi_probe(struct platform_device *pdev) u32 temp; bool is_target; + devtype_data = of_device_get_match_data(&pdev->dev); + if (!devtype_data) + return -ENODEV; + is_target = of_property_read_bool((&pdev->dev)->of_node, "spi-slave"); if (is_target) controller = devm_spi_alloc_target(&pdev->dev, @@ -848,6 +874,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->is_target = is_target; fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node, "fsl,spi-only-use-cs1-sel"); + fsl_lpspi->devtype_data = devtype_data; init_completion(&fsl_lpspi->xfer_done); From b777131d03be76594849da084a14853f72cf0e37 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 20 Aug 2024 13:04:07 +1000 Subject: [PATCH 191/268] ata: pata_macio: Use WARN instead of BUG [ Upstream commit d4bc0a264fb482b019c84fbc7202dd3cab059087 ] The overflow/underflow conditions in pata_macio_qc_prep() should never happen. But if they do there's no need to kill the system entirely, a WARN and failing the IO request should be sufficient and might allow the system to keep running. 
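The conversion is the standard "warn once and fail the request" replacement for a BUG_ON() on a should-never-happen condition, in outline (matching the hunks below):

    if (WARN_ON_ONCE(pi >= MAX_DCMDS))
            return AC_ERR_SYSTEM;   /* libata fails this command; the machine keeps running */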
Signed-off-by: Michael Ellerman Signed-off-by: Damien Le Moal Signed-off-by: Sasha Levin --- drivers/ata/pata_macio.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 17f6ccee53c7c2..ffbb2e8591cefc 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -541,7 +541,8 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) while (sg_len) { /* table overflow should never happen */ - BUG_ON (pi++ >= MAX_DCMDS); + if (WARN_ON_ONCE(pi >= MAX_DCMDS)) + return AC_ERR_SYSTEM; len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; table->command = cpu_to_le16(write ? OUTPUT_MORE: INPUT_MORE); @@ -553,11 +554,13 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) addr += len; sg_len -= len; ++table; + ++pi; } } /* Should never happen according to Tejun */ - BUG_ON(!pi); + if (WARN_ON_ONCE(!pi)) + return AC_ERR_SYSTEM; /* Convert the last command to an input/output */ table--; From 07f384c5be1f8633b13f0a22616e227570450bc6 Mon Sep 17 00:00:00 2001 From: ChenXiaoSong Date: Thu, 22 Aug 2024 08:20:51 +0000 Subject: [PATCH 192/268] smb/server: fix potential null-ptr-deref of lease_ctx_info in smb2_open() [ Upstream commit 4e8771a3666c8f216eefd6bd2fd50121c6c437db ] null-ptr-deref will occur when (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) and parse_lease_state() return NULL. Fix this by check if 'lease_ctx_info' is NULL. Additionally, remove the redundant parentheses in parse_durable_handle_context(). Signed-off-by: ChenXiaoSong Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/server/oplock.c | 2 +- fs/smb/server/smb2pdu.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index a8f52c4ebbdadd..e546ffa57b55ab 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1510,7 +1510,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request * - * Return: oplock state, -ENOENT if create lease context not found + * Return: allocated lease context object on success, otherwise NULL */ struct lease_ctx_info *parse_lease_state(void *open_req) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index c55924f5164e20..458cc736286aae 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -2771,8 +2771,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, } } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { dh_info->CreateGuid = durable_v2_blob->CreateGuid; dh_info->persistent = @@ -2792,8 +2792,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { ksmbd_debug(SMB, "Request for durable open\n"); dh_info->type = dh_idx; } @@ -3415,7 +3415,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } } else { - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && lc) { if (S_ISDIR(file_inode(filp)->i_mode)) { lc->req_state &= 
~SMB2_LEASE_WRITE_CACHING_LE; lc->is_dir = true; From 6c4a878e1cc842cd2d1996243d5190ff067b087c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 21 Aug 2024 14:05:00 -0400 Subject: [PATCH 193/268] NFSv4: Add missing rescheduling points in nfs_client_return_marked_delegations [ Upstream commit a017ad1313fc91bdf235097fd0a02f673fc7bb11 ] We're seeing reports of soft lockups when iterating through the loops, so let's add rescheduling points. Signed-off-by: Trond Myklebust Reviewed-by: Jeff Layton Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0d6473cb00cb3e..f63513e477c50c 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,7 @@ static int __nfs_list_for_each_server(struct list_head *head, ret = fn(server, data); if (ret) goto out; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); From 193b1fc1cbcab14983c572e8439e4f6c866b6564 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 13 Dec 2023 21:29:58 +0100 Subject: [PATCH 194/268] riscv: Use WRITE_ONCE() when setting page table entries commit c30fa83b49897e708a52e122dd10616a52a4c82b upstream. To avoid any compiler "weirdness" when accessing page table entries which are concurrently modified by the HW, let's use WRITE_ONCE() macro (commit 20a004e7b017 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables") gives a great explanation with more details). Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231213203001.179237-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/pgtable-64.h | 6 +++--- arch/riscv/include/asm/pgtable.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 7a5097202e1570..a65a352dcfbfb4 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -198,7 +198,7 @@ static inline int pud_user(pud_t pud) static inline void set_pud(pud_t *pudp, pud_t pud) { - *pudp = pud; + WRITE_ONCE(*pudp, pud); } static inline void pud_clear(pud_t *pudp) @@ -274,7 +274,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { if (pgtable_l4_enabled) - *p4dp = p4d; + WRITE_ONCE(*p4dp, p4d); else set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); } @@ -347,7 +347,7 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { if (pgtable_l5_enabled) - *pgdp = pgd; + WRITE_ONCE(*pgdp, pgd); else set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 719c3041ae1c26..f8e72df4113a65 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -248,7 +248,7 @@ static inline int pmd_leaf(pmd_t pmd) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); } static inline void pmd_clear(pmd_t *pmdp) @@ -515,7 +515,7 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) */ static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); } void flush_icache_pte(pte_t pte); From 1a8b2391e06e9917c3a48526f56b0dada51a5629 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 13 Dec 2023 21:29:59 
+0100 Subject: [PATCH 195/268] mm: Introduce pudp/p4dp/pgdp_get() functions commit eba2591d99d1f14a04c8a8a845ab0795b93f5646 upstream. Instead of directly dereferencing page tables entries, which can cause issues (see commit 20a004e7b017 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables"), let's introduce new functions to get the pud/p4d/pgd entries (the pte and pmd versions already exist). Note that arm pgd_t is actually an array so pgdp_get() is defined as a macro to avoid a build error. Those new functions will be used in subsequent commits by the riscv architecture. Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231213203001.179237-3-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/pgtable.h | 2 ++ include/linux/pgtable.h | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 16b02f44c7d312..d657b84b6bf706 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -151,6 +151,8 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +#define pgdp_get(pgpd) READ_ONCE(*pgdp) + #define pud_page(pud) pmd_page(__pmd(pud_val(pud))) #define pud_write(pud) pmd_write(__pmd(pud_val(pud))) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index af7639c3b0a3a4..8b7daccd11bef2 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -292,6 +292,27 @@ static inline pmd_t pmdp_get(pmd_t *pmdp) } #endif +#ifndef pudp_get +static inline pud_t pudp_get(pud_t *pudp) +{ + return READ_ONCE(*pudp); +} +#endif + +#ifndef p4dp_get +static inline p4d_t p4dp_get(p4d_t *p4dp) +{ + return READ_ONCE(*p4dp); +} +#endif + +#ifndef pgdp_get +static inline pgd_t pgdp_get(pgd_t *pgdp) +{ + return READ_ONCE(*pgdp); +} +#endif + #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, From 59c9160a7ee3ff1cbabf5cd238a5b68819624d7d Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 13 Dec 2023 21:30:00 +0100 Subject: [PATCH 196/268] riscv: mm: Only compile pgtable.c if MMU commit d6508999d1882ddd0db8b3b4bd7967d83e9909fa upstream. All functions defined in there depend on MMU, so no need to compile it for !MMU configs. Signed-off-by: Alexandre Ghiti Link: https://lore.kernel.org/r/20231213203001.179237-4-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- arch/riscv/mm/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 3a4dfc8babcf8c..2c869f8026a889 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,10 +13,9 @@ endif KCOV_INSTRUMENT_init.o := n obj-y += init.o -obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o +obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o obj-y += cacheflush.o obj-y += context.o -obj-y += pgtable.o obj-y += pmem.o ifeq ($(CONFIG_MMU),y) From e0316069fae7fbc7a89e024103eb76a3c4ce5dce Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 13 Dec 2023 21:30:01 +0100 Subject: [PATCH 197/268] riscv: Use accessors to page table entries instead of direct dereference commit edf955647269422e387732870d04fc15933a25ea upstream. 
As very well explained in commit 20a004e7b017 ("arm64: mm: Use READ_ONCE/WRITE_ONCE when accessing page tables"), an architecture whose page table walker can modify the PTE in parallel must use READ_ONCE()/WRITE_ONCE() macro to avoid any compiler transformation. So apply that to riscv which is such architecture. Signed-off-by: Alexandre Ghiti Acked-by: Anup Patel Link: https://lore.kernel.org/r/20231213203001.179237-5-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- arch/riscv/include/asm/kfence.h | 4 +-- arch/riscv/include/asm/pgtable-64.h | 16 ++------- arch/riscv/include/asm/pgtable.h | 29 ++++------------ arch/riscv/kernel/efi.c | 2 +- arch/riscv/kvm/mmu.c | 22 ++++++------- arch/riscv/mm/fault.c | 16 ++++----- arch/riscv/mm/hugetlbpage.c | 12 +++---- arch/riscv/mm/kasan_init.c | 45 +++++++++++++------------ arch/riscv/mm/pageattr.c | 44 ++++++++++++------------- arch/riscv/mm/pgtable.c | 51 ++++++++++++++++++++++++++--- 10 files changed, 128 insertions(+), 113 deletions(-) diff --git a/arch/riscv/include/asm/kfence.h b/arch/riscv/include/asm/kfence.h index 0bbffd528096d9..7388edd88986f9 100644 --- a/arch/riscv/include/asm/kfence.h +++ b/arch/riscv/include/asm/kfence.h @@ -18,9 +18,9 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) pte_t *pte = virt_to_kpte(addr); if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~_PAGE_PRESENT)); else - set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | _PAGE_PRESENT)); flush_tlb_kernel_range(addr, addr + PAGE_SIZE); diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index a65a352dcfbfb4..3272ca7a5270bf 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -336,13 +336,7 @@ static inline struct page *p4d_page(p4d_t p4d) #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) #define pud_offset pud_offset -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) -{ - if (pgtable_l4_enabled) - return p4d_pgtable(*p4d) + pud_index(address); - - return (pud_t *)p4d; -} +pud_t *pud_offset(p4d_t *p4d, unsigned long address); static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -400,12 +394,6 @@ static inline struct page *pgd_page(pgd_t pgd) #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) #define p4d_offset p4d_offset -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) -{ - if (pgtable_l5_enabled) - return pgd_pgtable(*pgd) + p4d_index(address); - - return (p4d_t *)pgd; -} +p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index f8e72df4113a65..37829dab4a0a48 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -549,19 +549,12 @@ static inline void pte_clear(struct mm_struct *mm, __set_pte_at(ptep, __pte(0)); } -#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -static inline int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, - pte_t entry, int dirty) -{ - if (!pte_same(*ptep, entry)) - __set_pte_at(ptep, entry); - /* - * update_mmu_cache will unconditionally execute, handling both - * the case that the PTE changed and the spurious fault case. 
- */ - return true; -} +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS /* defined in mm/pgtable.c */ +extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep, pte_t entry, int dirty); +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG /* defined in mm/pgtable.c */ +extern int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, + pte_t *ptep); #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, @@ -574,16 +567,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, return pte; } -#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, - pte_t *ptep) -{ - if (!pte_young(*ptep)) - return 0; - return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep)); -} - #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c index aa6209a74c83ff..b64bf1624a0529 100644 --- a/arch/riscv/kernel/efi.c +++ b/arch/riscv/kernel/efi.c @@ -60,7 +60,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; - pte_t pte = READ_ONCE(*ptep); + pte_t pte = ptep_get(ptep); unsigned long val; if (md->attribute & EFI_MEMORY_RO) { diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 068c7459387102..a9e2fd7245e1e9 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -103,7 +103,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr, *ptep_level = current_level; ptep = (pte_t *)kvm->arch.pgd; ptep = &ptep[gstage_pte_index(addr, current_level)]; - while (ptep && pte_val(*ptep)) { + while (ptep && pte_val(ptep_get(ptep))) { if (gstage_pte_leaf(ptep)) { *ptep_level = current_level; *ptepp = ptep; @@ -113,7 +113,7 @@ static bool gstage_get_leaf_entry(struct kvm *kvm, gpa_t addr, if (current_level) { current_level--; *ptep_level = current_level; - ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); + ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); ptep = &ptep[gstage_pte_index(addr, current_level)]; } else { ptep = NULL; @@ -149,25 +149,25 @@ static int gstage_set_pte(struct kvm *kvm, u32 level, if (gstage_pte_leaf(ptep)) return -EEXIST; - if (!pte_val(*ptep)) { + if (!pte_val(ptep_get(ptep))) { if (!pcache) return -ENOMEM; next_ptep = kvm_mmu_memory_cache_alloc(pcache); if (!next_ptep) return -ENOMEM; - *ptep = pfn_pte(PFN_DOWN(__pa(next_ptep)), - __pgprot(_PAGE_TABLE)); + set_pte(ptep, pfn_pte(PFN_DOWN(__pa(next_ptep)), + __pgprot(_PAGE_TABLE))); } else { if (gstage_pte_leaf(ptep)) return -EEXIST; - next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); } current_level--; ptep = &next_ptep[gstage_pte_index(addr, current_level)]; } - *ptep = *new_pte; + set_pte(ptep, *new_pte); if (gstage_pte_leaf(ptep)) gstage_remote_tlb_flush(kvm, current_level, addr); @@ -239,11 +239,11 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr, BUG_ON(addr & (page_size - 1)); - if (!pte_val(*ptep)) + if (!pte_val(ptep_get(ptep))) return; if (ptep_level && !gstage_pte_leaf(ptep)) { - next_ptep = (pte_t *)gstage_pte_page_vaddr(*ptep); + next_ptep = (pte_t *)gstage_pte_page_vaddr(ptep_get(ptep)); next_ptep_level = ptep_level - 1; ret = gstage_level_to_page_size(next_ptep_level, 
&next_page_size); @@ -261,7 +261,7 @@ static void gstage_op_pte(struct kvm *kvm, gpa_t addr, if (op == GSTAGE_OP_CLEAR) set_pte(ptep, __pte(0)); else if (op == GSTAGE_OP_WP) - set_pte(ptep, __pte(pte_val(*ptep) & ~_PAGE_WRITE)); + set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE)); gstage_remote_tlb_flush(kvm, ptep_level, addr); } } @@ -603,7 +603,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) &ptep, &ptep_level)) return false; - return pte_young(*ptep); + return pte_young(ptep_get(ptep)); } int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu, diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 655b2b1bb529f3..8960f4c844976f 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -137,24 +137,24 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a pgd = (pgd_t *)pfn_to_virt(pfn) + index; pgd_k = init_mm.pgd + index; - if (!pgd_present(*pgd_k)) { + if (!pgd_present(pgdp_get(pgd_k))) { no_context(regs, addr); return; } - set_pgd(pgd, *pgd_k); + set_pgd(pgd, pgdp_get(pgd_k)); p4d_k = p4d_offset(pgd_k, addr); - if (!p4d_present(*p4d_k)) { + if (!p4d_present(p4dp_get(p4d_k))) { no_context(regs, addr); return; } pud_k = pud_offset(p4d_k, addr); - if (!pud_present(*pud_k)) { + if (!pud_present(pudp_get(pud_k))) { no_context(regs, addr); return; } - if (pud_leaf(*pud_k)) + if (pud_leaf(pudp_get(pud_k))) goto flush_tlb; /* @@ -162,11 +162,11 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * to copy individual PTEs */ pmd_k = pmd_offset(pud_k, addr); - if (!pmd_present(*pmd_k)) { + if (!pmd_present(pmdp_get(pmd_k))) { no_context(regs, addr); return; } - if (pmd_leaf(*pmd_k)) + if (pmd_leaf(pmdp_get(pmd_k))) goto flush_tlb; /* @@ -176,7 +176,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a * silently loop forever. 
*/ pte_k = pte_offset_kernel(pmd_k, addr); - if (!pte_present(*pte_k)) { + if (!pte_present(ptep_get(pte_k))) { no_context(regs, addr); return; } diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index fbe918801667d5..5ef2a6891158a6 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -54,7 +54,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, } if (sz == PMD_SIZE) { - if (want_pmd_share(vma, addr) && pud_none(*pud)) + if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud))) pte = huge_pmd_share(mm, vma, addr, pud); else pte = (pte_t *)pmd_alloc(mm, pud, addr); @@ -93,11 +93,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) return NULL; p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) return NULL; pud = pud_offset(p4d, addr); @@ -105,7 +105,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, /* must be pud huge, non-present or none */ return (pte_t *)pud; - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) return NULL; pmd = pmd_offset(pud, addr); @@ -113,7 +113,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm, /* must be pmd huge, non-present or none */ return (pte_t *)pmd; - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) return NULL; for_each_napot_order(order) { @@ -351,7 +351,7 @@ void huge_pte_clear(struct mm_struct *mm, pte_t *ptep, unsigned long sz) { - pte_t pte = READ_ONCE(*ptep); + pte_t pte = ptep_get(ptep); int i, pte_num; if (!pte_napot(pte)) { diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 5e39dcf23fdbc1..e962518530373d 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -31,7 +31,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned phys_addr_t phys_addr; pte_t *ptep, *p; - if (pmd_none(*pmd)) { + if (pmd_none(pmdp_get(pmd))) { p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE); set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -39,7 +39,7 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned ptep = pte_offset_kernel(pmd, vaddr); do { - if (pte_none(*ptep)) { + if (pte_none(ptep_get(ptep))) { phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL)); memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE); @@ -53,7 +53,7 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned pmd_t *pmdp, *p; unsigned long next; - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -63,7 +63,8 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned do { next = pmd_addr_end(vaddr, end); - if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) { + if (pmd_none(pmdp_get(pmdp)) && IS_ALIGNED(vaddr, PMD_SIZE) && + (next - vaddr) >= PMD_SIZE) { phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE); if (phys_addr) { set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL)); @@ -83,7 +84,7 @@ static void __init kasan_populate_pud(p4d_t *p4d, pud_t *pudp, *p; unsigned long next; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -93,7 +94,8 @@ static void __init kasan_populate_pud(p4d_t *p4d, do { 
next = pud_addr_end(vaddr, end); - if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { + if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) && + (next - vaddr) >= PUD_SIZE) { phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); if (phys_addr) { set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); @@ -113,7 +115,7 @@ static void __init kasan_populate_p4d(pgd_t *pgd, p4d_t *p4dp, *p; unsigned long next; - if (pgd_none(*pgd)) { + if (pgd_none(pgdp_get(pgd))) { p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE); set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } @@ -123,7 +125,8 @@ static void __init kasan_populate_p4d(pgd_t *pgd, do { next = p4d_addr_end(vaddr, end); - if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { + if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) && + (next - vaddr) >= P4D_SIZE) { phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE); if (phys_addr) { set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL)); @@ -145,7 +148,7 @@ static void __init kasan_populate_pgd(pgd_t *pgdp, do { next = pgd_addr_end(vaddr, end); - if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); if (phys_addr) { @@ -168,7 +171,7 @@ static void __init kasan_early_clear_pud(p4d_t *p4dp, if (!pgtable_l4_enabled) { pudp = (pud_t *)p4dp; } else { - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp)))); pudp = base_pud + pud_index(vaddr); } @@ -193,7 +196,7 @@ static void __init kasan_early_clear_p4d(pgd_t *pgdp, if (!pgtable_l5_enabled) { p4dp = (p4d_t *)pgdp; } else { - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp)))); p4dp = base_p4d + p4d_index(vaddr); } @@ -239,14 +242,14 @@ static void __init kasan_early_populate_pud(p4d_t *p4dp, if (!pgtable_l4_enabled) { pudp = (pud_t *)p4dp; } else { - base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp))); + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(p4dp_get(p4dp)))); pudp = base_pud + pud_index(vaddr); } do { next = pud_addr_end(vaddr, end); - if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && + if (pud_none(pudp_get(pudp)) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd); set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); @@ -277,14 +280,14 @@ static void __init kasan_early_populate_p4d(pgd_t *pgdp, if (!pgtable_l5_enabled) { p4dp = (p4d_t *)pgdp; } else { - base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp))); + base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(pgdp_get(pgdp)))); p4dp = base_p4d + p4d_index(vaddr); } do { next = p4d_addr_end(vaddr, end); - if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && + if (p4d_none(p4dp_get(p4dp)) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) { phys_addr = __pa((uintptr_t)kasan_early_shadow_pud); set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE)); @@ -305,7 +308,7 @@ static void __init kasan_early_populate_pgd(pgd_t *pgdp, do { next = pgd_addr_end(vaddr, end); - if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) && + if (pgd_none(pgdp_get(pgdp)) && IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d); 
set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); @@ -381,7 +384,7 @@ static void __init kasan_shallow_populate_pud(p4d_t *p4d, do { next = pud_addr_end(vaddr, end); - if (pud_none(*pud_k)) { + if (pud_none(pudp_get(pud_k))) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; @@ -401,7 +404,7 @@ static void __init kasan_shallow_populate_p4d(pgd_t *pgd, do { next = p4d_addr_end(vaddr, end); - if (p4d_none(*p4d_k)) { + if (p4d_none(p4dp_get(p4d_k))) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; @@ -420,7 +423,7 @@ static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long do { next = pgd_addr_end(vaddr, end); - if (pgd_none(*pgd_k)) { + if (pgd_none(pgdp_get(pgd_k))) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); continue; @@ -451,7 +454,7 @@ static void __init create_tmp_mapping(void) /* Copy the last p4d since it is shared with the kernel mapping. */ if (pgtable_l5_enabled) { - ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); + ptr = (p4d_t *)pgd_page_vaddr(pgdp_get(pgd_offset_k(KASAN_SHADOW_END))); memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D); set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)], pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE)); @@ -462,7 +465,7 @@ static void __init create_tmp_mapping(void) /* Copy the last pud since it is shared with the kernel mapping. */ if (pgtable_l4_enabled) { - ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END))); + ptr = (pud_t *)p4d_page_vaddr(p4dp_get(base_p4d + p4d_index(KASAN_SHADOW_END))); memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD); set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)], pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE)); diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index f61b2f8291e357..271d01a5ba4da1 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -29,7 +29,7 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { - p4d_t val = READ_ONCE(*p4d); + p4d_t val = p4dp_get(p4d); if (p4d_leaf(val)) { val = __p4d(set_pageattr_masks(p4d_val(val), walk)); @@ -42,7 +42,7 @@ static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, static int pageattr_pud_entry(pud_t *pud, unsigned long addr, unsigned long next, struct mm_walk *walk) { - pud_t val = READ_ONCE(*pud); + pud_t val = pudp_get(pud); if (pud_leaf(val)) { val = __pud(set_pageattr_masks(pud_val(val), walk)); @@ -55,7 +55,7 @@ static int pageattr_pud_entry(pud_t *pud, unsigned long addr, static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { - pmd_t val = READ_ONCE(*pmd); + pmd_t val = pmdp_get(pmd); if (pmd_leaf(val)) { val = __pmd(set_pageattr_masks(pmd_val(val), walk)); @@ -68,7 +68,7 @@ static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, static int pageattr_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { - pte_t val = READ_ONCE(*pte); + pte_t val = ptep_get(pte); val = __pte(set_pageattr_masks(pte_val(val), walk)); set_pte(pte, val); @@ -108,10 +108,10 @@ static int __split_linear_mapping_pmd(pud_t *pudp, vaddr <= (vaddr & PMD_MASK) && end >= next) continue; - if (pmd_leaf(*pmdp)) { + if (pmd_leaf(pmdp_get(pmdp))) { struct page *pte_page; - unsigned long pfn = 
_pmd_pfn(*pmdp); - pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK); + unsigned long pfn = _pmd_pfn(pmdp_get(pmdp)); + pgprot_t prot = __pgprot(pmd_val(pmdp_get(pmdp)) & ~_PAGE_PFN_MASK); pte_t *ptep_new; int i; @@ -148,10 +148,10 @@ static int __split_linear_mapping_pud(p4d_t *p4dp, vaddr <= (vaddr & PUD_MASK) && end >= next) continue; - if (pud_leaf(*pudp)) { + if (pud_leaf(pudp_get(pudp))) { struct page *pmd_page; - unsigned long pfn = _pud_pfn(*pudp); - pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK); + unsigned long pfn = _pud_pfn(pudp_get(pudp)); + pgprot_t prot = __pgprot(pud_val(pudp_get(pudp)) & ~_PAGE_PFN_MASK); pmd_t *pmdp_new; int i; @@ -197,10 +197,10 @@ static int __split_linear_mapping_p4d(pgd_t *pgdp, vaddr <= (vaddr & P4D_MASK) && end >= next) continue; - if (p4d_leaf(*p4dp)) { + if (p4d_leaf(p4dp_get(p4dp))) { struct page *pud_page; - unsigned long pfn = _p4d_pfn(*p4dp); - pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK); + unsigned long pfn = _p4d_pfn(p4dp_get(p4dp)); + pgprot_t prot = __pgprot(p4d_val(p4dp_get(p4dp)) & ~_PAGE_PFN_MASK); pud_t *pudp_new; int i; @@ -427,29 +427,29 @@ bool kernel_page_present(struct page *page) pte_t *pte; pgd = pgd_offset_k(addr); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) return false; - if (pgd_leaf(*pgd)) + if (pgd_leaf(pgdp_get(pgd))) return true; p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) return false; - if (p4d_leaf(*p4d)) + if (p4d_leaf(p4dp_get(p4d))) return true; pud = pud_offset(p4d, addr); - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) return false; - if (pud_leaf(*pud)) + if (pud_leaf(pudp_get(pud))) return true; pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) return false; - if (pmd_leaf(*pmd)) + if (pmd_leaf(pmdp_get(pmd))) return true; pte = pte_offset_kernel(pmd, addr); - return pte_present(*pte); + return pte_present(ptep_get(pte)); } diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index fef4e7328e4905..ef887efcb67900 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -5,6 +5,47 @@ #include #include +int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + if (!pte_same(ptep_get(ptep), entry)) + __set_pte_at(ptep, entry); + /* + * update_mmu_cache will unconditionally execute, handling both + * the case that the PTE changed and the spurious fault case. 
+ */ + return true; +} + +int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) +{ + if (!pte_young(ptep_get(ptep))) + return 0; + return test_and_clear_bit(_PAGE_ACCESSED_OFFSET, &pte_val(*ptep)); +} +EXPORT_SYMBOL_GPL(ptep_test_and_clear_young); + +#ifdef CONFIG_64BIT +pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + if (pgtable_l4_enabled) + return p4d_pgtable(p4dp_get(p4d)) + pud_index(address); + + return (pud_t *)p4d; +} + +p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +{ + if (pgtable_l5_enabled) + return pgd_pgtable(pgdp_get(pgd)) + p4d_index(address); + + return (p4d_t *)pgd; +} +#endif + #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { @@ -25,7 +66,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) int pud_clear_huge(pud_t *pud) { - if (!pud_leaf(READ_ONCE(*pud))) + if (!pud_leaf(pudp_get(pud))) return 0; pud_clear(pud); return 1; @@ -33,7 +74,7 @@ int pud_clear_huge(pud_t *pud) int pud_free_pmd_page(pud_t *pud, unsigned long addr) { - pmd_t *pmd = pud_pgtable(*pud); + pmd_t *pmd = pud_pgtable(pudp_get(pud)); int i; pud_clear(pud); @@ -63,7 +104,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { - if (!pmd_leaf(READ_ONCE(*pmd))) + if (!pmd_leaf(pmdp_get(pmd))) return 0; pmd_clear(pmd); return 1; @@ -71,7 +112,7 @@ int pmd_clear_huge(pmd_t *pmd) int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { - pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + pte_t *pte = (pte_t *)pmd_page_vaddr(pmdp_get(pmd)); pmd_clear(pmd); @@ -88,7 +129,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmdp))); /* * When leaf PTE entries (regular pages) are collapsed into a leaf * PMD entry (huge page), a valid non-leaf PTE is converted into a From 0b983c08ca810470a7020c212d6f6b0e38926928 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Fri, 19 Jan 2024 17:04:57 +0800 Subject: [PATCH 198/268] ACPI: CPPC: Add helper to get the highest performance value commit 12753d71e8c5c3e716cedba23ddeed508da0bdc4 upstream. Add support for getting the highest performance to the generic CPPC driver. This enables downstream drivers such as amd-pstate to discover and use these values. Refer to Chapter 8.4.6.1.1.1. Highest Performance of ACPI Specification 6.5 for details on continuous performance control of CPPC (linked below). Tested-by: Oleksandr Natalenko Reviewed-by: Mario Limonciello Reviewed-by: Wyes Karny Reviewed-by: Perry Yuan Acked-by: Huang Rui Signed-off-by: Meng Li Link: https://uefi.org/specs/ACPI/6.5/08_Processor_Configuration_and_Control.html?highlight=cppc#highest-performance [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/cppc_acpi.c | 13 +++++++++++++ include/acpi/cppc_acpi.h | 5 +++++ 2 files changed, 18 insertions(+) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d3b9da75a8155a..d6934ba7a3154e 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1196,6 +1196,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf); } +/** + * cppc_get_highest_perf - Get the highest performance register value. + * @cpunum: CPU from which to get highest performance. 
+ * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_highest_perf); + /** * cppc_get_epp_perf - Get the epp register value. * @cpunum: CPU from which to get epp preference value. diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index 6126c977ece041..c0b69ffe7bdb4b 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -139,6 +139,7 @@ struct cppc_cpudata { #ifdef CONFIG_ACPI_CPPC_LIB extern int cppc_get_desired_perf(int cpunum, u64 *desired_perf); extern int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf); +extern int cppc_get_highest_perf(int cpunum, u64 *highest_perf); extern int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs); extern int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls); extern int cppc_set_enable(int cpu, bool enable); @@ -165,6 +166,10 @@ static inline int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) { return -ENOTSUPP; } +static inline int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return -ENOTSUPP; +} static inline int cppc_get_perf_ctrs(int cpu, struct cppc_perf_fb_ctrs *perf_fb_ctrs) { return -ENOTSUPP; From 1ec40a175a4841ad017022228fb99be8c6bac727 Mon Sep 17 00:00:00 2001 From: Meng Li Date: Fri, 19 Jan 2024 17:04:58 +0800 Subject: [PATCH 199/268] cpufreq: amd-pstate: Enable amd-pstate preferred core support commit f3a052391822b772b4e27f2594526cf1eb103cab upstream. amd-pstate driver utilizes the functions and data structures provided by the ITMT architecture to enable the scheduler to favor scheduling on cores which can be get a higher frequency with lower voltage. We call it amd-pstate preferrred core. Here sched_set_itmt_core_prio() is called to set priorities and sched_set_itmt_support() is called to enable ITMT feature. amd-pstate driver uses the highest performance value to indicate the priority of CPU. The higher value has a higher priority. The initial core rankings are set up by amd-pstate when the system boots. Add a variable hw_prefcore in cpudata structure. It will check if the processor and power firmware support preferred core feature. Add one new early parameter `disable` to allow user to disable the preferred core. Only when hardware supports preferred core and user set `enabled` in early parameter, amd pstate driver supports preferred core featue. Tested-by: Oleksandr Natalenko Reviewed-by: Huang Rui Reviewed-by: Wyes Karny Reviewed-by: Mario Limonciello Co-developed-by: Perry Yuan Signed-off-by: Perry Yuan Signed-off-by: Meng Li Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 131 ++++++++++++++++++++++++++++++++--- include/linux/amd-pstate.h | 4 ++ 2 files changed, 127 insertions(+), 8 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 23c74e9f04c48c..91905529ce311e 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -49,6 +50,7 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_PREFCORE_THRESHOLD 166 /* * TODO: We need more time to fine tune processors with shared memory solution @@ -64,6 +66,7 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; /* * AMD Energy Preference Performance (EPP) @@ -320,13 +323,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + /* For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as + * the default max perf. */ - highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -347,8 +351,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -709,6 +714,80 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. 
+ */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); +} + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + int ret, prio; + u32 highest_perf; + + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; + + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } + + if (!amd_pstate_prefcore) + return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ + sched_set_itmt_core_prio(prio, cpudata->cpu); + + schedule_work(&sched_prefcore_work); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -730,6 +809,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -880,6 +961,17 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1077,18 +1169,27 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? 
ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_hw_prefcore, NULL, }; @@ -1096,6 +1197,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1103,6 +1205,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1154,6 +1257,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -1577,7 +1682,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h index 6ad02ad9c7b42d..68fc1bd8d851e9 100644 --- a/include/linux/amd-pstate.h +++ b/include/linux/amd-pstate.h @@ -52,6 +52,9 @@ struct amd_aperf_mperf { * @prev: Last Aperf/Mperf/tsc count value read from register * @freq: current cpu frequency value * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. * @epp_policy: Last saved policy used to set energy-performance preference * @epp_cached: Cached CPPC energy-performance preference value * @policy: Cpufreq policy value @@ -85,6 +88,7 @@ struct amd_cpudata { u64 freq; bool boost_supported; + bool hw_prefcore; /* EPP feature related attributes*/ s16 epp_policy; From 3b843046dbbed7de61e44832dffeeca9417ec34f Mon Sep 17 00:00:00 2001 From: Perry Yuan Date: Wed, 8 May 2024 13:47:03 +0800 Subject: [PATCH 200/268] cpufreq: amd-pstate: fix the highest frequency issue which limits performance commit bf202e654bfa57fb8cf9d93d4c6855890b70b9c4 upstream. To address the performance drop issue, an optimization has been implemented. The incorrect highest performance value previously set by the low-level power firmware for AMD CPUs with Family ID 0x19 and Model ID ranging from 0x70 to 0x7F series has been identified as the cause. To resolve this, a check has been implemented to accurately determine the CPU family and model ID. 
The correct highest performance value is now set and the performance drop caused by the incorrect highest performance value are eliminated. Before the fix, the highest frequency was set to 4200MHz, now it is set to 4971MHz which is correct. CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ MINMHZ MHZ 0 0 0 0 0:0:0:0 yes 4971.0000 400.0000 400.0000 1 0 0 0 0:0:0:0 yes 4971.0000 400.0000 400.0000 2 0 0 1 1:1:1:0 yes 4971.0000 400.0000 4865.8140 3 0 0 1 1:1:1:0 yes 4971.0000 400.0000 400.0000 Fixes: f3a052391822 ("cpufreq: amd-pstate: Enable amd-pstate preferred core support") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218759 Signed-off-by: Perry Yuan Co-developed-by: Mario Limonciello Signed-off-by: Mario Limonciello Tested-by: Gaha Bana Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/cpufreq/amd-pstate.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 91905529ce311e..f461f99eb040c6 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -50,7 +50,8 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 -#define AMD_PSTATE_PREFCORE_THRESHOLD 166 +#define CPPC_HIGHEST_PERF_PERFORMANCE 196 +#define CPPC_HIGHEST_PERF_DEFAULT 166 /* * TODO: We need more time to fine tune processors with shared memory solution @@ -313,6 +314,21 @@ static inline int amd_pstate_enable(bool enable) return static_call(amd_pstate_enable)(enable); } +static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + /* + * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, + * the highest performance level is set to 196. + * https://bugzilla.kernel.org/show_bug.cgi?id=218759 + */ + if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f)) + return CPPC_HIGHEST_PERF_PERFORMANCE; + + return CPPC_HIGHEST_PERF_DEFAULT; +} + static int pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; @@ -329,7 +345,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) * the default max perf. */ if (cpudata->hw_prefcore) - highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + highest_perf = amd_pstate_highest_perf_set(cpudata); else highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); @@ -352,7 +368,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) return ret; if (cpudata->hw_prefcore) - highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + highest_perf = amd_pstate_highest_perf_set(cpudata); else highest_perf = cppc_perf.highest_perf; From 4fe707a2978929b498d3730d77a6ab103881420d Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Wed, 24 Jul 2024 12:25:16 +0200 Subject: [PATCH 201/268] tcp: process the 3rd ACK with sk_socket for TFO/MPTCP commit c1668292689ad2ee16c9c1750a8044b0b0aad663 upstream. The 'Fixes' commit recently changed the behaviour of TCP by skipping the processing of the 3rd ACK when a sk->sk_socket is set. The goal was to skip tcp_ack_snd_check() in tcp_rcv_state_process() not to send an unnecessary ACK in case of simultaneous connect(). Unfortunately, that had an impact on TFO and MPTCP. I started to look at the impact on MPTCP, because the MPTCP CI found some issues with the MPTCP Packetdrill tests [1]. Then Paolo Abeni suggested me to look at the impact on TFO with "plain" TCP. 
For MPTCP, when receiving the 3rd ACK of a request adding a new path (MP_JOIN), sk->sk_socket will be set, and point to the MPTCP sock that has been created when the MPTCP connection got established before with the first path. The newly added 'goto' will then skip the processing of the segment text (step 7) and not go through tcp_data_queue() where the MPTCP options are validated, and some actions are triggered, e.g. sending the MPJ 4th ACK [2] as demonstrated by the new errors when running a packetdrill test [3] establishing a second subflow. This doesn't fully break MPTCP, mainly the 4th MPJ ACK that will be delayed. Still, we don't want to have this behaviour as it delays the switch to the fully established mode, and invalid MPTCP options in this 3rd ACK will not be caught any more. This modification also affects the MPTCP + TFO feature as well, and being the reason why the selftests started to be unstable the last few days [4]. For TFO, the existing 'basic-cookie-not-reqd' test [5] was no longer passing: if the 3rd ACK contains data, and the connection is accept()ed before receiving them, these data would no longer be processed, and thus not ACKed. One last thing about MPTCP, in case of simultaneous connect(), a fallback to TCP will be done, which seems fine: `../common/defaults.sh` 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_MPTCP) = 3 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0 > S 0:0(0) +0 < S 0:0(0) win 1000 +0 > S. 0:0(0) ack 1 +0 < S. 0:0(0) ack 1 win 65535 +0 > . 1:1(0) ack 1 Simultaneous SYN-data crossing is also not supported by TFO, see [6]. Kuniyuki Iwashima suggested to restrict the processing to SYN+ACK only: that's a more generic solution than the one initially proposed, and also enough to fix the issues described above. Later on, Eric Dumazet mentioned that an ACK should still be sent in reaction to the second SYN+ACK that is received: not sending a DUPACK here seems wrong and could hurt: 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) +0 > S 0:0(0) +0 < S 0:0(0) win 1000 +0 > S. 0:0(0) ack 1 +0 < S. 0:0(0) ack 1 win 1000 +0 > . 1:1(0) ack 1 // <== Here So in this version, the 'goto consume' is dropped, to always send an ACK when switching from TCP_SYN_RECV to TCP_ESTABLISHED. This ACK will be seen as a DUPACK -- with DSACK if SACK has been negotiated -- in case of simultaneous SYN crossing: that's what is expected here. 
Link: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/9936227696 [1] Link: https://datatracker.ietf.org/doc/html/rfc8684#fig_tokens [2] Link: https://github.com/multipath-tcp/packetdrill/blob/mptcp-net-next/gtests/net/mptcp/syscalls/accept.pkt#L28 [3] Link: https://netdev.bots.linux.dev/contest.html?executor=vmksft-mptcp-dbg&test=mptcp-connect-sh [4] Link: https://github.com/google/packetdrill/blob/master/gtests/net/tcp/fastopen/server/basic-cookie-not-reqd.pkt#L21 [5] Link: https://github.com/google/packetdrill/blob/master/gtests/net/tcp/fastopen/client/simultaneous-fast-open.pkt [6] Fixes: 23e89e8ee7be ("tcp: Don't drop SYN+ACK for simultaneous connect().") Suggested-by: Paolo Abeni Suggested-by: Kuniyuki Iwashima Suggested-by: Eric Dumazet Signed-off-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240724-upstream-net-next-20240716-tcp-3rd-ack-consume-sk_socket-v3-1-d48339764ce9@kernel.org Signed-off-by: Paolo Abeni Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 336bc97e86d5ee..b565c8a8e7badf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6679,9 +6679,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_fast_path_on(tp); if (sk->sk_shutdown & SEND_SHUTDOWN) tcp_shutdown(sk, SEND_SHUTDOWN); - - if (sk->sk_socket) - goto consume; break; case TCP_FIN_WAIT1: { From 5c007a9804507f3cf46f326ab281ef506117f3de Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 18 Feb 2024 09:42:21 +0200 Subject: [PATCH 202/268] intel: legacy: Partial revert of field get conversion commit ba54b1a276a6b69d80649942fe5334d19851443e upstream. Refactoring of the field get conversion introduced a regression in the legacy Wake On Lan from a magic packet with i219 devices. Rx address copied not correctly from MAC to PHY with FIELD_GET macro. Fixes: b9a452545075 ("intel: legacy: field get conversion") Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Naama Meir Signed-off-by: Tony Nguyen Cc: Florian Larysch Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 4d83c9a0c023a7..f9e94be36e97f2 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -2573,7 +2573,7 @@ void e1000_copy_rx_addrs_to_phy_ich8lan(struct e1000_hw *hw) hw->phy.ops.write_reg_page(hw, BM_RAR_H(i), (u16)(mac_reg & 0xFFFF)); hw->phy.ops.write_reg_page(hw, BM_RAR_CTRL(i), - FIELD_GET(E1000_RAH_AV, mac_reg)); + (u16)((mac_reg & E1000_RAH_AV) >> 16)); } e1000_disable_phy_wakeup_reg_access_bm(hw, &phy_reg); From dc12e49f970b08d8b007b8981b97e2eb93c0e89d Mon Sep 17 00:00:00 2001 From: Aleksandr Mishin Date: Wed, 3 Jul 2024 18:45:06 +0300 Subject: [PATCH 203/268] staging: iio: frequency: ad9834: Validate frequency parameter value commit b48aa991758999d4e8f9296c5bbe388f293ef465 upstream. In ad9834_write_frequency() clk_get_rate() can return 0. In such case ad9834_calc_freqreg() call will lead to division by zero. Checking 'if (fout > (clk_freq / 2))' doesn't protect in case of 'fout' is 0. ad9834_write_frequency() is called from ad9834_write(), where fout is taken from text buffer, which can contain any value. Modify parameters checking. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
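The failure mode is easy to reproduce outside the kernel. In the sketch below the shift width and the error path only approximate what ad9834_calc_freqreg() and the driver do; the point is that the old bound check alone lets a zero clock rate through, while the combined check in the fix below rejects it.

    /* sketch: why "fout > clk_freq / 2" does not guard against clk_freq == 0 */
    #include <errno.h>
    #include <stdio.h>

    static int write_frequency(unsigned long clk_freq, unsigned long fout)
    {
            if (!clk_freq || fout > (clk_freq / 2))   /* checks from the fix */
                    return -EINVAL;

            /*
             * With only the old check, clk_freq == 0 and fout == 0 get here
             * (0 > 0 is false) and the register math divides by zero.
             */
            printf("freqreg = %llu\n",
                   (unsigned long long)fout * (1ULL << 28) / clk_freq);
            return 0;
    }

    int main(void)
    {
            printf("clk = 0: %d\n", write_frequency(0, 0));           /* now -EINVAL */
            printf("normal:  %d\n", write_frequency(75000000, 1000)); /* fine */
            return 0;
    }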
Fixes: 12b9d5bf76bf ("Staging: IIO: DDS: AD9833 / AD9834 driver") Suggested-by: Dan Carpenter Signed-off-by: Aleksandr Mishin Reviewed-by: Dan Carpenter Link: https://patch.msgid.link/20240703154506.25584-1-amishin@t-argos.ru Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/staging/iio/frequency/ad9834.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c index 285df0e489a625..cfdfe66d74f17c 100644 --- a/drivers/staging/iio/frequency/ad9834.c +++ b/drivers/staging/iio/frequency/ad9834.c @@ -114,7 +114,7 @@ static int ad9834_write_frequency(struct ad9834_state *st, clk_freq = clk_get_rate(st->mclk); - if (fout > (clk_freq / 2)) + if (!clk_freq || fout > (clk_freq / 2)) return -EINVAL; regval = ad9834_calc_freqreg(clk_freq, fout); From cb0f3f0c102ba2ef07a80e7bcad9f8dee61c81e9 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Tue, 23 Jul 2024 11:32:21 -0500 Subject: [PATCH 204/268] iio: buffer-dmaengine: fix releasing dma channel on error commit 84c65d8008764a8fb4e627ff02de01ec4245f2c4 upstream. If dma_get_slave_caps() fails, we need to release the dma channel before returning an error to avoid leaking the channel. Fixes: 2d6ca60f3284 ("iio: Add a DMAengine framework based buffer") Signed-off-by: David Lechner Link: https://patch.msgid.link/20240723-iio-fix-dmaengine-free-on-error-v1-1-2c7cbc9b92ff@baylibre.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/buffer/industrialio-buffer-dmaengine.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c index 0d53c0a07b0d6f..db5dbd60cf6759 100644 --- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c +++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c @@ -180,7 +180,7 @@ struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, ret = dma_get_slave_caps(chan, &caps); if (ret < 0) - goto err_free; + goto err_release; /* Needs to be aligned to the maximum of the minimums */ if (caps.src_addr_widths) @@ -206,6 +206,8 @@ struct iio_buffer *iio_dmaengine_buffer_alloc(struct device *dev, return &dmaengine_buffer->queue.buffer; +err_release: + dma_release_channel(chan); err_free: kfree(dmaengine_buffer); return ERR_PTR(ret); From ecc8e1bcac2778045c15de21242089926f7f806a Mon Sep 17 00:00:00 2001 From: Matteo Martelli Date: Tue, 30 Jul 2024 10:11:53 +0200 Subject: [PATCH 205/268] iio: fix scale application in iio_convert_raw_to_processed_unlocked commit 8a3dcc970dc57b358c8db2702447bf0af4e0d83a upstream. When the scale_type is IIO_VAL_INT_PLUS_MICRO or IIO_VAL_INT_PLUS_NANO the scale passed as argument is only applied to the fractional part of the value. Fix it by also multiplying the integer part by the scale provided. 
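A worked number makes the error obvious. The values below are invented purely for illustration (a channel scale of 2.5 units per LSB, a raw reading of 100, and a caller asking for milli-units, i.e. scale = 1000); the expressions mirror the IIO_VAL_INT_PLUS_MICRO branch before and after the change in the diff below.

    /* sketch: INT_PLUS_MICRO conversion, broken vs fixed integer part */
    #include <stdio.h>

    int main(void)
    {
            long long raw = 100;
            long long scale_val = 2, scale_val2 = 500000;  /* 2.5 units per LSB */
            long long scale = 1000;                        /* want milli-units */

            long long before = raw * scale_val
                             + raw * scale_val2 * scale / 1000000LL;
            long long after  = raw * scale_val * scale
                             + raw * scale_val2 * scale / 1000000LL;

            /* 100 * 2.5 units = 250 units = 250000 milli-units */
            printf("before: %lld  after: %lld\n", before, after);
            return 0;
    }

The unfixed expression yields 50200 because the integer half of the scale never sees the requested multiplier; the fixed one yields the expected 250000.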
Fixes: 48e44ce0f881 ("iio:inkern: Add function to read the processed value") Signed-off-by: Matteo Martelli Link: https://patch.msgid.link/20240730-iio-fix-scale-v1-1-6246638c8daa@gmail.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/inkern.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index b8555653847573..80e1c45485c9b0 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -680,17 +680,17 @@ static int iio_convert_raw_to_processed_unlocked(struct iio_channel *chan, break; case IIO_VAL_INT_PLUS_MICRO: if (scale_val2 < 0) - *processed = -raw64 * scale_val; + *processed = -raw64 * scale_val * scale; else - *processed = raw64 * scale_val; + *processed = raw64 * scale_val * scale; *processed += div_s64(raw64 * (s64)scale_val2 * scale, 1000000LL); break; case IIO_VAL_INT_PLUS_NANO: if (scale_val2 < 0) - *processed = -raw64 * scale_val; + *processed = -raw64 * scale_val * scale; else - *processed = raw64 * scale_val; + *processed = raw64 * scale_val * scale; *processed += div_s64(raw64 * (s64)scale_val2 * scale, 1000000000LL); break; From fb5d58f238c46a790d6408de426c8b3702602dc2 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Wed, 31 Jul 2024 15:37:23 +0300 Subject: [PATCH 206/268] iio: adc: ad7124: fix config comparison commit 2f6b92d0f69f04d9e2ea0db1228ab7f82f3173af upstream. The ad7124_find_similar_live_cfg() computes the compare size by substracting the address of the cfg struct from the address of the live field. Because the live field is the first field in the struct, the result is 0. Also, the memcmp() call is made from the start of the cfg struct, which includes the live and cfg_slot fields, which are not relevant for the comparison. Fix by grouping the relevant fields with struct_group() and use the size of the group to compute the compare size; make the memcmp() call from the address of the group. Fixes: 7b8d045e497a ("iio: adc: ad7124: allow more than 8 channels") Signed-off-by: Dumitru Ceclan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240731-ad7124-fix-v1-2-46a76aa4b9be@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7124.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index b9b206fcd748f5..c56227f42a5a36 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -146,15 +146,18 @@ struct ad7124_chip_info { struct ad7124_channel_config { bool live; unsigned int cfg_slot; - enum ad7124_ref_sel refsel; - bool bipolar; - bool buf_positive; - bool buf_negative; - unsigned int vref_mv; - unsigned int pga_bits; - unsigned int odr; - unsigned int odr_sel_bits; - unsigned int filter_type; + /* Following fields are used to compare equality. 
*/ + struct_group(config_props, + enum ad7124_ref_sel refsel; + bool bipolar; + bool buf_positive; + bool buf_negative; + unsigned int vref_mv; + unsigned int pga_bits; + unsigned int odr; + unsigned int odr_sel_bits; + unsigned int filter_type; + ); }; struct ad7124_channel { @@ -333,11 +336,12 @@ static struct ad7124_channel_config *ad7124_find_similar_live_cfg(struct ad7124_ ptrdiff_t cmp_size; int i; - cmp_size = (u8 *)&cfg->live - (u8 *)cfg; + cmp_size = sizeof_field(struct ad7124_channel_config, config_props); for (i = 0; i < st->num_channels; i++) { cfg_aux = &st->channels[i].cfg; - if (cfg_aux->live && !memcmp(cfg, cfg_aux, cmp_size)) + if (cfg_aux->live && + !memcmp(&cfg->config_props, &cfg_aux->config_props, cmp_size)) return cfg_aux; } From 66d0d59afe556c1c7d33d450d0b4c488a124efce Mon Sep 17 00:00:00 2001 From: Guillaume Stols Date: Tue, 2 Jul 2024 12:52:51 +0000 Subject: [PATCH 207/268] iio: adc: ad7606: remove frstdata check for serial mode commit 90826e08468ba7fb35d8b39645b22d9e80004afe upstream. The current implementation attempts to recover from an eventual glitch in the clock by checking frstdata state after reading the first channel's sample: If frstdata is low, it will reset the chip and return -EIO. This will only work in parallel mode, where frstdata pin is set low after the 2nd sample read starts. For the serial mode, according to the datasheet, "The FRSTDATA output returns to a logic low following the 16th SCLK falling edge.", thus after the Xth pulse, X being the number of bits in a sample, the check will always be true, and the driver will not work at all in serial mode if frstdata(optional) is defined in the devicetree as it will reset the chip, and return -EIO every time read_sample is called. Hence, this check must be removed for serial mode. Fixes: b9618c0cacd7 ("staging: IIO: ADC: New driver for AD7606/AD7606-6/AD7606-4") Signed-off-by: Guillaume Stols Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240702-cleanup-ad7606-v3-1-18d5ea18770e@baylibre.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7606.c | 28 ++------------------- drivers/iio/adc/ad7606.h | 2 ++ drivers/iio/adc/ad7606_par.c | 48 +++++++++++++++++++++++++++++++++--- 3 files changed, 49 insertions(+), 29 deletions(-) diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c index 1928d9ae5bcffd..1c08c0921ee712 100644 --- a/drivers/iio/adc/ad7606.c +++ b/drivers/iio/adc/ad7606.c @@ -49,7 +49,7 @@ static const unsigned int ad7616_oversampling_avail[8] = { 1, 2, 4, 8, 16, 32, 64, 128, }; -static int ad7606_reset(struct ad7606_state *st) +int ad7606_reset(struct ad7606_state *st) { if (st->gpio_reset) { gpiod_set_value(st->gpio_reset, 1); @@ -60,6 +60,7 @@ static int ad7606_reset(struct ad7606_state *st) return -ENODEV; } +EXPORT_SYMBOL_NS_GPL(ad7606_reset, IIO_AD7606); static int ad7606_reg_access(struct iio_dev *indio_dev, unsigned int reg, @@ -88,31 +89,6 @@ static int ad7606_read_samples(struct ad7606_state *st) { unsigned int num = st->chip_info->num_channels - 1; u16 *data = st->data; - int ret; - - /* - * The frstdata signal is set to high while and after reading the sample - * of the first channel and low for all other channels. This can be used - * to check that the incoming data is correctly aligned. During normal - * operation the data should never become unaligned, but some glitch or - * electrostatic discharge might cause an extra read or clock cycle. 
- * Monitoring the frstdata signal allows to recover from such failure - * situations. - */ - - if (st->gpio_frstdata) { - ret = st->bops->read_block(st->dev, 1, data); - if (ret) - return ret; - - if (!gpiod_get_value(st->gpio_frstdata)) { - ad7606_reset(st); - return -EIO; - } - - data++; - num--; - } return st->bops->read_block(st->dev, num, data); } diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index 0c6a88cc469585..6649e84d25de64 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -151,6 +151,8 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address, const char *name, unsigned int id, const struct ad7606_bus_ops *bops); +int ad7606_reset(struct ad7606_state *st); + enum ad7606_supported_device_ids { ID_AD7605_4, ID_AD7606_8, diff --git a/drivers/iio/adc/ad7606_par.c b/drivers/iio/adc/ad7606_par.c index d8408052262e4d..6bc587b20f05da 100644 --- a/drivers/iio/adc/ad7606_par.c +++ b/drivers/iio/adc/ad7606_par.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -21,8 +22,29 @@ static int ad7606_par16_read_block(struct device *dev, struct iio_dev *indio_dev = dev_get_drvdata(dev); struct ad7606_state *st = iio_priv(indio_dev); - insw((unsigned long)st->base_address, buf, count); + /* + * On the parallel interface, the frstdata signal is set to high while + * and after reading the sample of the first channel and low for all + * other channels. This can be used to check that the incoming data is + * correctly aligned. During normal operation the data should never + * become unaligned, but some glitch or electrostatic discharge might + * cause an extra read or clock cycle. Monitoring the frstdata signal + * allows to recover from such failure situations. + */ + int num = count; + u16 *_buf = buf; + + if (st->gpio_frstdata) { + insw((unsigned long)st->base_address, _buf, 1); + if (!gpiod_get_value(st->gpio_frstdata)) { + ad7606_reset(st); + return -EIO; + } + _buf++; + num--; + } + insw((unsigned long)st->base_address, _buf, num); return 0; } @@ -35,8 +57,28 @@ static int ad7606_par8_read_block(struct device *dev, { struct iio_dev *indio_dev = dev_get_drvdata(dev); struct ad7606_state *st = iio_priv(indio_dev); - - insb((unsigned long)st->base_address, buf, count * 2); + /* + * On the parallel interface, the frstdata signal is set to high while + * and after reading the sample of the first channel and low for all + * other channels. This can be used to check that the incoming data is + * correctly aligned. During normal operation the data should never + * become unaligned, but some glitch or electrostatic discharge might + * cause an extra read or clock cycle. Monitoring the frstdata signal + * allows to recover from such failure situations. + */ + int num = count; + u16 *_buf = buf; + + if (st->gpio_frstdata) { + insb((unsigned long)st->base_address, _buf, 2); + if (!gpiod_get_value(st->gpio_frstdata)) { + ad7606_reset(st); + return -EIO; + } + _buf++; + num--; + } + insb((unsigned long)st->base_address, _buf, num * 2); return 0; } From 314125cbae4803f079e786870d633e1a74b9de36 Mon Sep 17 00:00:00 2001 From: Dumitru Ceclan Date: Wed, 31 Jul 2024 15:37:22 +0300 Subject: [PATCH 208/268] iio: adc: ad7124: fix chip ID mismatch commit 96f9ab0d5933c1c00142dd052f259fce0bc3ced2 upstream. The ad7124_soft_reset() function has the assumption that the chip will assert the "power-on reset" bit in the STATUS register after a software reset without any delay. The POR bit =0 is used to check if the chip initialization is done. 
A chip ID mismatch probe error appears intermittently when the probe continues too soon and the ID register does not contain the expected value. Fix by adding a 200us delay after the software reset command is issued. Fixes: b3af341bbd96 ("iio: adc: Add ad7124 support") Signed-off-by: Dumitru Ceclan Reviewed-by: Nuno Sa Link: https://patch.msgid.link/20240731-ad7124-fix-v1-1-46a76aa4b9be@analog.com Cc: Signed-off-by: Jonathan Cameron Signed-off-by: Greg Kroah-Hartman --- drivers/iio/adc/ad7124.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index c56227f42a5a36..2976c62b58c075 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -765,6 +765,7 @@ static int ad7124_soft_reset(struct ad7124_state *st) if (ret < 0) return ret; + fsleep(200); timeout = 100; do { ret = ad_sd_read_reg(&st->sd, AD7124_STATUS, 1, &readval); From 6ef746b0b63f94055503f1eb6ddf80f6b251bb41 Mon Sep 17 00:00:00 2001 From: Faisal Hassan Date: Thu, 29 Aug 2024 15:15:02 +0530 Subject: [PATCH 209/268] usb: dwc3: core: update LC timer as per USB Spec V3.2 commit 9149c9b0c7e046273141e41eebd8a517416144ac upstream. This fix addresses STAR 9001285599, which only affects DWC_usb3 version 3.20a. The timer value for PM_LC_TIMER in DWC_usb3 3.20a for the Link ECN changes is incorrect. If the PM TIMER ECN is enabled via GUCTL2[19], the link compliance test (TD7.21) may fail. If the ECN is not enabled (GUCTL2[19] = 0), the controller will use the old timer value (5us), which is still acceptable for the link compliance test. Therefore, clear GUCTL2[19] to pass the USB link compliance test: TD 7.21. Cc: stable@vger.kernel.org Signed-off-by: Faisal Hassan Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240829094502.26502-1-quic_faisalh@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 15 +++++++++++++++ drivers/usb/dwc3/core.h | 2 ++ 2 files changed, 17 insertions(+) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 21ff6cbe5e49f0..9955091c533608 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1288,6 +1288,21 @@ static int dwc3_core_init(struct dwc3 *dwc) dwc3_writel(dwc->regs, DWC3_GUCTL2, reg); } + /* + * STAR 9001285599: This issue affects DWC_usb3 version 3.20a + * only. If the PM TIMER ECM is enabled through GUCTL2[19], the + * link compliance test (TD7.21) may fail. If the ECN is not + * enabled (GUCTL2[19] = 0), the controller will use the old timer + * value (5us), which is still acceptable for the link compliance + * test. Therefore, do not enable PM TIMER ECM in 3.20a by + * setting GUCTL2[19] by default; instead, use GUCTL2[19] = 0. + */ + if (DWC3_VER_IS(DWC3, 320A)) { + reg = dwc3_readl(dwc->regs, DWC3_GUCTL2); + reg &= ~DWC3_GUCTL2_LC_TIMER; + dwc3_writel(dwc->regs, DWC3_GUCTL2, reg); + } + /* * When configured in HOST mode, after issuing U3/L2 exit controller * fails to send proper CRC checksum in CRC5 feild. 
Because of this diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 07b062c2f6479b..db1a793a9b13ff 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -408,6 +408,7 @@ /* Global User Control Register 2 */ #define DWC3_GUCTL2_RST_ACTBITLATER BIT(14) +#define DWC3_GUCTL2_LC_TIMER BIT(19) /* Global User Control Register 3 */ #define DWC3_GUCTL3_SPLITDISABLE BIT(14) @@ -1238,6 +1239,7 @@ struct dwc3 { #define DWC3_REVISION_290A 0x5533290a #define DWC3_REVISION_300A 0x5533300a #define DWC3_REVISION_310A 0x5533310a +#define DWC3_REVISION_320A 0x5533320a #define DWC3_REVISION_330A 0x5533330a #define DWC31_REVISION_ANY 0x0 From f224f3729779ff990db8628def5246a3f23e7369 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Mon, 2 Sep 2024 11:09:16 +0000 Subject: [PATCH 210/268] usb: cdns2: Fix controller reset issue commit e2940928115e83d707b21bf00b0db7d6c15f8341 upstream. Patch fixes the procedure of resetting controller. The CPUCTRL register is write only and reading returns 0. Waiting for reset to complite is incorrect. Fixes: 3eb1f1efe204 ("usb: cdns2: Add main part of Cadence USBHS driver") cc: stable@vger.kernel.org Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/PH7PR07MB9538D56D75F1F399D0BB96F0DD922@PH7PR07MB9538.namprd07.prod.outlook.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/cdns2/cdns2-gadget.c | 12 +++--------- drivers/usb/gadget/udc/cdns2/cdns2-gadget.h | 9 +++++++++ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/usb/gadget/udc/cdns2/cdns2-gadget.c b/drivers/usb/gadget/udc/cdns2/cdns2-gadget.c index 0eed0e03842cff..d394affb707236 100644 --- a/drivers/usb/gadget/udc/cdns2/cdns2-gadget.c +++ b/drivers/usb/gadget/udc/cdns2/cdns2-gadget.c @@ -2251,7 +2251,6 @@ static int cdns2_gadget_start(struct cdns2_device *pdev) { u32 max_speed; void *buf; - int val; int ret; pdev->usb_regs = pdev->regs; @@ -2261,14 +2260,9 @@ static int cdns2_gadget_start(struct cdns2_device *pdev) pdev->adma_regs = pdev->regs + CDNS2_ADMA_REGS_OFFSET; /* Reset controller. */ - set_reg_bit_8(&pdev->usb_regs->cpuctrl, CPUCTRL_SW_RST); - - ret = readl_poll_timeout_atomic(&pdev->usb_regs->cpuctrl, val, - !(val & CPUCTRL_SW_RST), 1, 10000); - if (ret) { - dev_err(pdev->dev, "Error: reset controller timeout\n"); - return -EINVAL; - } + writeb(CPUCTRL_SW_RST | CPUCTRL_UPCLK | CPUCTRL_WUEN, + &pdev->usb_regs->cpuctrl); + usleep_range(5, 10); usb_initialize_gadget(pdev->dev, &pdev->gadget, NULL); diff --git a/drivers/usb/gadget/udc/cdns2/cdns2-gadget.h b/drivers/usb/gadget/udc/cdns2/cdns2-gadget.h index 71e2f62d653a51..b5d5ec12e986e0 100644 --- a/drivers/usb/gadget/udc/cdns2/cdns2-gadget.h +++ b/drivers/usb/gadget/udc/cdns2/cdns2-gadget.h @@ -292,8 +292,17 @@ struct cdns2_usb_regs { #define SPEEDCTRL_HSDISABLE BIT(7) /* CPUCTRL- bitmasks. */ +/* UP clock enable */ +#define CPUCTRL_UPCLK BIT(0) /* Controller reset bit. */ #define CPUCTRL_SW_RST BIT(1) +/** + * If the wuen bit is ‘1’, the upclken is automatically set to ‘1’ after + * detecting rising edge of wuintereq interrupt. If the wuen bit is ‘0’, + * the wuintereq interrupt is ignored. + */ +#define CPUCTRL_WUEN BIT(7) + /** * struct cdns2_adma_regs - ADMA controller registers. From 7d301dd2722f71e39d5b4a3f7cadff310fbffa03 Mon Sep 17 00:00:00 2001 From: Prashanth K Date: Wed, 28 Aug 2024 12:13:02 +0530 Subject: [PATCH 211/268] usb: dwc3: Avoid waking up gadget during startxfer commit 00dcf2fa449f23a263343d7fe051741bdde65d0b upstream. 
When operating in High-Speed, it is observed that DSTS[USBLNKST] doesn't update link state immediately after receiving the wakeup interrupt. Since wakeup event handler calls the resume callbacks, there is a chance that function drivers can perform an ep queue, which in turn tries to perform remote wakeup from send_gadget_ep_cmd(STARTXFER). This happens because DSTS[[21:18] wasn't updated to U0 yet, it's observed that the latency of DSTS can be in order of milli-seconds. Hence avoid calling gadget_wakeup during startxfer to prevent unnecessarily issuing remote wakeup to host. Fixes: c36d8e947a56 ("usb: dwc3: gadget: put link to U0 before Start Transfer") Cc: stable@vger.kernel.org Suggested-by: Thinh Nguyen Signed-off-by: Prashanth K Acked-by: Thinh Nguyen Link: https://lore.kernel.org/r/20240828064302.3796315-1-quic_prashk@quicinc.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 41 ++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 579d90efc281a1..1a1da4a0570196 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -287,6 +287,23 @@ static int __dwc3_gadget_wakeup(struct dwc3 *dwc, bool async); * * Caller should handle locking. This function will issue @cmd with given * @params to @dep and wait for its completion. + * + * According to the programming guide, if the link state is in L1/L2/U3, + * then sending the Start Transfer command may not complete. The + * programming guide suggested to bring the link state back to ON/U0 by + * performing remote wakeup prior to sending the command. However, don't + * initiate remote wakeup when the user/function does not send wakeup + * request via wakeup ops. Send the command when it's allowed. + * + * Notes: + * For L1 link state, issuing a command requires the clearing of + * GUSB2PHYCFG.SUSPENDUSB2, which turns on the signal required to complete + * the given command (usually within 50us). This should happen within the + * command timeout set by driver. No additional step is needed. + * + * For L2 or U3 link state, the gadget is in USB suspend. Care should be + * taken when sending Start Transfer command to ensure that it's done after + * USB resume. */ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, struct dwc3_gadget_ep_cmd_params *params) @@ -327,30 +344,6 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } - if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { - int link_state; - - /* - * Initiate remote wakeup if the link state is in U3 when - * operating in SS/SSP or L1/L2 when operating in HS/FS. If the - * link state is in U1/U2, no remote wakeup is needed. The Start - * Transfer command will initiate the link recovery. - */ - link_state = dwc3_gadget_get_link_state(dwc); - switch (link_state) { - case DWC3_LINK_STATE_U2: - if (dwc->gadget->speed >= USB_SPEED_SUPER) - break; - - fallthrough; - case DWC3_LINK_STATE_U3: - ret = __dwc3_gadget_wakeup(dwc, false); - dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n", - ret); - break; - } - } - /* * For some commands such as Update Transfer command, DEPCMDPARn * registers are reserved. 
Since the driver often sends Update Transfer From f77dc8a75859e559f3238a6d906206259227985e Mon Sep 17 00:00:00 2001 From: Sukrut Bellary Date: Mon, 2 Sep 2024 15:14:09 +0100 Subject: [PATCH 212/268] misc: fastrpc: Fix double free of 'buf' in error path commit e8c276d4dc0e19ee48385f74426aebc855b49aaf upstream. smatch warning: drivers/misc/fastrpc.c:1926 fastrpc_req_mmap() error: double free of 'buf' In fastrpc_req_mmap() error path, the fastrpc buffer is freed in fastrpc_req_munmap_impl() if unmap is successful. But in the end, there is an unconditional call to fastrpc_buf_free(). So the above case triggers the double free of fastrpc buf. Fixes: 72fa6f7820c4 ("misc: fastrpc: Rework fastrpc_req_munmap") Reviewed-by: Shuah Khan Reviewed-by: Dan Carpenter Reviewed-by: Srinivas Kandagatla Signed-off-by: Sukrut Bellary Signed-off-by: Srinivas Kandagatla Cc: stable Link: https://lore.kernel.org/r/20240902141409.70371-2-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/misc/fastrpc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index f0626814d56b9a..4df0d7a0cd1184 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1912,7 +1912,8 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) &args[0]); if (err) { dev_err(dev, "mmap error (len 0x%08llx)\n", buf->size); - goto err_invoke; + fastrpc_buf_free(buf); + return err; } /* update the buffer to be able to deallocate the memory on the DSP */ @@ -1950,8 +1951,6 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp) err_assign: fastrpc_req_munmap_impl(fl, buf); -err_invoke: - fastrpc_buf_free(buf); return err; } From 1f33d9f1d9ac3f0129f8508925000900c2fe5bb0 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Thu, 22 Aug 2024 18:23:52 +0000 Subject: [PATCH 213/268] binder: fix UAF caused by offsets overwrite commit 4df153652cc46545722879415937582028c18af5 upstream. Binder objects are processed and copied individually into the target buffer during transactions. Any raw data in-between these objects is copied as well. However, this raw data copy lacks an out-of-bounds check. If the raw data exceeds the data section size then the copy overwrites the offsets section. This eventually triggers an error that attempts to unwind the processed objects. However, at this point the offsets used to index these objects are now corrupted. Unwinding with corrupted offsets can result in decrements of arbitrary nodes and lead to their premature release. Other users of such nodes are left with a dangling pointer triggering a use-after-free. This issue is made evident by the following KASAN report (trimmed): ================================================================== BUG: KASAN: slab-use-after-free in _raw_spin_lock+0xe4/0x19c Write of size 4 at addr ffff47fc91598f04 by task binder-util/743 CPU: 9 UID: 0 PID: 743 Comm: binder-util Not tainted 6.11.0-rc4 #1 Hardware name: linux,dummy-virt (DT) Call trace: _raw_spin_lock+0xe4/0x19c binder_free_buf+0x128/0x434 binder_thread_write+0x8a4/0x3260 binder_ioctl+0x18f0/0x258c [...] Allocated by task 743: __kmalloc_cache_noprof+0x110/0x270 binder_new_node+0x50/0x700 binder_transaction+0x413c/0x6da8 binder_thread_write+0x978/0x3260 binder_ioctl+0x18f0/0x258c [...] Freed by task 745: kfree+0xbc/0x208 binder_thread_read+0x1c5c/0x37d4 binder_ioctl+0x16d8/0x258c [...] 
================================================================== To avoid this issue, let's check that the raw data copy is within the boundaries of the data section. Fixes: 6d98eb95b450 ("binder: avoid potential data leakage when copying txn") Cc: Todd Kjos Cc: stable@vger.kernel.org Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20240822182353.2129600-1-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ccaceedbc2c01e..94f10c6eb336a5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3342,6 +3342,7 @@ static void binder_transaction(struct binder_proc *proc, */ copy_size = object_offset - user_offset; if (copy_size && (user_offset > object_offset || + object_offset > tr->data_size || binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, user_offset, From 3d1baf322a3a69b38b6b2d511cfe0d611d1b5462 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 2 Sep 2024 15:25:09 +0100 Subject: [PATCH 214/268] nvmem: Fix return type of devm_nvmem_device_get() in kerneldoc commit c69f37f6559a8948d70badd2b179db7714dedd62 upstream. devm_nvmem_device_get() returns an nvmem device, not an nvmem cell. Fixes: e2a5402ec7c6d044 ("nvmem: Add nvmem_device based consumer apis.") Cc: stable Signed-off-by: Geert Uytterhoeven Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20240902142510.71096-3-srinivas.kandagatla@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/nvmem/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 040dfa01fa12e2..e7fd1315d7edc2 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -1253,13 +1253,13 @@ void nvmem_device_put(struct nvmem_device *nvmem) EXPORT_SYMBOL_GPL(nvmem_device_put); /** - * devm_nvmem_device_get() - Get nvmem cell of device form a given id + * devm_nvmem_device_get() - Get nvmem device of device form a given id * * @dev: Device that requests the nvmem device. * @id: name id for the requested nvmem device. * - * Return: ERR_PTR() on error or a valid pointer to a struct nvmem_cell - * on success. The nvmem_cell will be freed by the automatically once the + * Return: ERR_PTR() on error or a valid pointer to a struct nvmem_device + * on success. The nvmem_device will be freed by the automatically once the * device is freed. */ struct nvmem_device *devm_nvmem_device_get(struct device *dev, const char *id) From de6946be9c8bc7d2279123433495af7c21011b99 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Thu, 29 Aug 2024 12:43:11 +0530 Subject: [PATCH 215/268] uio_hv_generic: Fix kernel NULL pointer dereference in hv_uio_rescind commit fb1adbd7e50f3d2de56d0a2bb0700e2e819a329e upstream. For primary VM Bus channels, primary_channel pointer is always NULL. This pointer is valid only for the secondary channels. Also, rescind callback is meant for primary channels only. Fix NULL pointer dereference by retrieving the device_obj from the parent for the primary channel. 
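For reference, a one-line sketch of the corrected lookup (the real hunk follows below); the point is that a primary channel carries its device object directly, while its primary_channel pointer is NULL:

	/* sketch only: valid for primary channels, whose primary_channel is NULL */
	struct hv_device *hv_dev = channel->device_obj;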
Cc: stable@vger.kernel.org Fixes: ca3cda6fcf1e ("uio_hv_generic: add rescind support") Signed-off-by: Saurabh Sengar Signed-off-by: Naman Jain Link: https://lore.kernel.org/r/20240829071312.1595-2-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/uio/uio_hv_generic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 6be3462b109ff2..94a1fe82e4d519 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -104,10 +104,11 @@ static void hv_uio_channel_cb(void *context) /* * Callback from vmbus_event when channel is rescinded. + * It is meant for rescind of primary channels only. */ static void hv_uio_rescind(struct vmbus_channel *channel) { - struct hv_device *hv_dev = channel->primary_channel->device_obj; + struct hv_device *hv_dev = channel->device_obj; struct hv_uio_private_data *pdata = hv_get_drvdata(hv_dev); /* From 6ed45748c19bf960a03526d37de8b0f51cf6d72c Mon Sep 17 00:00:00 2001 From: Naman Jain Date: Thu, 29 Aug 2024 12:43:12 +0530 Subject: [PATCH 216/268] Drivers: hv: vmbus: Fix rescind handling in uio_hv_generic commit 6fd28941447bf2c8ca0f26fda612a1cabc41663f upstream. Rescind offer handling relies on rescind callbacks for some of the resources cleanup, if they are registered. It does not unregister vmbus device for the primary channel closure, when callback is registered. Without it, next onoffer does not come, rescind flag remains set and device goes to unusable state. Add logic to unregister vmbus for the primary channel in rescind callback to ensure channel removal and relid release, and to ensure that next onoffer can be received and handled properly. Cc: stable@vger.kernel.org Fixes: ca3cda6fcf1e ("uio_hv_generic: add rescind support") Signed-off-by: Naman Jain Reviewed-by: Saurabh Sengar Link: https://lore.kernel.org/r/20240829071312.1595-3-namjain@linux.microsoft.com Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 1 + drivers/uio/uio_hv_generic.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index edbb38f6956b94..756aebf324735f 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1962,6 +1962,7 @@ void vmbus_device_unregister(struct hv_device *device_obj) */ device_unregister(&device_obj->device); } +EXPORT_SYMBOL_GPL(vmbus_device_unregister); #ifdef CONFIG_ACPI /* diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c index 94a1fe82e4d519..a2c7abf8c289ef 100644 --- a/drivers/uio/uio_hv_generic.c +++ b/drivers/uio/uio_hv_generic.c @@ -119,6 +119,14 @@ static void hv_uio_rescind(struct vmbus_channel *channel) /* Wake up reader */ uio_event_notify(&pdata->info); + + /* + * With rescind callback registered, rescind path will not unregister the device + * from vmbus when the primary channel is rescinded. + * Without it, rescind handling is incomplete and next onoffer msg does not come. + * Unregister the device from vmbus here. + */ + vmbus_device_unregister(channel->device_obj); } /* Sysfs API to allow mmap of the ring buffers From 39e7e593418ccdbd151f2925fa6be1a616d16c96 Mon Sep 17 00:00:00 2001 From: David Fernandez Gonzalez Date: Wed, 28 Aug 2024 15:43:37 +0000 Subject: [PATCH 217/268] VMCI: Fix use-after-free when removing resource in vmci_resource_remove() commit 48b9a8dabcc3cf5f961b2ebcd8933bf9204babb7 upstream. 
When removing a resource from vmci_resource_table in vmci_resource_remove(), the search is performed using the resource handle by comparing context and resource fields. It is possible though to create two resources with different types but same handle (same context and resource fields). When trying to remove one of the resources, vmci_resource_remove() may not remove the intended one, but the object will still be freed as in the case of the datagram type in vmci_datagram_destroy_handle(). vmci_resource_table will still hold a pointer to this freed resource leading to a use-after-free vulnerability. BUG: KASAN: use-after-free in vmci_handle_is_equal include/linux/vmw_vmci_defs.h:142 [inline] BUG: KASAN: use-after-free in vmci_resource_remove+0x3a1/0x410 drivers/misc/vmw_vmci/vmci_resource.c:147 Read of size 4 at addr ffff88801c16d800 by task syz-executor197/1592 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x82/0xa9 lib/dump_stack.c:106 print_address_description.constprop.0+0x21/0x366 mm/kasan/report.c:239 __kasan_report.cold+0x7f/0x132 mm/kasan/report.c:425 kasan_report+0x38/0x51 mm/kasan/report.c:442 vmci_handle_is_equal include/linux/vmw_vmci_defs.h:142 [inline] vmci_resource_remove+0x3a1/0x410 drivers/misc/vmw_vmci/vmci_resource.c:147 vmci_qp_broker_detach+0x89a/0x11b9 drivers/misc/vmw_vmci/vmci_queue_pair.c:2182 ctx_free_ctx+0x473/0xbe1 drivers/misc/vmw_vmci/vmci_context.c:444 kref_put include/linux/kref.h:65 [inline] vmci_ctx_put drivers/misc/vmw_vmci/vmci_context.c:497 [inline] vmci_ctx_destroy+0x170/0x1d6 drivers/misc/vmw_vmci/vmci_context.c:195 vmci_host_close+0x125/0x1ac drivers/misc/vmw_vmci/vmci_host.c:143 __fput+0x261/0xa34 fs/file_table.c:282 task_work_run+0xf0/0x194 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop+0x184/0x189 kernel/entry/common.c:187 exit_to_user_mode_prepare+0x11b/0x123 kernel/entry/common.c:220 __syscall_exit_to_user_mode_work kernel/entry/common.c:302 [inline] syscall_exit_to_user_mode+0x18/0x42 kernel/entry/common.c:313 do_syscall_64+0x41/0x85 arch/x86/entry/common.c:86 entry_SYSCALL_64_after_hwframe+0x6e/0x0 This change ensures the type is also checked when removing the resource from vmci_resource_table in vmci_resource_remove(). Fixes: bc63dedb7d46 ("VMCI: resource object implementation.") Cc: stable@vger.kernel.org Reported-by: George Kennedy Signed-off-by: David Fernandez Gonzalez Link: https://lore.kernel.org/r/20240828154338.754746-1-david.fernandez.gonzalez@oracle.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/vmw_vmci/vmci_resource.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/vmw_vmci/vmci_resource.c b/drivers/misc/vmw_vmci/vmci_resource.c index 692daa9eff3411..19c9d2cdd277bf 100644 --- a/drivers/misc/vmw_vmci/vmci_resource.c +++ b/drivers/misc/vmw_vmci/vmci_resource.c @@ -144,7 +144,8 @@ void vmci_resource_remove(struct vmci_resource *resource) spin_lock(&vmci_resource_table.lock); hlist_for_each_entry(r, &vmci_resource_table.entries[idx], node) { - if (vmci_handle_is_equal(r->handle, resource->handle)) { + if (vmci_handle_is_equal(r->handle, resource->handle) && + resource->type == r->type) { hlist_del_init_rcu(&r->node); break; } From c4f27b17d384b7c3381bf8e8336599e26c67afe1 Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Thu, 25 Jul 2024 15:33:54 -0400 Subject: [PATCH 218/268] clocksource/drivers/imx-tpm: Fix return -ETIME when delta exceeds INT_MAX commit 5b8843fcd49827813da80c0f590a17ae4ce93c5d upstream. 
In tpm_set_next_event(delta), return -ETIME by wrong cast to int when delta is larger than INT_MAX. For example: tpm_set_next_event(delta = 0xffff_fffe) { ... next = tpm_read_counter(); // assume next is 0x10 next += delta; // next will 0xffff_fffe + 0x10 = 0x1_0000_000e now = tpm_read_counter(); // now is 0x10 ... return (int)(next - now) <= 0 ? -ETIME : 0; ^^^^^^^^^^ 0x1_0000_000e - 0x10 = 0xffff_fffe, which is -2 when cast to int. So return -ETIME. } To fix this, introduce a 'prev' variable and check if 'now - prev' is larger than delta. Cc: stable@vger.kernel.org Fixes: 059ab7b82eec ("clocksource/drivers/imx-tpm: Add imx tpm timer support") Signed-off-by: Jacky Bai Reviewed-by: Peng Fan Reviewed-by: Ye Li Reviewed-by: Jason Liu Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240725193355.1436005-1-Frank.Li@nxp.com Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-imx-tpm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index bd64a8a8427f3c..cd23caf1e59995 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -83,10 +83,10 @@ static u64 notrace tpm_read_sched_clock(void) static int tpm_set_next_event(unsigned long delta, struct clock_event_device *evt) { - unsigned long next, now; + unsigned long next, prev, now; - next = tpm_read_counter(); - next += delta; + prev = tpm_read_counter(); + next = prev + delta; writel(next, timer_base + TPM_C0V); now = tpm_read_counter(); @@ -96,7 +96,7 @@ static int tpm_set_next_event(unsigned long delta, * of writing CNT registers which may cause the min_delta event got * missed, so we need add a ETIME check here in case it happened. */ - return (int)(next - now) <= 0 ? -ETIME : 0; + return (now - prev) >= delta ? -ETIME : 0; } static int tpm_set_state_oneshot(struct clock_event_device *evt) From 444c3927a0c1de58abfa6c4df9b18080bb078bdf Mon Sep 17 00:00:00 2001 From: Jacky Bai Date: Thu, 25 Jul 2024 15:33:55 -0400 Subject: [PATCH 219/268] clocksource/drivers/imx-tpm: Fix next event not taking effect sometime commit 3d5c2f8e75a55cfb11a85086c71996af0354a1fb upstream. The value written into the TPM CnV can only be updated into the hardware when the counter increases. Additional writes to the CnV write buffer are ignored until the register has been updated. Therefore, we need to check if the CnV has been updated before continuing. This may require waiting for 1 counter cycle in the worst case. Cc: stable@vger.kernel.org Fixes: 059ab7b82eec ("clocksource/drivers/imx-tpm: Add imx tpm timer support") Signed-off-by: Jacky Bai Reviewed-by: Peng Fan Reviewed-by: Ye Li Reviewed-by: Jason Liu Signed-off-by: Frank Li Link: https://lore.kernel.org/r/20240725193355.1436005-2-Frank.Li@nxp.com Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-imx-tpm.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/clocksource/timer-imx-tpm.c b/drivers/clocksource/timer-imx-tpm.c index cd23caf1e59995..92c025b70eb62f 100644 --- a/drivers/clocksource/timer-imx-tpm.c +++ b/drivers/clocksource/timer-imx-tpm.c @@ -90,6 +90,14 @@ static int tpm_set_next_event(unsigned long delta, writel(next, timer_base + TPM_C0V); now = tpm_read_counter(); + /* + * Need to wait CNT increase at least 1 cycle to make sure + * the C0V has been updated into HW. 
+ */ + if ((next & 0xffffffff) != readl(timer_base + TPM_C0V)) + while (now == tpm_read_counter()) + ; + /* * NOTE: We observed in a very small probability, the bus fabric * contention between GPU and A7 may results a few cycles delay From 7eeb7189c4d4b258fc80435a98181735e710f02f Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Mon, 19 Aug 2024 12:03:35 +0200 Subject: [PATCH 220/268] clocksource/drivers/timer-of: Remove percpu irq related code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 471ef0b5a8aaca4296108e756b970acfc499ede4 upstream. GCC's named address space checks errors out with: drivers/clocksource/timer-of.c: In function ‘timer_of_irq_exit’: drivers/clocksource/timer-of.c:29:46: error: passing argument 2 of ‘free_percpu_irq’ from pointer to non-enclosed address space 29 | free_percpu_irq(of_irq->irq, clkevt); | ^~~~~~ In file included from drivers/clocksource/timer-of.c:8: ./include/linux/interrupt.h:201:43: note: expected ‘__seg_gs void *’ but argument is of type ‘struct clock_event_device *’ 201 | extern void free_percpu_irq(unsigned int, void __percpu *); | ^~~~~~~~~~~~~~~ drivers/clocksource/timer-of.c: In function ‘timer_of_irq_init’: drivers/clocksource/timer-of.c:74:51: error: passing argument 4 of ‘request_percpu_irq’ from pointer to non-enclosed address space 74 | np->full_name, clkevt) : | ^~~~~~ ./include/linux/interrupt.h:190:56: note: expected ‘__seg_gs void *’ but argument is of type ‘struct clock_event_device *’ 190 | const char *devname, void __percpu *percpu_dev_id) Sparse warns about: timer-of.c:29:46: warning: incorrect type in argument 2 (different address spaces) timer-of.c:29:46: expected void [noderef] __percpu * timer-of.c:29:46: got struct clock_event_device *clkevt timer-of.c:74:51: warning: incorrect type in argument 4 (different address spaces) timer-of.c:74:51: expected void [noderef] __percpu *percpu_dev_id timer-of.c:74:51: got struct clock_event_device *clkevt It appears the code is incorrect as reported by Uros Bizjak: "The referred code is questionable as it tries to reuse the clkevent pointer once as percpu pointer and once as generic pointer, which should be avoided." This change removes the percpu related code as no drivers is using it. [Daniel: Fixed the description] Fixes: dc11bae785295 ("clocksource/drivers: Add timer-of common init routine") Reported-by: Uros Bizjak Tested-by: Uros Bizjak Link: https://lore.kernel.org/r/20240819100335.2394751-1-daniel.lezcano@linaro.org Signed-off-by: Daniel Lezcano Signed-off-by: Greg Kroah-Hartman --- drivers/clocksource/timer-of.c | 17 ++++------------- drivers/clocksource/timer-of.h | 1 - 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/drivers/clocksource/timer-of.c b/drivers/clocksource/timer-of.c index c3f54d9912be79..420202bf76e42c 100644 --- a/drivers/clocksource/timer-of.c +++ b/drivers/clocksource/timer-of.c @@ -25,10 +25,7 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) struct clock_event_device *clkevt = &to->clkevt; - if (of_irq->percpu) - free_percpu_irq(of_irq->irq, clkevt); - else - free_irq(of_irq->irq, clkevt); + free_irq(of_irq->irq, clkevt); } /** @@ -42,9 +39,6 @@ static __init void timer_of_irq_exit(struct of_timer_irq *of_irq) * - Get interrupt number by name * - Get interrupt number by index * - * When the interrupt is per CPU, 'request_percpu_irq()' is called, - * otherwise 'request_irq()' is used. 
- * * Returns 0 on success, < 0 otherwise */ static __init int timer_of_irq_init(struct device_node *np, @@ -69,12 +63,9 @@ static __init int timer_of_irq_init(struct device_node *np, return -EINVAL; } - ret = of_irq->percpu ? - request_percpu_irq(of_irq->irq, of_irq->handler, - np->full_name, clkevt) : - request_irq(of_irq->irq, of_irq->handler, - of_irq->flags ? of_irq->flags : IRQF_TIMER, - np->full_name, clkevt); + ret = request_irq(of_irq->irq, of_irq->handler, + of_irq->flags ? of_irq->flags : IRQF_TIMER, + np->full_name, clkevt); if (ret) { pr_err("Failed to request irq %d for %pOF\n", of_irq->irq, np); return ret; diff --git a/drivers/clocksource/timer-of.h b/drivers/clocksource/timer-of.h index a5478f3e8589df..01a2c6b7db0659 100644 --- a/drivers/clocksource/timer-of.h +++ b/drivers/clocksource/timer-of.h @@ -11,7 +11,6 @@ struct of_timer_irq { int irq; int index; - int percpu; const char *name; unsigned long flags; irq_handler_t handler; From 9faed52b98e11fb2638b6d252f1a0d7fd7de691f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 3 Sep 2024 12:23:12 +0200 Subject: [PATCH 221/268] uprobes: Use kzalloc to allocate xol area commit e240b0fde52f33670d1336697c22d90a4fe33c84 upstream. To prevent unitialized members, use kzalloc to allocate the xol area. Fixes: b059a453b1cf1 ("x86/vdso: Add mremap hook to vm_special_mapping") Signed-off-by: Sven Schnelle Signed-off-by: Peter Zijlstra (Intel) Acked-by: Oleg Nesterov Link: https://lore.kernel.org/r/20240903102313.3402529-1-svens@linux.ibm.com Signed-off-by: Greg Kroah-Hartman --- kernel/events/uprobes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 3048589e2e8516..4705571f803458 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1480,7 +1480,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) uprobe_opcode_t insn = UPROBE_SWBP_INSN; struct xol_area *area; - area = kmalloc(sizeof(*area), GFP_KERNEL); + area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1490,7 +1490,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) goto free_area; area->xol_mapping.name = "[uprobes]"; - area->xol_mapping.fault = NULL; area->xol_mapping.pages = area->pages; area->pages[0] = alloc_page(GFP_HIGHUSER); if (!area->pages[0]) From c4b69bee3f4ef76809288fe6827bc14d4ae788ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 2 Sep 2024 10:14:24 +0200 Subject: [PATCH 222/268] perf/aux: Fix AUX buffer serialization commit 2ab9d830262c132ab5db2f571003d80850d56b2a upstream. Ole reported that event->mmap_mutex is strictly insufficient to serialize the AUX buffer, add a per RB mutex to fully serialize it. Note that in the lock order comment the perf_event::mmap_mutex order was already wrong, that is, it nesting under mmap_lock is not new with this patch. 
Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: Ole Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- kernel/events/core.c | 18 ++++++++++++------ kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 2 ++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0f2b5610933d76..4d0abdace4e7cc 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1255,8 +1255,9 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock - * perf_event::mmap_mutex * mmap_lock + * perf_event::mmap_mutex + * perf_buffer::aux_mutex * perf_addr_filters_head::lock * * cpu_hotplug_lock @@ -6352,12 +6353,11 @@ static void perf_mmap_close(struct vm_area_struct *vma) event->pmu->event_unmapped(event, vma->vm_mm); /* - * rb->aux_mmap_count will always drop before rb->mmap_count and - * event->mmap_count, so it is ok to use event->mmap_mutex to - * serialize with perf_mmap here. + * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex + * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6374,7 +6374,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); - mutex_unlock(&event->mmap_mutex); + mutex_unlock(&rb->aux_mutex); } if (atomic_dec_and_test(&rb->mmap_count)) @@ -6462,6 +6462,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; @@ -6510,6 +6511,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock; + aux_mutex = &rb->aux_mutex; + mutex_lock(aux_mutex); + aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size); @@ -6660,6 +6664,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&rb->mmap_count); } aux_unlock: + if (aux_mutex) + mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); /* diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 386d21c7edfa03..f376b057320ce8 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct perf_buffer { struct user_struct *mmap_user; /* AUX area */ + struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f1f4a627f93db1..b0930b41855276 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -333,6 +333,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) */ if (!rb->nr_pages) rb->paused = 1; + + mutex_init(&rb->aux_mutex); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) From 9a9974713de85e82ce6e51714925dddf2c9028a5 Mon Sep 17 00:00:00 2001 From: Vern Hao Date: Fri, 25 Aug 2023 15:57:34 +0800 Subject: 
[PATCH 223/268] mm/vmscan: use folio_migratetype() instead of get_pageblock_migratetype() [ Upstream commit 97144ce008f918249fa7275ee1d29f6f27665c34 ] In skip_cma(), we can use folio_migratetype() to replace get_pageblock_migratetype(). Link: https://lkml.kernel.org/r/20230825075735.52436-1-user@VERNHAO-MC1 Signed-off-by: Vern Hao Reviewed-by: David Hildenbrand Cc: Zhaoyang Huang Signed-off-by: Andrew Morton Stable-dep-of: bfe0857c20c6 ("Revert "mm: skip CMA pages when they are not available"") Signed-off-by: Sasha Levin --- mm/vmscan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 83fa8e924f8aea..7175ff9b97d939 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2271,7 +2271,7 @@ static bool skip_cma(struct folio *folio, struct scan_control *sc) { return !current_is_kswapd() && gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE && - get_pageblock_migratetype(&folio->page) == MIGRATE_CMA; + folio_migratetype(folio) == MIGRATE_CMA; } #else static bool skip_cma(struct folio *folio, struct scan_control *sc) From 0eceaa9d0560304c773d409eb2e61c00b8b021cf Mon Sep 17 00:00:00 2001 From: Usama Arif Date: Wed, 21 Aug 2024 20:26:07 +0100 Subject: [PATCH 224/268] Revert "mm: skip CMA pages when they are not available" [ Upstream commit bfe0857c20c663fcc1592fa4e3a61ca12b07dac9 ] This reverts commit 5da226dbfce3 ("mm: skip CMA pages when they are not available") and b7108d66318a ("Multi-gen LRU: skip CMA pages when they are not eligible"). lruvec->lru_lock is highly contended and is held when calling isolate_lru_folios. If the lru has a large number of CMA folios consecutively, while the allocation type requested is not MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long time while it skips those. For FIO workload, ~150million order=0 folios were skipped to isolate a few ZONE_DMA folios [1]. This can cause lockups [1] and high memory pressure for extended periods of time [2]. Remove skipping CMA for MGLRU as well, as it was introduced in sort_folio for the same resaon as 5da226dbfce3a2f44978c2c7cf88166e69a6788b. [1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/ [2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/ [usamaarif642@gmail.com: also revert b7108d66318a, per Johannes] Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com Link: https://lkml.kernel.org/r/357ac325-4c61-497a-92a3-bdbd230d5ec9@gmail.com Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com Fixes: 5da226dbfce3 ("mm: skip CMA pages when they are not available") Signed-off-by: Usama Arif Acked-by: Johannes Weiner Cc: Bharata B Rao Cc: Breno Leitao Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Rik van Riel Cc: Vlastimil Babka Cc: Yu Zhao Cc: Zhaoyang Huang Cc: Zhaoyang Huang Cc: Signed-off-by: Andrew Morton Signed-off-by: Sasha Levin --- mm/vmscan.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 7175ff9b97d939..81533bed0b4629 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2261,25 +2261,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec, } -#ifdef CONFIG_CMA -/* - * It is waste of effort to scan and reclaim CMA pages if it is not available - * for current allocation context. 
Kswapd can not be enrolled as it can not - * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL - */ -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return !current_is_kswapd() && - gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE && - folio_migratetype(folio) == MIGRATE_CMA; -} -#else -static bool skip_cma(struct folio *folio, struct scan_control *sc) -{ - return false; -} -#endif - /* * Isolating page from the lruvec to fill in @dst list by nr_to_scan times. * @@ -2326,8 +2307,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan, nr_pages = folio_nr_pages(folio); total_scan += nr_pages; - if (folio_zonenum(folio) > sc->reclaim_idx || - skip_cma(folio, sc)) { + if (folio_zonenum(folio) > sc->reclaim_idx) { nr_skipped[folio_zonenum(folio)] += nr_pages; move_to = &folios_skipped; goto move; @@ -4971,7 +4951,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c } /* ineligible */ - if (zone > sc->reclaim_idx || skip_cma(folio, sc)) { + if (zone > sc->reclaim_idx) { gen = folio_inc_gen(lruvec, folio, false); list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); return true; From 5ff0a441419576b8694a2b19c4a689f5c04095e5 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jun 2024 21:42:44 +1000 Subject: [PATCH 225/268] workqueue: wq_watchdog_touch is always called with valid CPU [ Upstream commit 18e24deb1cc92f2068ce7434a94233741fbd7771 ] Warn in the case it is called with cpu == -1. This does not appear to happen anywhere. Signed-off-by: Nicholas Piggin Reviewed-by: Paul E. McKenney Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/workqueue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8c7bafbee1b137..a1665c2e04b463 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6458,6 +6458,8 @@ notrace void wq_watchdog_touch(int cpu) { if (cpu >= 0) per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; + else + WARN_ONCE(1, "%s should be called with valid CPU", __func__); wq_watchdog_touched = jiffies; } From 241bce1c757d0587721512296952e6bba69631ed Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 25 Jun 2024 21:42:45 +1000 Subject: [PATCH 226/268] workqueue: Improve scalability of workqueue watchdog touch [ Upstream commit 98f887f820c993e05a12e8aa816c80b8661d4c87 ] On a ~2000 CPU powerpc system, hard lockups have been observed in the workqueue code when stop_machine runs (in this case due to CPU hotplug). This is due to lots of CPUs spinning in multi_cpu_stop, calling touch_nmi_watchdog() which ends up calling wq_watchdog_touch(). wq_watchdog_touch() writes to the global variable wq_watchdog_touched, and that can find itself in the same cacheline as other important workqueue data, which slows down operations to the point of lockups. In the case of the following abridged trace, worker_pool_idr was in the hot line, causing the lockups to always appear at idr_find. watchdog: CPU 1125 self-detected hard LOCKUP @ idr_find Call Trace: get_work_pool __queue_work call_timer_fn run_timer_softirq __do_softirq do_softirq_own_stack irq_exit timer_interrupt decrementer_common_virt * interrupt: 900 (timer) at multi_cpu_stop multi_cpu_stop cpu_stopper_thread smpboot_thread_fn kthread Fix this by having wq_watchdog_touch() only write to the line if the last time a touch was recorded exceeds 1/4 of the watchdog threshold. Reported-by: Srikar Dronamraju Signed-off-by: Nicholas Piggin Reviewed-by: Paul E. 
McKenney Signed-off-by: Tejun Heo Signed-off-by: Sasha Levin --- kernel/workqueue.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a1665c2e04b463..7fa1c7c9151aef 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -6456,12 +6456,18 @@ static void wq_watchdog_timer_fn(struct timer_list *unused) notrace void wq_watchdog_touch(int cpu) { + unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ; + unsigned long touch_ts = READ_ONCE(wq_watchdog_touched); + unsigned long now = jiffies; + if (cpu >= 0) - per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies; + per_cpu(wq_watchdog_touched_cpu, cpu) = now; else WARN_ONCE(1, "%s should be called with valid CPU", __func__); - wq_watchdog_touched = jiffies; + /* Don't unnecessarily store to global cacheline */ + if (time_after(now, touch_ts + thresh / 4)) + WRITE_ONCE(wq_watchdog_touched, jiffies); } static void wq_watchdog_set_thresh(unsigned long thresh) From 6bf77014db580905fa0be1df262f029965289726 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:31 +0100 Subject: [PATCH 227/268] ACPI: processor: Return an error if acpi_processor_get_info() fails in processor_add() [ Upstream commit fadf231f0a06a6748a7fc4a2c29ac9ef7bca6bfd ] Rafael observed [1] that returning 0 from processor_add() will result in acpi_default_enumeration() being called which will attempt to create a platform device, but that makes little sense when the processor is known to be not available. So just return the error code from acpi_processor_get_info() instead. Link: https://lore.kernel.org/all/CAJZ5v0iKU8ra9jR+EmgxbuNm=Uwx2m1-8vn_RAZ+aCiUVLe3Pw@mail.gmail.com/ [1] Suggested-by: Rafael J. Wysocki Acked-by: Rafael J. Wysocki Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-5-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- drivers/acpi/acpi_processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 0f5218e361df5c..5f760bc62219b5 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -415,7 +415,7 @@ static int acpi_processor_add(struct acpi_device *device, result = acpi_processor_get_info(device); if (result) /* Processor is not physically present or unavailable */ - return 0; + return result; BUG_ON(pr->id >= nr_cpu_ids); From 47c310fbaaf40a72e35e3b027033b15f6c0cc9f0 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:32 +0100 Subject: [PATCH 228/268] ACPI: processor: Fix memory leaks in error paths of processor_add() [ Upstream commit 47ec9b417ed9b6b8ec2a941cd84d9de62adc358a ] If acpi_processor_get_info() returned an error, pr and the associated pr->throttling.shared_cpu_map were leaked. The unwind code was in the wrong order wrt to setup, relying on some unwind actions having no affect (clearing variables that were never set etc). That makes it harder to reason about so reorder and add appropriate labels to only undo what was actually set up in the first place. Acked-by: Rafael J. 
Wysocki Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-6-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- drivers/acpi/acpi_processor.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 5f760bc62219b5..7053f1b9fc1ddc 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -415,7 +415,7 @@ static int acpi_processor_add(struct acpi_device *device, result = acpi_processor_get_info(device); if (result) /* Processor is not physically present or unavailable */ - return result; + goto err_clear_driver_data; BUG_ON(pr->id >= nr_cpu_ids); @@ -430,7 +430,7 @@ static int acpi_processor_add(struct acpi_device *device, "BIOS reported wrong ACPI id %d for the processor\n", pr->id); /* Give up, but do not abort the namespace scan. */ - goto err; + goto err_clear_driver_data; } /* * processor_device_array is not cleared on errors to allow buggy BIOS @@ -442,12 +442,12 @@ static int acpi_processor_add(struct acpi_device *device, dev = get_cpu_device(pr->id); if (!dev) { result = -ENODEV; - goto err; + goto err_clear_per_cpu; } result = acpi_bind_one(dev, device); if (result) - goto err; + goto err_clear_per_cpu; pr->dev = dev; @@ -458,10 +458,11 @@ static int acpi_processor_add(struct acpi_device *device, dev_err(dev, "Processor driver could not be attached\n"); acpi_unbind_one(dev); - err: - free_cpumask_var(pr->throttling.shared_cpu_map); - device->driver_data = NULL; + err_clear_per_cpu: per_cpu(processors, pr->id) = NULL; + err_clear_driver_data: + device->driver_data = NULL; + free_cpumask_var(pr->throttling.shared_cpu_map); err_free_pr: kfree(pr); return result; From acf9ef8d1b1fc124125993ec70a54920706d56a0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 29 May 2024 14:34:38 +0100 Subject: [PATCH 229/268] arm64: acpi: Move get_cpu_for_acpi_id() to a header [ Upstream commit 8d34b6f17b9ac93faa2791eb037dcb08bdf755de ] ACPI identifies CPUs by UID. get_cpu_for_acpi_id() maps the ACPI UID to the Linux CPU number. The helper to retrieve this mapping is only available in arm64's NUMA code. Move it to live next to get_acpi_id_for_cpu(). 
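A usage sketch, illustrative only: now that the helper lives in the header, callers outside the NUMA parser can map a MADT UID back to a Linux CPU number directly:

	int cpu = get_cpu_for_acpi_id(uid);	/* sketch: uid from the GICC entry */

	if (cpu < 0)
		return cpu;			/* no CPU registered with this UID */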
Signed-off-by: James Morse Reviewed-by: Jonathan Cameron Reviewed-by: Gavin Shan Tested-by: Miguel Luis Tested-by: Vishnu Pajjuri Tested-by: Jianyong Wu Signed-off-by: Russell King (Oracle) Acked-by: Hanjun Guo Signed-off-by: Jonathan Cameron Reviewed-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20240529133446.28446-12-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/acpi.h | 11 +++++++++++ arch/arm64/kernel/acpi_numa.c | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 6792a1f83f2ad4..bc9a6656fc0c55 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -119,6 +119,17 @@ static inline u32 get_acpi_id_for_cpu(unsigned int cpu) return acpi_cpu_get_madt_gicc(cpu)->uid; } +static inline int get_cpu_for_acpi_id(u32 uid) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (uid == get_acpi_id_for_cpu(cpu)) + return cpu; + + return -EINVAL; +} + static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); int apei_claim_sea(struct pt_regs *regs); diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index ccbff21ce1faf1..2465f291c7e17c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -34,17 +34,6 @@ int __init acpi_numa_get_nid(unsigned int cpu) return acpi_early_node_map[cpu]; } -static inline int get_cpu_for_acpi_id(u32 uid) -{ - int cpu; - - for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) - return cpu; - - return -EINVAL; -} - static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header, const unsigned long end) { From 62ca6d3a905b4c40cd942f3cc645a6718f8bc7e7 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Wed, 29 May 2024 14:34:39 +0100 Subject: [PATCH 230/268] arm64: acpi: Harden get_cpu_for_acpi_id() against missing CPU entry [ Upstream commit 2488444274c70038eb6b686cba5f1ce48ebb9cdd ] In a review discussion of the changes to support vCPU hotplug where a check was added on the GICC being enabled if was online, it was noted that there is need to map back to the cpu and use that to index into a cpumask. As such, a valid ID is needed. If an MPIDR check fails in acpi_map_gic_cpu_interface() it is possible for the entry in cpu_madt_gicc[cpu] == NULL. This function would then cause a NULL pointer dereference. Whilst a path to trigger this has not been established, harden this caller against the possibility. 
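To make the hazard concrete, the unguarded lookup path looks roughly like this (condensed from the header; the comment marking the failure mode is editorial):

	static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
	{
		/* Oopses here if the MADT GICC entry for 'cpu' was
		 * rejected (e.g. by a failed MPIDR check) and never
		 * recorded.
		 */
		return acpi_cpu_get_madt_gicc(cpu)->uid;
	}

With the check added below, get_cpu_for_acpi_id() simply skips any CPU whose GICC entry is missing instead of dereferencing a NULL pointer.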
Reviewed-by: Gavin Shan Signed-off-by: Jonathan Cameron Link: https://lore.kernel.org/r/20240529133446.28446-13-Jonathan.Cameron@huawei.com Signed-off-by: Catalin Marinas Signed-off-by: Sasha Levin --- arch/arm64/include/asm/acpi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index bc9a6656fc0c55..a407f9cd549edc 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -124,7 +124,8 @@ static inline int get_cpu_for_acpi_id(u32 uid) int cpu; for (cpu = 0; cpu < nr_cpu_ids; cpu++) - if (uid == get_acpi_id_for_cpu(cpu)) + if (acpi_cpu_get_madt_gicc(cpu) && + uid == get_acpi_id_for_cpu(cpu)) return cpu; return -EINVAL; From 2370061f07078b1c7cce461f09b129c20f9b4861 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Jan 2023 20:25:48 +0100 Subject: [PATCH 231/268] can: mcp251xfd: mcp251xfd_handle_rxif_ring_uinc(): factor out in separate function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit d49184b7b585f9da7ee546b744525f62117019f6 ] This is a preparation patch. Sending the UINC messages followed by incrementing the tail pointer will be called in more than one place in upcoming patches, so factor this out into a separate function. Also make mcp251xfd_handle_rxif_ring_uinc() safe to be called with a "len" of 0. Tested-by: Stefan Althöfer Tested-by: Thomas Kopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 48 +++++++++++++------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index ced8d9c81f8c6b..5e2f39de88f393 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -197,6 +197,37 @@ mcp251xfd_rx_obj_read(const struct mcp251xfd_priv *priv, return err; } +static int +mcp251xfd_handle_rxif_ring_uinc(const struct mcp251xfd_priv *priv, + struct mcp251xfd_rx_ring *ring, + u8 len) +{ + int offset; + int err; + + if (!len) + return 0; + + /* Increment the RX FIFO tail pointer 'len' times in a + * single SPI message. + * + * Note: + * Calculate offset, so that the SPI transfer ends on + * the last message of the uinc_xfer array, which has + * "cs_change == 0", to properly deactivate the chip + * select. + */ + offset = ARRAY_SIZE(ring->uinc_xfer) - len; + err = spi_sync_transfer(priv->spi, + ring->uinc_xfer + offset, len); + if (err) + return err; + + ring->tail += len; + + return 0; +} + static int mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, struct mcp251xfd_rx_ring *ring) @@ -210,8 +241,6 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, return err; while ((len = mcp251xfd_get_rx_linear_len(ring))) { - int offset; - rx_tail = mcp251xfd_get_rx_tail(ring); err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj, @@ -227,22 +256,9 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, return err; } - /* Increment the RX FIFO tail pointer 'len' times in a - * single SPI message. - * - * Note: - * Calculate offset, so that the SPI transfer ends on - * the last message of the uinc_xfer array, which has - * "cs_change == 0", to properly deactivate the chip - * select. 
- */ - offset = ARRAY_SIZE(ring->uinc_xfer) - len; - err = spi_sync_transfer(priv->spi, - ring->uinc_xfer + offset, len); + err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, len); if (err) return err; - - ring->tail += len; } return 0; From bf501ab4cbfe841dd294526422193a310bc6e2c2 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Jan 2023 21:07:03 +0100 Subject: [PATCH 232/268] can: mcp251xfd: rx: prepare to workaround broken RX FIFO head index erratum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 85505e585637a737e4713c1386c30e37c325b82e ] This is a preparatory patch to work around erratum DS80000789E 6 of the mcp2518fd, the other variants of the chip family (mcp2517fd and mcp251863) are probably also affected. When handling the RX interrupt, the driver iterates over all pending FIFOs (which are implemented as ring buffers in hardware) and reads the FIFO header index from the RX FIFO STA register of the chip. In the bad case, the driver reads a too large head index. In the original code, the driver always trusted the read value, which caused old CAN frames that were already processed, or new, incompletely written CAN frames to be (re-)processed. Instead of reading and trusting the head index, read the head index and calculate the number of CAN frames that were supposedly received - replace mcp251xfd_rx_ring_update() with mcp251xfd_get_rx_len(). The mcp251xfd_handle_rxif_ring() function reads the received CAN frames from the chip, iterates over them and pushes them into the network stack. Prepare that the iteration can be stopped if an old CAN frame is detected. The actual code to detect old or incomplete frames and abort will be added in the next patch. Link: https://lore.kernel.org/all/BL3PR11MB64844C1C95CA3BDADAE4D8CCFBC99@BL3PR11MB6484.namprd11.prod.outlook.com Reported-by: Stefan Althöfer Closes: https://lore.kernel.org/all/FR0P281MB1966273C216630B120ABB6E197E89@FR0P281MB1966.DEUP281.PROD.OUTLOOK.COM Tested-by: Stefan Althöfer Tested-by: Thomas Kopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 2 + drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 89 +++++++++++-------- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 12 +-- 3 files changed, 56 insertions(+), 47 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 3a941a71c78fb8..5f92aed62ff954 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -523,6 +523,8 @@ int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv) } rx_ring->obj_num = rx_obj_num; + rx_ring->obj_num_shift_to_u8 = BITS_PER_TYPE(rx_ring->obj_num_shift_to_u8) - + ilog2(rx_obj_num); rx_ring->obj_size = rx_obj_size; priv->rx[i] = rx_ring; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index 5e2f39de88f393..5d0fb1c454cdc4 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -2,7 +2,7 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix, // Marc Kleine-Budde // // Based on: @@ -16,23 +16,14 @@ #include "mcp251xfd.h" -static inline int -mcp251xfd_rx_head_get_from_chip(const struct mcp251xfd_priv *priv, - const struct mcp251xfd_rx_ring *ring, - u8 *rx_head, bool *fifo_empty) 
+static inline bool mcp251xfd_rx_fifo_sta_empty(const u32 fifo_sta) { - u32 fifo_sta; - int err; - - err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr), - &fifo_sta); - if (err) - return err; - - *rx_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta); - *fifo_empty = !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF); + return !(fifo_sta & MCP251XFD_REG_FIFOSTA_TFNRFNIF); +} - return 0; +static inline bool mcp251xfd_rx_fifo_sta_full(const u32 fifo_sta) +{ + return fifo_sta & MCP251XFD_REG_FIFOSTA_TFERFFIF; } static inline int @@ -80,29 +71,49 @@ mcp251xfd_check_rx_tail(const struct mcp251xfd_priv *priv, } static int -mcp251xfd_rx_ring_update(const struct mcp251xfd_priv *priv, - struct mcp251xfd_rx_ring *ring) +mcp251xfd_get_rx_len(const struct mcp251xfd_priv *priv, + const struct mcp251xfd_rx_ring *ring, + u8 *len_p) { - u32 new_head; - u8 chip_rx_head; - bool fifo_empty; + const u8 shift = ring->obj_num_shift_to_u8; + u8 chip_head, tail, len; + u32 fifo_sta; int err; - err = mcp251xfd_rx_head_get_from_chip(priv, ring, &chip_rx_head, - &fifo_empty); - if (err || fifo_empty) + err = regmap_read(priv->map_reg, MCP251XFD_REG_FIFOSTA(ring->fifo_nr), + &fifo_sta); + if (err) + return err; + + if (mcp251xfd_rx_fifo_sta_empty(fifo_sta)) { + *len_p = 0; + return 0; + } + + if (mcp251xfd_rx_fifo_sta_full(fifo_sta)) { + *len_p = ring->obj_num; + return 0; + } + + chip_head = FIELD_GET(MCP251XFD_REG_FIFOSTA_FIFOCI_MASK, fifo_sta); + + err = mcp251xfd_check_rx_tail(priv, ring); + if (err) return err; + tail = mcp251xfd_get_rx_tail(ring); - /* chip_rx_head, is the next RX-Object filled by the HW. - * The new RX head must be >= the old head. + /* First shift to full u8. The subtraction works on signed + * values, that keeps the difference steady around the u8 + * overflow. The right shift acts on len, which is an u8. */ - new_head = round_down(ring->head, ring->obj_num) + chip_rx_head; - if (new_head <= ring->head) - new_head += ring->obj_num; + BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(chip_head)); + BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(tail)); + BUILD_BUG_ON(sizeof(ring->obj_num) != sizeof(len)); - ring->head = new_head; + len = (chip_head << shift) - (tail << shift); + *len_p = len >> shift; - return mcp251xfd_check_rx_tail(priv, ring); + return 0; } static void @@ -208,6 +219,8 @@ mcp251xfd_handle_rxif_ring_uinc(const struct mcp251xfd_priv *priv, if (!len) return 0; + ring->head += len; + /* Increment the RX FIFO tail pointer 'len' times in a * single SPI message. 
* @@ -233,22 +246,22 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, struct mcp251xfd_rx_ring *ring) { struct mcp251xfd_hw_rx_obj_canfd *hw_rx_obj = ring->obj; - u8 rx_tail, len; + u8 rx_tail, len, l; int err, i; - err = mcp251xfd_rx_ring_update(priv, ring); + err = mcp251xfd_get_rx_len(priv, ring, &len); if (err) return err; - while ((len = mcp251xfd_get_rx_linear_len(ring))) { + while ((l = mcp251xfd_get_rx_linear_len(ring, len))) { rx_tail = mcp251xfd_get_rx_tail(ring); err = mcp251xfd_rx_obj_read(priv, ring, hw_rx_obj, - rx_tail, len); + rx_tail, l); if (err) return err; - for (i = 0; i < len; i++) { + for (i = 0; i < l; i++) { err = mcp251xfd_handle_rxif_one(priv, ring, (void *)hw_rx_obj + i * ring->obj_size); @@ -256,9 +269,11 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, return err; } - err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, len); + err = mcp251xfd_handle_rxif_ring_uinc(priv, ring, l); if (err) return err; + + len -= l; } return 0; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 4628bf847bc9be..2e5cee6ad0c4d0 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -558,6 +558,7 @@ struct mcp251xfd_rx_ring { u8 nr; u8 fifo_nr; u8 obj_num; + u8 obj_num_shift_to_u8; u8 obj_size; union mcp251xfd_write_reg_buf irq_enable_buf; @@ -907,18 +908,9 @@ static inline u8 mcp251xfd_get_rx_tail(const struct mcp251xfd_rx_ring *ring) return ring->tail & (ring->obj_num - 1); } -static inline u8 mcp251xfd_get_rx_len(const struct mcp251xfd_rx_ring *ring) -{ - return ring->head - ring->tail; -} - static inline u8 -mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring) +mcp251xfd_get_rx_linear_len(const struct mcp251xfd_rx_ring *ring, u8 len) { - u8 len; - - len = mcp251xfd_get_rx_len(ring); - return min_t(u8, len, ring->obj_num - mcp251xfd_get_rx_tail(ring)); } From 6cdc3fc4fb2fcf75d2bc039ca192ac104ba5d9e5 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Jan 2023 11:48:16 +0100 Subject: [PATCH 233/268] can: mcp251xfd: clarify the meaning of timestamp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e793c724b48ca8cae9693bc3be528e85284c126a ] The mcp251xfd chip is configured to provide a timestamp with each received and transmitted CAN frame. The timestamp is derived from the internal free-running timer, which can also be read from the TBC register via SPI. The timer is 32 bits wide and is clocked by the external oscillator (typically 20 or 40 MHz). To avoid confusion, we call this timestamp "timestamp_raw" or "ts_raw" for short. Using the timecounter framework, the "ts_raw" is converted to 64 bit nanoseconds since the epoch. This is what we call "timestamp". This is a preparation for the next patches which use the "timestamp" to work around a bug where so far only the "ts_raw" is used. 
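For reference, the 32-bit raw timer wraps fairly quickly: at 40 MHz, 2^32 / 40,000,000 is roughly 107 s (about 215 s at 20 MHz), so raw values alone cannot order frames across a wrap. A minimal sketch of the conversion (the example_raw_to_ns() wrapper is hypothetical; the timecounter call is the one the driver uses):

	static u64 example_raw_to_ns(struct mcp251xfd_priv *priv, u32 ts_raw)
	{
		/* Extend the 32-bit cycle count to 64-bit nanoseconds
		 * since the timecounter was started; this stays correct
		 * as long as the counter is read often enough to observe
		 * every wrap.
		 */
		return timecounter_cyc2time(&priv->tc, ts_raw);
	}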
Tested-by: Stefan Althöfer Tested-by: Thomas Kopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- .../net/can/spi/mcp251xfd/mcp251xfd-core.c | 28 +++++++++---------- drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 2 +- drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c | 2 +- .../can/spi/mcp251xfd/mcp251xfd-timestamp.c | 22 ++++----------- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 27 ++++++++++++++---- 5 files changed, 43 insertions(+), 38 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 1665f78abb5c94..a9bafa96e2f926 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2,7 +2,7 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2019, 2020, 2021 Pengutronix, +// Copyright (c) 2019, 2020, 2021, 2023 Pengutronix, // Marc Kleine-Budde // // Based on: @@ -867,18 +867,18 @@ static int mcp251xfd_get_berr_counter(const struct net_device *ndev, static struct sk_buff * mcp251xfd_alloc_can_err_skb(struct mcp251xfd_priv *priv, - struct can_frame **cf, u32 *timestamp) + struct can_frame **cf, u32 *ts_raw) { struct sk_buff *skb; int err; - err = mcp251xfd_get_timestamp(priv, timestamp); + err = mcp251xfd_get_timestamp_raw(priv, ts_raw); if (err) return NULL; skb = alloc_can_err_skb(priv->ndev, cf); if (skb) - mcp251xfd_skb_set_timestamp(priv, skb, *timestamp); + mcp251xfd_skb_set_timestamp_raw(priv, skb, *ts_raw); return skb; } @@ -889,7 +889,7 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv) struct mcp251xfd_rx_ring *ring; struct sk_buff *skb; struct can_frame *cf; - u32 timestamp, rxovif; + u32 ts_raw, rxovif; int err, i; stats->rx_over_errors++; @@ -924,14 +924,14 @@ static int mcp251xfd_handle_rxovif(struct mcp251xfd_priv *priv) return err; } - skb = mcp251xfd_alloc_can_err_skb(priv, &cf, ×tamp); + skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw); if (!skb) return 0; cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; - err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw); if (err) stats->rx_fifo_errors++; @@ -948,12 +948,12 @@ static int mcp251xfd_handle_txatif(struct mcp251xfd_priv *priv) static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv) { struct net_device_stats *stats = &priv->ndev->stats; - u32 bdiag1, timestamp; + u32 bdiag1, ts_raw; struct sk_buff *skb; struct can_frame *cf = NULL; int err; - err = mcp251xfd_get_timestamp(priv, ×tamp); + err = mcp251xfd_get_timestamp_raw(priv, &ts_raw); if (err) return err; @@ -1035,8 +1035,8 @@ static int mcp251xfd_handle_ivmif(struct mcp251xfd_priv *priv) if (!cf) return 0; - mcp251xfd_skb_set_timestamp(priv, skb, timestamp); - err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp); + mcp251xfd_skb_set_timestamp_raw(priv, skb, ts_raw); + err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw); if (err) stats->rx_fifo_errors++; @@ -1049,7 +1049,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv) struct sk_buff *skb; struct can_frame *cf = NULL; enum can_state new_state, rx_state, tx_state; - u32 trec, timestamp; + u32 trec, ts_raw; int err; err = regmap_read(priv->map_reg, MCP251XFD_REG_TREC, &trec); @@ -1079,7 +1079,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv) /* The skb allocation might fail, but can_change_state() * handles cf == NULL. 
*/ - skb = mcp251xfd_alloc_can_err_skb(priv, &cf, ×tamp); + skb = mcp251xfd_alloc_can_err_skb(priv, &cf, &ts_raw); can_change_state(priv->ndev, cf, tx_state, rx_state); if (new_state == CAN_STATE_BUS_OFF) { @@ -1110,7 +1110,7 @@ static int mcp251xfd_handle_cerrif(struct mcp251xfd_priv *priv) cf->data[7] = bec.rxerr; } - err = can_rx_offload_queue_timestamp(&priv->offload, skb, timestamp); + err = can_rx_offload_queue_timestamp(&priv->offload, skb, ts_raw); if (err) stats->rx_fifo_errors++; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index 5d0fb1c454cdc4..a79e6c661ecc11 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -160,7 +160,7 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) memcpy(cfd->data, hw_rx_obj->data, cfd->len); - mcp251xfd_skb_set_timestamp(priv, skb, hw_rx_obj->ts); + mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_rx_obj->ts); } static int diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c index 5b0c7890d4b44f..3886476a8f8efb 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-tef.c @@ -97,7 +97,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv, tef_tail = mcp251xfd_get_tef_tail(priv); skb = priv->can.echo_skb[tef_tail]; if (skb) - mcp251xfd_skb_set_timestamp(priv, skb, hw_tef_obj->ts); + mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_tef_obj->ts); stats->tx_bytes += can_rx_offload_get_echo_skb_queue_timestamp(&priv->offload, tef_tail, hw_tef_obj->ts, diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c index 712e091869870c..1db99aabe85c56 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-timestamp.c @@ -2,7 +2,7 @@ // // mcp251xfd - Microchip MCP251xFD Family CAN controller driver // -// Copyright (c) 2021 Pengutronix, +// Copyright (c) 2021, 2023 Pengutronix, // Marc Kleine-Budde // @@ -11,20 +11,20 @@ #include "mcp251xfd.h" -static u64 mcp251xfd_timestamp_read(const struct cyclecounter *cc) +static u64 mcp251xfd_timestamp_raw_read(const struct cyclecounter *cc) { const struct mcp251xfd_priv *priv; - u32 timestamp = 0; + u32 ts_raw = 0; int err; priv = container_of(cc, struct mcp251xfd_priv, cc); - err = mcp251xfd_get_timestamp(priv, ×tamp); + err = mcp251xfd_get_timestamp_raw(priv, &ts_raw); if (err) netdev_err(priv->ndev, "Error %d while reading timestamp. 
HW timestamps may be inaccurate.", err); - return timestamp; + return ts_raw; } static void mcp251xfd_timestamp_work(struct work_struct *work) @@ -39,21 +39,11 @@ static void mcp251xfd_timestamp_work(struct work_struct *work) MCP251XFD_TIMESTAMP_WORK_DELAY_SEC * HZ); } -void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, - struct sk_buff *skb, u32 timestamp) -{ - struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); - u64 ns; - - ns = timecounter_cyc2time(&priv->tc, timestamp); - hwtstamps->hwtstamp = ns_to_ktime(ns); -} - void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv) { struct cyclecounter *cc = &priv->cc; - cc->read = mcp251xfd_timestamp_read; + cc->read = mcp251xfd_timestamp_raw_read; cc->mask = CYCLECOUNTER_MASK(32); cc->shift = 1; cc->mult = clocksource_hz2mult(priv->can.clock.freq, cc->shift); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index 2e5cee6ad0c4d0..ae35845d4ce1a2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -2,7 +2,7 @@ * * mcp251xfd - Microchip MCP251xFD Family CAN controller driver * - * Copyright (c) 2019, 2020, 2021 Pengutronix, + * Copyright (c) 2019, 2020, 2021, 2023 Pengutronix, * Marc Kleine-Budde * Copyright (c) 2019 Martin Sperl */ @@ -812,10 +812,27 @@ mcp251xfd_spi_cmd_write(const struct mcp251xfd_priv *priv, return data; } -static inline int mcp251xfd_get_timestamp(const struct mcp251xfd_priv *priv, - u32 *timestamp) +static inline int mcp251xfd_get_timestamp_raw(const struct mcp251xfd_priv *priv, + u32 *ts_raw) { - return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, timestamp); + return regmap_read(priv->map_reg, MCP251XFD_REG_TBC, ts_raw); +} + +static inline void mcp251xfd_skb_set_timestamp(struct sk_buff *skb, u64 ns) +{ + struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb); + + hwtstamps->hwtstamp = ns_to_ktime(ns); +} + +static inline +void mcp251xfd_skb_set_timestamp_raw(const struct mcp251xfd_priv *priv, + struct sk_buff *skb, u32 ts_raw) +{ + u64 ns; + + ns = timecounter_cyc2time(&priv->tc, ts_raw); + mcp251xfd_skb_set_timestamp(skb, ns); } static inline u16 mcp251xfd_get_tef_obj_addr(u8 n) @@ -936,8 +953,6 @@ void mcp251xfd_ring_free(struct mcp251xfd_priv *priv); int mcp251xfd_ring_alloc(struct mcp251xfd_priv *priv); int mcp251xfd_handle_rxif(struct mcp251xfd_priv *priv); int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv); -void mcp251xfd_skb_set_timestamp(const struct mcp251xfd_priv *priv, - struct sk_buff *skb, u32 timestamp); void mcp251xfd_timestamp_init(struct mcp251xfd_priv *priv); void mcp251xfd_timestamp_stop(struct mcp251xfd_priv *priv); From 5ea24ddc26a7f67cc4d1992699d63c120b817472 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Wed, 11 Jan 2023 11:53:50 +0100 Subject: [PATCH 234/268] can: mcp251xfd: rx: add workaround for erratum DS80000789E 6 of mcp2518fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 24436be590c6fbb05f6161b0dfba7d9da60214aa ] This patch tries to works around erratum DS80000789E 6 of the mcp2518fd, the other variants of the chip family (mcp2517fd and mcp251863) are probably also affected. In the bad case, the driver reads a too large head index. In the original code, the driver always trusted the read value, which caused old, already processed CAN frames or new, incompletely written CAN frames to be (re-)processed. 
To work around this issue, keep a per FIFO timestamp [1] of the last valid received CAN frame and compare against the timestamp of every received CAN frame. If an old CAN frame is detected, abort the iteration and mark the number of valid CAN frames as processed in the chip by incrementing the FIFO's tail index. Further tests showed that this workaround can recognize old CAN frames, but a small time window remains in which partially written CAN frames [2] are not recognized but then processed. These CAN frames have the correct data and time stamps, but the DLC has not yet been updated. [1] As the raw timestamp overflows every 107 seconds (at the usual clock rate of 40 MHz) convert it to nanoseconds with the timecounter framework and use this to detect stale CAN frames. Link: https://lore.kernel.org/all/BL3PR11MB64844C1C95CA3BDADAE4D8CCFBC99@BL3PR11MB6484.namprd11.prod.outlook.com [2] Reported-by: Stefan Althöfer Closes: https://lore.kernel.org/all/FR0P281MB1966273C216630B120ABB6E197E89@FR0P281MB1966.DEUP281.PROD.OUTLOOK.COM Tested-by: Stefan Althöfer Tested-by: Thomas Kopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- .../net/can/spi/mcp251xfd/mcp251xfd-ring.c | 1 + drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c | 32 +++++++++++++++++-- drivers/net/can/spi/mcp251xfd/mcp251xfd.h | 3 ++ 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c index 5f92aed62ff954..f72582d4d3e8e2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-ring.c @@ -206,6 +206,7 @@ mcp251xfd_ring_init_rx(struct mcp251xfd_priv *priv, u16 *base, u8 *fifo_nr) int i, j; mcp251xfd_for_each_rx_ring(priv, rx_ring, i) { + rx_ring->last_valid = timecounter_read(&priv->tc); rx_ring->head = 0; rx_ring->tail = 0; rx_ring->base = *base; diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c index a79e6c661ecc11..fe897f3e4c12a2 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-rx.c @@ -159,8 +159,6 @@ mcp251xfd_hw_rx_obj_to_skb(const struct mcp251xfd_priv *priv, if (!(hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_RTR)) memcpy(cfd->data, hw_rx_obj->data, cfd->len); - - mcp251xfd_skb_set_timestamp_raw(priv, skb, hw_rx_obj->ts); } static int @@ -171,8 +169,26 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, struct net_device_stats *stats = &priv->ndev->stats; struct sk_buff *skb; struct canfd_frame *cfd; + u64 timestamp; int err; + /* According to mcp2518fd erratum DS80000789E 6. the FIFOCI + * bits of a FIFOSTA register, here the RX FIFO head index + * might be corrupted and we might process past the RX FIFO's + * head into old CAN frames. + * + * Compare the timestamp of currently processed CAN frame with + * last valid frame received. Abort with -EBADMSG if an old + * CAN frame is detected. 
+ */ + timestamp = timecounter_cyc2time(&priv->tc, hw_rx_obj->ts); + if (timestamp <= ring->last_valid) { + stats->rx_fifo_errors++; + + return -EBADMSG; + } + ring->last_valid = timestamp; + if (hw_rx_obj->flags & MCP251XFD_OBJ_FLAGS_FDF) skb = alloc_canfd_skb(priv->ndev, &cfd); else @@ -183,6 +199,7 @@ mcp251xfd_handle_rxif_one(struct mcp251xfd_priv *priv, return 0; } + mcp251xfd_skb_set_timestamp(skb, timestamp); mcp251xfd_hw_rx_obj_to_skb(priv, hw_rx_obj, skb); err = can_rx_offload_queue_timestamp(&priv->offload, skb, hw_rx_obj->ts); if (err) @@ -265,7 +282,16 @@ mcp251xfd_handle_rxif_ring(struct mcp251xfd_priv *priv, err = mcp251xfd_handle_rxif_one(priv, ring, (void *)hw_rx_obj + i * ring->obj_size); - if (err) + + /* -EBADMSG means we're affected by mcp2518fd + * erratum DS80000789E 6., i.e. the timestamp + * in the RX object is older that the last + * valid received CAN frame. Don't process any + * further and mark processed frames as good. + */ + if (err == -EBADMSG) + return mcp251xfd_handle_rxif_ring_uinc(priv, ring, i); + else if (err) return err; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h index ae35845d4ce1a2..991662fbba42e8 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h @@ -554,6 +554,9 @@ struct mcp251xfd_rx_ring { unsigned int head; unsigned int tail; + /* timestamp of the last valid received CAN frame */ + u64 last_valid; + u16 base; u8 nr; u8 fifo_nr; From 5f2a2bf25395f50b1b2cb7c04ae2d5986520be5f Mon Sep 17 00:00:00 2001 From: Aurabindo Pillai Date: Fri, 2 Feb 2024 14:00:27 -0500 Subject: [PATCH 235/268] drm/amd: Add gfx12 swizzle mode defs [ Upstream commit 7ceb94e87bffff7c12b61eb29749e1d8ac976896 ] Add GFX12 swizzle mode definitions for use with DCN401 Signed-off-by: Aurabindo Pillai Acked-by: Rodrigo Siqueira Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- include/uapi/drm/drm_fourcc.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 8db7fd3f743e4f..fb304067781559 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1474,6 +1474,7 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_VER_GFX10 2 #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3 #define AMD_FMT_MOD_TILE_VER_GFX11 4 +#define AMD_FMT_MOD_TILE_VER_GFX12 5 /* * 64K_S is the same for GFX9/GFX10/GFX10_RBPLUS and hence has GFX9 as canonical @@ -1484,6 +1485,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * 64K_D for non-32 bpp is the same for GFX9/GFX10/GFX10_RBPLUS and hence has * GFX9 as canonical version. + * + * 64K_D_2D on GFX12 is identical to 64K_D on GFX11. 
*/ #define AMD_FMT_MOD_TILE_GFX9_64K_D 10 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25 @@ -1491,6 +1494,19 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27 #define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31 +/* Gfx12 swizzle modes: + * 0 - LINEAR + * 1 - 256B_2D - 2D block dimensions + * 2 - 4KB_2D + * 3 - 64KB_2D + * 4 - 256KB_2D + * 5 - 4KB_3D - 3D block dimensions + * 6 - 64KB_3D + * 7 - 256KB_3D + */ +#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 +#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 + #define AMD_FMT_MOD_DCC_BLOCK_64B 0 #define AMD_FMT_MOD_DCC_BLOCK_128B 1 #define AMD_FMT_MOD_DCC_BLOCK_256B 2 From 302ba299c31e0de54cea431ac1d281dbab7fd0b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Sat, 1 Jun 2024 19:53:01 -0400 Subject: [PATCH 236/268] drm/amdgpu: handle gfx12 in amdgpu_display_verify_sizes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 8dd1426e2c80e32ac1995007330c8f95ffa28ebb ] It verified GFX9-11 swizzle modes on GFX12, which has undefined behavior. Signed-off-by: Marek Olšák Acked-by: Alex Deucher Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 27 ++++++++++++++++++++- include/uapi/drm/drm_fourcc.h | 2 ++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 82ad2b01f2e9ee..5fbb9caa7415fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1033,6 +1033,30 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) block_width = 256 / format_info->cpp[i]; block_height = 1; block_size_log2 = 8; + } else if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX12) { + int swizzle = AMD_FMT_MOD_GET(TILE, modifier); + + switch (swizzle) { + case AMD_FMT_MOD_TILE_GFX12_256B_2D: + block_size_log2 = 8; + break; + case AMD_FMT_MOD_TILE_GFX12_4K_2D: + block_size_log2 = 12; + break; + case AMD_FMT_MOD_TILE_GFX12_64K_2D: + block_size_log2 = 16; + break; + case AMD_FMT_MOD_TILE_GFX12_256K_2D: + block_size_log2 = 18; + break; + default: + drm_dbg_kms(rfb->base.dev, + "Gfx12 swizzle mode with unknown block size: %d\n", swizzle); + return -EINVAL; + } + + get_block_dimensions(block_size_log2, format_info->cpp[i], + &block_width, &block_height); } else { int swizzle = AMD_FMT_MOD_GET(TILE, modifier); @@ -1068,7 +1092,8 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) return ret; } - if (AMD_FMT_MOD_GET(DCC, modifier)) { + if (AMD_FMT_MOD_GET(TILE_VERSION, modifier) <= AMD_FMT_MOD_TILE_VER_GFX11 && + AMD_FMT_MOD_GET(DCC, modifier)) { if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) { block_size_log2 = get_dcc_block_size(modifier, false, false); get_block_dimensions(block_size_log2 + 8, format_info->cpp[0], diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index fb304067781559..5eed091d4c291f 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1504,6 +1504,8 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * 6 - 64KB_3D * 7 - 256KB_3D */ +#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1 +#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2 #define AMD_FMT_MOD_TILE_GFX12_64K_2D 3 #define AMD_FMT_MOD_TILE_GFX12_256K_2D 4 From c8d4acb325302fd832e10d5ba17b1b8eba9d08f9 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 2 Jul 2024 02:47:32 +0000 Subject: [PATCH 
237/268] ata: libata-scsi: Remove redundant sense_buffer memsets [ Upstream commit 3f6d903b54a137e9e438d9c3b774b5d0432917bc ] SCSI layer clears sense_buffer in scsi_queue_rq() so there is no need for libata to clear it again. Reviewed-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Igor Pylypiv Link: https://lore.kernel.org/r/20240702024735.1152293-5-ipylypiv@google.com Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 277bf0e8ed0918..27e0c87236ac04 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -926,11 +926,8 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) { struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; - unsigned char *sb = cmd->sense_buffer; u8 sense_key, asc, ascq; - memset(sb, 0, SCSI_SENSE_BUFFERSIZE); - /* * Use ata_to_sense_error() to map status register bits * onto sense key, asc & ascq. @@ -976,8 +973,6 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) u64 block; u8 sense_key, asc, ascq; - memset(sb, 0, SCSI_SENSE_BUFFERSIZE); - if (ata_dev_disabled(dev)) { /* Device disabled after error recovery */ /* LOGICAL UNIT NOT READY, HARD RESET REQUIRED */ From 872f86e1757bbb0a334ee739b824e47c448f5ebc Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 2 Jul 2024 02:47:35 +0000 Subject: [PATCH 238/268] ata: libata-scsi: Check ATA_QCFLAG_RTF_FILLED before using result_tf [ Upstream commit 816be86c7993d3c5832c3017c0056297e86f978c ] qc->result_tf contents are only valid when the ATA_QCFLAG_RTF_FILLED flag is set. The ATA_QCFLAG_RTF_FILLED flag should be always set for commands that failed or for commands that have the ATA_QCFLAG_RESULT_TF flag set. Reviewed-by: Hannes Reinecke Reviewed-by: Damien Le Moal Reviewed-by: Niklas Cassel Signed-off-by: Igor Pylypiv Link: https://lore.kernel.org/r/20240702024735.1152293-8-ipylypiv@google.com Signed-off-by: Niklas Cassel Signed-off-by: Sasha Levin --- drivers/ata/libata-scsi.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 27e0c87236ac04..c91f8746289f4a 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -242,10 +242,17 @@ void ata_scsi_set_sense_information(struct ata_device *dev, */ static void ata_scsi_set_passthru_sense_fields(struct ata_queued_cmd *qc) { + struct ata_device *dev = qc->dev; struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; unsigned char *sb = cmd->sense_buffer; + if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { + ata_dev_dbg(dev, + "missing result TF: can't set ATA PT sense fields\n"); + return; + } + if ((sb[0] & 0x7f) >= 0x72) { unsigned char *desc; u8 len; @@ -924,10 +931,17 @@ static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk, */ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) { + struct ata_device *dev = qc->dev; struct scsi_cmnd *cmd = qc->scsicmd; struct ata_taskfile *tf = &qc->result_tf; u8 sense_key, asc, ascq; + if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { + ata_dev_dbg(dev, + "missing result TF: can't generate ATA PT sense data\n"); + return; + } + /* * Use ata_to_sense_error() to map status register bits * onto sense key, asc & ascq. 
@@ -979,6 +993,13 @@ static void ata_gen_ata_sense(struct ata_queued_cmd *qc) ata_scsi_set_sense(dev, cmd, NOT_READY, 0x04, 0x21); return; } + + if (!(qc->flags & ATA_QCFLAG_RTF_FILLED)) { + ata_dev_dbg(dev, + "missing result TF: can't generate sense data\n"); + return; + } + /* Use ata_to_sense_error() to map status register bits * onto sense key, asc & ascq. */ From 02b3f8860963e458377ae42ae74b61df29c04bc5 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Wed, 26 Jun 2024 09:40:42 +0800 Subject: [PATCH 239/268] crypto: starfive - Align rsa input data to 32-bit [ Upstream commit 6aad7019f697ab0bed98eba737d19bd7f67713de ] Hardware expects RSA input plain/ciphertext to be 32-bit aligned. Set fixed length for preallocated buffer to the maximum supported keysize of the hardware and shift input text accordingly. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/starfive/jh7110-cryp.h | 3 ++- drivers/crypto/starfive/jh7110-rsa.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index fe011d50473d76..f386e989789696 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -30,6 +30,7 @@ #define MAX_KEY_SIZE SHA512_BLOCK_SIZE #define STARFIVE_AES_IV_LEN AES_BLOCK_SIZE #define STARFIVE_AES_CTR_LEN AES_BLOCK_SIZE +#define STARFIVE_RSA_MAX_KEYSZ 256 union starfive_aes_csr { u32 v; @@ -217,7 +218,7 @@ struct starfive_cryp_request_ctx { unsigned int digsize; unsigned long in_sg_len; unsigned char *adata; - u8 rsa_data[] __aligned(sizeof(u32)); + u8 rsa_data[STARFIVE_RSA_MAX_KEYSZ] __aligned(sizeof(u32)); }; struct starfive_cryp_dev *starfive_cryp_find_dev(struct starfive_cryp_ctx *ctx); diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index f31bbd825f883f..fbc06f8ee95f74 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -37,7 +37,6 @@ // A * A * R mod N ==> A #define CRYPTO_CMD_AARN 0x7 -#define STARFIVE_RSA_MAX_KEYSZ 256 #define STARFIVE_RSA_RESET 0x2 static inline int starfive_pka_wait_done(struct starfive_cryp_ctx *ctx) @@ -91,7 +90,7 @@ static int starfive_rsa_montgomery_form(struct starfive_cryp_ctx *ctx, { struct starfive_cryp_dev *cryp = ctx->cryp; struct starfive_cryp_request_ctx *rctx = ctx->rctx; - int count = rctx->total / sizeof(u32) - 1; + int count = (ALIGN(rctx->total, 4) / 4) - 1; int loop; u32 temp; u8 opsize; @@ -274,12 +273,17 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc) struct starfive_cryp_dev *cryp = ctx->cryp; struct starfive_cryp_request_ctx *rctx = ctx->rctx; struct starfive_rsa_key *key = &ctx->rsa_key; - int ret = 0; + int ret = 0, shift = 0; writel(STARFIVE_RSA_RESET, cryp->base + STARFIVE_PKA_CACR_OFFSET); + if (!IS_ALIGNED(rctx->total, sizeof(u32))) { + shift = sizeof(u32) - (rctx->total & 0x3); + memset(rctx->rsa_data, 0, shift); + } + rctx->total = sg_copy_to_buffer(rctx->in_sg, rctx->nents, - rctx->rsa_data, rctx->total); + rctx->rsa_data + shift, rctx->total); if (enc) { key->bitlen = key->e_bitlen; From 37b65ea6c7adcb98449e47d27c55b388c0d3b705 Mon Sep 17 00:00:00 2001 From: Jia Jie Ho Date: Wed, 26 Jun 2024 09:40:43 +0800 Subject: [PATCH 240/268] crypto: starfive - Fix nent assignment in rsa dec [ Upstream commit 8323c036789b8b4a61925fce439a89dba17b7f2f ] Missing src scatterlist nent assignment in rsa decrypt function. 
Removing all unneeded assignment and use nents value from req->src instead. Signed-off-by: Jia Jie Ho Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/starfive/jh7110-cryp.h | 1 - drivers/crypto/starfive/jh7110-rsa.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index f386e989789696..607f70292b215d 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -213,7 +213,6 @@ struct starfive_cryp_request_ctx { struct scatterlist *out_sg; struct ahash_request ahash_fbk_req; size_t total; - size_t nents; unsigned int blksize; unsigned int digsize; unsigned long in_sg_len; diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index fbc06f8ee95f74..1db9a3d02848b5 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -282,7 +282,7 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc) memset(rctx->rsa_data, 0, shift); } - rctx->total = sg_copy_to_buffer(rctx->in_sg, rctx->nents, + rctx->total = sg_copy_to_buffer(rctx->in_sg, sg_nents(rctx->in_sg), rctx->rsa_data + shift, rctx->total); if (enc) { @@ -333,7 +333,6 @@ static int starfive_rsa_enc(struct akcipher_request *req) rctx->in_sg = req->src; rctx->out_sg = req->dst; rctx->total = req->src_len; - rctx->nents = sg_nents(rctx->in_sg); ctx->rctx = rctx; return starfive_rsa_enc_core(ctx, 1); From eaccebe663b2513ae4ea306617caa0680b233400 Mon Sep 17 00:00:00 2001 From: devi priya Date: Tue, 6 Aug 2024 11:41:05 +0530 Subject: [PATCH 241/268] clk: qcom: ipq9574: Update the alpha PLL type for GPLLs [ Upstream commit 6357efe3abead68048729adf11a9363881657939 ] Update PLL offsets to DEFAULT_EVO to configure MDIO to 800MHz. The incorrect clock frequency leads to an incorrect MDIO clock. This, in turn, affects the MDIO hardware configurations as the divider is calculated from the MDIO clock frequency. If the clock frequency is not as expected, the MDIO register fails due to the generation of an incorrect MDIO frequency. This issue is critical as it results in incorrect MDIO configurations and ultimately leads to the MDIO function not working. This results in a complete feature failure affecting all Ethernet PHYs. Specifically, Ethernet will not work on IPQ9574 due to this issue. Currently, the clock frequency is set to CLK_ALPHA_PLL_TYPE_DEFAULT. However, this setting does not yield the expected clock frequency. To rectify this, we need to change this to CLK_ALPHA_PLL_TYPE_DEFAULT_EVO. This modification ensures that the clock frequency aligns with our expectations, thereby resolving the MDIO register failure and ensuring the proper functioning of the Ethernet on IPQ9574. 
Fixes: d75b82cff488 ("clk: qcom: Add Global Clock Controller driver for IPQ9574") Signed-off-by: devi priya Signed-off-by: Amandeep Singh Link: https://lore.kernel.org/r/20240806061105.2849944-1-quic_amansing@quicinc.com Signed-off-by: Bjorn Andersson Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-ipq9574.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/gcc-ipq9574.c b/drivers/clk/qcom/gcc-ipq9574.c index f8b9a1e93bef28..cdbbf2cc9c5d19 100644 --- a/drivers/clk/qcom/gcc-ipq9574.c +++ b/drivers/clk/qcom/gcc-ipq9574.c @@ -65,7 +65,7 @@ static const struct clk_parent_data gcc_sleep_clk_data[] = { static struct clk_alpha_pll gpll0_main = { .offset = 0x20000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .clkr = { .enable_reg = 0x0b000, .enable_mask = BIT(0), @@ -93,7 +93,7 @@ static struct clk_fixed_factor gpll0_out_main_div2 = { static struct clk_alpha_pll_postdiv gpll0 = { .offset = 0x20000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .width = 4, .clkr.hw.init = &(const struct clk_init_data) { .name = "gpll0", @@ -107,7 +107,7 @@ static struct clk_alpha_pll_postdiv gpll0 = { static struct clk_alpha_pll gpll4_main = { .offset = 0x22000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .clkr = { .enable_reg = 0x0b000, .enable_mask = BIT(2), @@ -122,7 +122,7 @@ static struct clk_alpha_pll gpll4_main = { static struct clk_alpha_pll_postdiv gpll4 = { .offset = 0x22000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .width = 4, .clkr.hw.init = &(const struct clk_init_data) { .name = "gpll4", @@ -136,7 +136,7 @@ static struct clk_alpha_pll_postdiv gpll4 = { static struct clk_alpha_pll gpll2_main = { .offset = 0x21000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .clkr = { .enable_reg = 0x0b000, .enable_mask = BIT(1), @@ -151,7 +151,7 @@ static struct clk_alpha_pll gpll2_main = { static struct clk_alpha_pll_postdiv gpll2 = { .offset = 0x21000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT_EVO], .width = 4, .clkr.hw.init = &(const struct clk_init_data) { .name = "gpll2", From 8ebe3bb3688a47eacf6f0f93b9dc0a51b46ee688 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Jul 2024 15:51:13 +0200 Subject: [PATCH 242/268] powerpc/64e: remove unused IBM HTW code [ Upstream commit 88715b6e5d529f4ef3830ad2a893e4624c6af0b8 ] Patch series "Reimplement huge pages without hugepd on powerpc (8xx, e500, book3s/64)", v7. Unlike most architectures, powerpc 8xx HW requires a two-level pagetable topology for all page sizes. So a leaf PMD-contig approach is not feasible as such. Possible sizes on 8xx are 4k, 16k, 512k and 8M. First level (PGD/PMD) covers 4M per entry. For 8M pages, two PMD entries must point to a single entry level-2 page table. Until now that was done using hugepd. This series changes it to use standard page tables where the entry is replicated 1024 times on each of the two pagetables refered by the two associated PMD entries for that 8M page. For e500 and book3s/64 there are less constraints because it is not tied to the HW assisted tablewalk like on 8xx, so it is easier to use leaf PMDs (and PUDs). 
On e500 the supported page sizes are 4M, 16M, 64M, 256M and 1G. All at PMD level on e500/32 (mpc85xx) and mix of PMD and PUD for e500/64. We encode page size with 4 available bits in PTE entries. On e300/32 PGD entries size is increases to 64 bits in order to allow leaf-PMD entries because PTE are 64 bits on e500. On book3s/64 only the hash-4k mode is concerned. It supports 16M pages as cont-PMD and 16G pages as cont-PUD. In other modes (radix-4k, radix-6k and hash-64k) the sizes match with PMD and PUD sizes so that's just leaf entries. The hash processing make things a bit more complex. To ease things, __hash_page_huge() is modified to bail out when DIRTY or ACCESSED bits are missing, leaving it to mm core to fix it. This patch (of 23): The nohash HTW_IBM (Hardware Table Walk) code is unused since support for A2 was removed in commit fb5a515704d7 ("powerpc: Remove platforms/ wsp and associated pieces") (2014). The remaining supported CPUs use either no HTW (data_tlb_miss_bolted), or the e6500 HTW (data_tlb_miss_e6500). Link: https://lkml.kernel.org/r/cover.1719928057.git.christophe.leroy@csgroup.eu Link: https://lkml.kernel.org/r/820dd1385ecc931f07b0d7a0fa827b1613917ab6.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Michael Ellerman Signed-off-by: Christophe Leroy Cc: Jason Gunthorpe Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Stable-dep-of: d92b5cc29c79 ("powerpc/64e: Define mmu_pte_psize static") Signed-off-by: Sasha Levin --- arch/powerpc/include/asm/nohash/mmu-e500.h | 3 +- arch/powerpc/mm/nohash/tlb.c | 57 +----- arch/powerpc/mm/nohash/tlb_low_64e.S | 195 --------------------- 3 files changed, 2 insertions(+), 253 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h index 6ddced0415cb5c..7dc24b8632d7c2 100644 --- a/arch/powerpc/include/asm/nohash/mmu-e500.h +++ b/arch/powerpc/include/asm/nohash/mmu-e500.h @@ -303,8 +303,7 @@ extern unsigned long linear_map_top; extern int book3e_htw_mode; #define PPC_HTW_NONE 0 -#define PPC_HTW_IBM 1 -#define PPC_HTW_E6500 2 +#define PPC_HTW_E6500 1 /* * 64-bit booke platforms don't load the tlb in the tlb miss handler code. diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index 5ffa0af4328af8..a5bb87ec857826 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -400,9 +400,8 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) static void __init setup_page_sizes(void) { unsigned int tlb0cfg; - unsigned int tlb0ps; unsigned int eptcfg; - int i, psize; + int psize; #ifdef CONFIG_PPC_E500 unsigned int mmucfg = mfspr(SPRN_MMUCFG); @@ -471,50 +470,6 @@ static void __init setup_page_sizes(void) goto out; } #endif - - tlb0cfg = mfspr(SPRN_TLB0CFG); - tlb0ps = mfspr(SPRN_TLB0PS); - eptcfg = mfspr(SPRN_EPTCFG); - - /* Look for supported direct sizes */ - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (tlb0ps & (1U << (def->shift - 10))) - def->flags |= MMU_PAGE_SIZE_DIRECT; - } - - /* Indirect page sizes supported ? */ - if ((tlb0cfg & TLBnCFG_IND) == 0 || - (tlb0cfg & TLBnCFG_PT) == 0) - goto out; - - book3e_htw_mode = PPC_HTW_IBM; - - /* Now, we only deal with one IND page size for each - * direct size. Hopefully all implementations today are - * unambiguous, but we might want to be careful in the - * future. 
- */ - for (i = 0; i < 3; i++) { - unsigned int ps, sps; - - sps = eptcfg & 0x1f; - eptcfg >>= 5; - ps = eptcfg & 0x1f; - eptcfg >>= 5; - if (!ps || !sps) - continue; - for (psize = 0; psize < MMU_PAGE_COUNT; psize++) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (ps == (def->shift - 10)) - def->flags |= MMU_PAGE_SIZE_INDIRECT; - if (sps == (def->shift - 10)) - def->ind = ps + 10; - } - } - out: /* Cleanup array and print summary */ pr_info("MMU: Supported page sizes\n"); @@ -543,10 +498,6 @@ static void __init setup_mmu_htw(void) */ switch (book3e_htw_mode) { - case PPC_HTW_IBM: - patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e); - break; #ifdef CONFIG_PPC_E500 case PPC_HTW_E6500: extlb_level_exc = EX_TLB_SIZE; @@ -577,12 +528,6 @@ static void early_init_this_mmu(void) mmu_pte_psize = MMU_PAGE_2M; break; - case PPC_HTW_IBM: - mas4 |= MAS4_INDD; - mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT; - mmu_pte_psize = MMU_PAGE_1M; - break; - case PPC_HTW_NONE: mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; mmu_pte_psize = mmu_virtual_psize; diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 7e0b8fe1c27975..b0eb3f7eaed149 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -893,201 +893,6 @@ virt_page_table_tlb_miss_whacko_fault: TLB_MISS_EPILOG_ERROR b exc_data_storage_book3e - -/************************************************************** - * * - * TLB miss handling for Book3E with hw page table support * - * * - **************************************************************/ - - -/* Data TLB miss */ - START_EXCEPTION(data_tlb_miss_htw) - TLB_MISS_PROLOG - - /* Now we handle the fault proper. We only save DEAR in normal - * fault case since that's the only interesting values here. - * We could probably also optimize by not saving SRR0/1 in the - * linear mapping case but I'll leave that for later - */ - mfspr r14,SPRN_ESR - mfspr r16,SPRN_DEAR /* get faulting address */ - srdi r11,r16,44 /* get region */ - xoris r11,r11,0xc - cmpldi cr0,r11,0 /* linear mapping ? */ - beq tlb_load_linear /* yes -> go to linear map load */ - cmpldi cr1,r11,1 /* vmalloc mapping ? */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - srdi. r11,r16,60 /* Check for user region */ - ld r15,PACAPGD(r13) /* Load user pgdir */ - beq htw_tlb_miss - - /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ cr1,htw_tlb_miss - - /* We got a crappy address, just fault with whatever DEAR and ESR - * are here - */ - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e - -/* Instruction TLB miss */ - START_EXCEPTION(instruction_tlb_miss_htw) - TLB_MISS_PROLOG - - /* If we take a recursive fault, the second level handler may need - * to know whether we are handling a data or instruction fault in - * order to get to the right store fault handler. We provide that - * info by keeping a crazy value for ESR in r14 - */ - li r14,-1 /* store to exception frame is done later */ - - /* Now we handle the fault proper. We only save DEAR in the non - * linear mapping case since we know the linear mapping case will - * not re-enter. 
We could indeed optimize and also not save SRR0/1 - * in the linear mapping case but I'll leave that for later - * - * Faulting address is SRR0 which is already in r16 - */ - srdi r11,r16,44 /* get region */ - xoris r11,r11,0xc - cmpldi cr0,r11,0 /* linear mapping ? */ - beq tlb_load_linear /* yes -> go to linear map load */ - cmpldi cr1,r11,1 /* vmalloc mapping ? */ - - /* We do the user/kernel test for the PID here along with the RW test - */ - srdi. r11,r16,60 /* Check for user region */ - ld r15,PACAPGD(r13) /* Load user pgdir */ - beq htw_tlb_miss - - /* XXX replace the RMW cycles with immediate loads + writes */ -1: mfspr r10,SPRN_MAS1 - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */ - beq+ htw_tlb_miss - - /* We got a crappy address, just fault */ - TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - - -/* - * This is the guts of the second-level TLB miss handler for direct - * misses. We are entered with: - * - * r16 = virtual page table faulting address - * r15 = PGD pointer - * r14 = ESR - * r13 = PACA - * r12 = TLB exception frame in PACA - * r11 = crap (free to use) - * r10 = crap (free to use) - * - * It can be re-entered by the linear mapping miss handler. However, to - * avoid too much complication, it will save/restore things for us - */ -htw_tlb_miss: -#ifdef CONFIG_PPC_KUAP - mfspr r10,SPRN_MAS1 - rlwinm. r10,r10,0,0x3fff0000 - beq- htw_tlb_miss_fault /* KUAP fault */ -#endif - /* Search if we already have a TLB entry for that virtual address, and - * if we do, bail out. - * - * MAS1:IND should be already set based on MAS4 - */ - PPC_TLBSRX_DOT(0,R16) - beq htw_tlb_miss_done - - /* Now, we need to walk the page tables. First check if we are in - * range. - */ - rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4 - bne- htw_tlb_miss_fault - - /* Get the PGD pointer */ - cmpldi cr0,r15,0 - beq- htw_tlb_miss_fault - - /* Get to PGD entry */ - rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault - - /* Get to PUD entry */ - rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault - - /* Get to PMD entry */ - rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3 - clrrdi r10,r11,3 - ldx r15,r10,r15 - cmpdi cr0,r15,0 - bge htw_tlb_miss_fault - - /* Ok, we're all right, we can now create an indirect entry for - * a 1M or 256M page. - * - * The last trick is now that because we use "half" pages for - * the HTW (1M IND is 2K and 256M IND is 32K) we need to account - * for an added LSB bit to the RPN. For 64K pages, there is no - * problem as we already use 32K arrays (half PTE pages), but for - * 4K page we need to extract a bit from the virtual address and - * insert it into the "PA52" bit of the RPN. - */ - rlwimi r15,r16,32-9,20,20 - /* Now we build the MAS: - * - * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG - * MAS 1 : Almost fully setup - * - PID already updated by caller if necessary - * - TSIZE for now is base ind page size always - * MAS 2 : Use defaults - * MAS 3+7 : Needs to be done - */ - ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT) - - srdi r16,r10,32 - mtspr SPRN_MAS3,r10 - mtspr SPRN_MAS7,r16 - - tlbwe - -htw_tlb_miss_done: - /* We don't bother with restoring DEAR or ESR since we know we are - * level 0 and just going back to userland. 
They are only needed - * if you are going to take an access fault - */ - TLB_MISS_EPILOG_SUCCESS - rfi - -htw_tlb_miss_fault: - /* We need to check if it was an instruction miss. We know this - * though because r14 would contain -1 - */ - cmpdi cr0,r14,-1 - beq 1f - mtspr SPRN_DEAR,r16 - mtspr SPRN_ESR,r14 - TLB_MISS_EPILOG_ERROR - b exc_data_storage_book3e -1: TLB_MISS_EPILOG_ERROR - b exc_instruction_storage_book3e - /* * This is the guts of "any" level TLB miss handler for kernel linear * mapping misses. We are entered with: From 8ea58996f5396df6d5081e0b556ce60c1d76ffec Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 2 Jul 2024 15:51:14 +0200 Subject: [PATCH 243/268] powerpc/64e: split out nohash Book3E 64-bit code [ Upstream commit a898530eea3d0ba08c17a60865995a3bb468d1bc ] A reasonable chunk of nohash/tlb.c is 64-bit only code, split it out into a separate file. Link: https://lkml.kernel.org/r/cb2b118f9d8a86f82d01bfb9ad309d1d304480a1.1719928057.git.christophe.leroy@csgroup.eu Signed-off-by: Michael Ellerman Signed-off-by: Christophe Leroy Cc: Jason Gunthorpe Cc: Nicholas Piggin Cc: Oscar Salvador Cc: Peter Xu Signed-off-by: Andrew Morton Stable-dep-of: d92b5cc29c79 ("powerpc/64e: Define mmu_pte_psize static") Signed-off-by: Sasha Levin --- arch/powerpc/mm/nohash/Makefile | 2 +- arch/powerpc/mm/nohash/tlb.c | 343 +---------------------------- arch/powerpc/mm/nohash/tlb_64e.c | 361 +++++++++++++++++++++++++++++++ 3 files changed, 363 insertions(+), 343 deletions(-) create mode 100644 arch/powerpc/mm/nohash/tlb_64e.c diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile index f3894e79d5f700..24b445a5fcaccd 100644 --- a/arch/powerpc/mm/nohash/Makefile +++ b/arch/powerpc/mm/nohash/Makefile @@ -3,7 +3,7 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += mmu_context.o tlb.o tlb_low.o kup.o -obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o +obj-$(CONFIG_PPC_BOOK3E_64) += tlb_64e.o tlb_low_64e.o book3e_pgtable.o obj-$(CONFIG_40x) += 40x.o obj-$(CONFIG_44x) += 44x.o obj-$(CONFIG_PPC_8xx) += 8xx.o diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index a5bb87ec857826..f57dc721d0636a 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -110,28 +110,6 @@ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = { }; #endif -/* The variables below are currently only used on 64-bit Book3E - * though this will probably be made common with other nohash - * implementations at some point - */ -#ifdef CONFIG_PPC64 - -int mmu_pte_psize; /* Page size used for PTE pages */ -int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ -int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ -unsigned long linear_map_top; /* Top of linear mapping */ - - -/* - * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug - * exceptions. This is used for bolted and e6500 TLB miss handlers which - * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, - * this is set to zero. 
- */ -int extlb_level_exc; - -#endif /* CONFIG_PPC64 */ - #ifdef CONFIG_PPC_E500 /* next_tlbcam_idx is used to round-robin tlbcam entry assignment */ DEFINE_PER_CPU(int, next_tlbcam_idx); @@ -358,326 +336,7 @@ void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm(tlb->mm); } -/* - * Below are functions specific to the 64-bit variant of Book3E though that - * may change in the future - */ - -#ifdef CONFIG_PPC64 - -/* - * Handling of virtual linear page tables or indirect TLB entries - * flushing when PTE pages are freed - */ -void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) -{ - int tsize = mmu_psize_defs[mmu_pte_psize].enc; - - if (book3e_htw_mode != PPC_HTW_NONE) { - unsigned long start = address & PMD_MASK; - unsigned long end = address + PMD_SIZE; - unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; - - /* This isn't the most optimal, ideally we would factor out the - * while preempt & CPU mask mucking around, or even the IPI but - * it will do for now - */ - while (start < end) { - __flush_tlb_page(tlb->mm, start, tsize, 1); - start += size; - } - } else { - unsigned long rmask = 0xf000000000000000ul; - unsigned long rid = (address & rmask) | 0x1000000000000000ul; - unsigned long vpte = address & ~rmask; - - vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; - vpte |= rid; - __flush_tlb_page(tlb->mm, vpte, tsize, 0); - } -} - -static void __init setup_page_sizes(void) -{ - unsigned int tlb0cfg; - unsigned int eptcfg; - int psize; - -#ifdef CONFIG_PPC_E500 - unsigned int mmucfg = mfspr(SPRN_MMUCFG); - int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); - - if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { - unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); - unsigned int min_pg, max_pg; - - min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; - max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; - - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def; - unsigned int shift; - - def = &mmu_psize_defs[psize]; - shift = def->shift; - - if (shift == 0 || shift & 1) - continue; - - /* adjust to be in terms of 4^shift Kb */ - shift = (shift - 10) >> 1; - - if ((shift >= min_pg) && (shift <= max_pg)) - def->flags |= MMU_PAGE_SIZE_DIRECT; - } - - goto out; - } - - if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { - u32 tlb1cfg, tlb1ps; - - tlb0cfg = mfspr(SPRN_TLB0CFG); - tlb1cfg = mfspr(SPRN_TLB1CFG); - tlb1ps = mfspr(SPRN_TLB1PS); - eptcfg = mfspr(SPRN_EPTCFG); - - if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) - book3e_htw_mode = PPC_HTW_E6500; - - /* - * We expect 4K subpage size and unrestricted indirect size. - * The lack of a restriction on indirect size is a Freescale - * extension, indicated by PSn = 0 but SPSn != 0. 
- */ - if (eptcfg != 2) - book3e_htw_mode = PPC_HTW_NONE; - - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - - if (!def->shift) - continue; - - if (tlb1ps & (1U << (def->shift - 10))) { - def->flags |= MMU_PAGE_SIZE_DIRECT; - - if (book3e_htw_mode && psize == MMU_PAGE_2M) - def->flags |= MMU_PAGE_SIZE_INDIRECT; - } - } - - goto out; - } -#endif -out: - /* Cleanup array and print summary */ - pr_info("MMU: Supported page sizes\n"); - for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { - struct mmu_psize_def *def = &mmu_psize_defs[psize]; - const char *__page_type_names[] = { - "unsupported", - "direct", - "indirect", - "direct & indirect" - }; - if (def->flags == 0) { - def->shift = 0; - continue; - } - pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), - __page_type_names[def->flags & 0x3]); - } -} - -static void __init setup_mmu_htw(void) -{ - /* - * If we want to use HW tablewalk, enable it by patching the TLB miss - * handlers to branch to the one dedicated to it. - */ - - switch (book3e_htw_mode) { -#ifdef CONFIG_PPC_E500 - case PPC_HTW_E6500: - extlb_level_exc = EX_TLB_SIZE; - patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); - patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); - break; -#endif - } - pr_info("MMU: Book3E HW tablewalk %s\n", - book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported"); -} - -/* - * Early initialization of the MMU TLB code - */ -static void early_init_this_mmu(void) -{ - unsigned int mas4; - - /* Set MAS4 based on page table setting */ - - mas4 = 0x4 << MAS4_WIMGED_SHIFT; - switch (book3e_htw_mode) { - case PPC_HTW_E6500: - mas4 |= MAS4_INDD; - mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; - mas4 |= MAS4_TLBSELD(1); - mmu_pte_psize = MMU_PAGE_2M; - break; - - case PPC_HTW_NONE: - mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; - mmu_pte_psize = mmu_virtual_psize; - break; - } - mtspr(SPRN_MAS4, mas4); - -#ifdef CONFIG_PPC_E500 - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - unsigned int num_cams; - bool map = true; - - /* use a quarter of the TLBCAM for bolted linear map */ - num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; - - /* - * Only do the mapping once per core, or else the - * transient mapping would cause problems. - */ -#ifdef CONFIG_SMP - if (hweight32(get_tensr()) > 1) - map = false; -#endif - - if (map) - linear_map_top = map_mem_in_cams(linear_map_top, - num_cams, false, true); - } -#endif - - /* A sync won't hurt us after mucking around with - * the MMU configuration - */ - mb(); -} - -static void __init early_init_mmu_global(void) -{ - /* XXX This should be decided at runtime based on supported - * page sizes in the TLB, but for now let's assume 16M is - * always there and a good fit (which it probably is) - * - * Freescale booke only supports 4K pages in TLB0, so use that. - */ - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - mmu_vmemmap_psize = MMU_PAGE_4K; - else - mmu_vmemmap_psize = MMU_PAGE_16M; - - /* XXX This code only checks for TLB 0 capabilities and doesn't - * check what page size combos are supported by the HW. It - * also doesn't handle the case where a separate array holds - * the IND entries from the array loaded by the PT. 
- */ - /* Look for supported page sizes */ - setup_page_sizes(); - - /* Look for HW tablewalk support */ - setup_mmu_htw(); - -#ifdef CONFIG_PPC_E500 - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - if (book3e_htw_mode == PPC_HTW_NONE) { - extlb_level_exc = EX_TLB_SIZE; - patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); - patch_exception(0x1e0, - exc_instruction_tlb_miss_bolted_book3e); - } - } -#endif - - /* Set the global containing the top of the linear mapping - * for use by the TLB miss code - */ - linear_map_top = memblock_end_of_DRAM(); - - ioremap_bot = IOREMAP_BASE; -} - -static void __init early_mmu_set_memory_limit(void) -{ -#ifdef CONFIG_PPC_E500 - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - /* - * Limit memory so we dont have linear faults. - * Unlike memblock_set_current_limit, which limits - * memory available during early boot, this permanently - * reduces the memory available to Linux. We need to - * do this because highmem is not supported on 64-bit. - */ - memblock_enforce_memory_limit(linear_map_top); - } -#endif - - memblock_set_current_limit(linear_map_top); -} - -/* boot cpu only */ -void __init early_init_mmu(void) -{ - early_init_mmu_global(); - early_init_this_mmu(); - early_mmu_set_memory_limit(); -} - -void early_init_mmu_secondary(void) -{ - early_init_this_mmu(); -} - -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) -{ - /* On non-FSL Embedded 64-bit, we adjust the RMA size to match - * the bolted TLB entry. We know for now that only 1G - * entries are supported though that may eventually - * change. - * - * on FSL Embedded 64-bit, usually all RAM is bolted, but with - * unusual memory sizes it's possible for some RAM to not be mapped - * (such RAM is not used at all by Linux, since we don't support - * highmem on 64-bit). We limit ppc64_rma_size to what would be - * mappable if this memblock is the only one. Additional memblocks - * can only increase, not decrease, the amount that ends up getting - * mapped. We still limit max to 1G even if we'll eventually map - * more. This is due to what the early init code is set up to do. - * - * We crop it to the size of the first MEMBLOCK to - * avoid going over total available memory just in case... - */ -#ifdef CONFIG_PPC_E500 - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { - unsigned long linear_sz; - unsigned int num_cams; - - /* use a quarter of the TLBCAM for bolted linear map */ - num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; - - linear_sz = map_mem_in_cams(first_memblock_size, num_cams, - true, true); - - ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); - } else -#endif - ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); - - /* Finally limit subsequent allocations */ - memblock_set_current_limit(first_memblock_base + ppc64_rma_size); -} -#else /* ! CONFIG_PPC64 */ +#ifndef CONFIG_PPC64 void __init early_init_mmu(void) { unsigned long root = of_get_flat_dt_root(); diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c new file mode 100644 index 00000000000000..1dcda261554cc6 --- /dev/null +++ b/arch/powerpc/mm/nohash/tlb_64e.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2008,2009 Ben Herrenschmidt + * IBM Corp. 
+ * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* The variables below are currently only used on 64-bit Book3E + * though this will probably be made common with other nohash + * implementations at some point + */ +int mmu_pte_psize; /* Page size used for PTE pages */ +int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ +int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ +unsigned long linear_map_top; /* Top of linear mapping */ + + +/* + * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug + * exceptions. This is used for bolted and e6500 TLB miss handlers which + * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, + * this is set to zero. + */ +int extlb_level_exc; + +/* + * Handling of virtual linear page tables or indirect TLB entries + * flushing when PTE pages are freed + */ +void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) +{ + int tsize = mmu_psize_defs[mmu_pte_psize].enc; + + if (book3e_htw_mode != PPC_HTW_NONE) { + unsigned long start = address & PMD_MASK; + unsigned long end = address + PMD_SIZE; + unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift; + + /* This isn't the most optimal, ideally we would factor out the + * while preempt & CPU mask mucking around, or even the IPI but + * it will do for now + */ + while (start < end) { + __flush_tlb_page(tlb->mm, start, tsize, 1); + start += size; + } + } else { + unsigned long rmask = 0xf000000000000000ul; + unsigned long rid = (address & rmask) | 0x1000000000000000ul; + unsigned long vpte = address & ~rmask; + + vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful; + vpte |= rid; + __flush_tlb_page(tlb->mm, vpte, tsize, 0); + } +} + +static void __init setup_page_sizes(void) +{ + unsigned int tlb0cfg; + unsigned int eptcfg; + int psize; + +#ifdef CONFIG_PPC_E500 + unsigned int mmucfg = mfspr(SPRN_MMUCFG); + int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E); + + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) { + unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); + unsigned int min_pg, max_pg; + + min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; + max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def; + unsigned int shift; + + def = &mmu_psize_defs[psize]; + shift = def->shift; + + if (shift == 0 || shift & 1) + continue; + + /* adjust to be in terms of 4^shift Kb */ + shift = (shift - 10) >> 1; + + if ((shift >= min_pg) && (shift <= max_pg)) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + goto out; + } + + if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) { + u32 tlb1cfg, tlb1ps; + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb1cfg = mfspr(SPRN_TLB1CFG); + tlb1ps = mfspr(SPRN_TLB1PS); + eptcfg = mfspr(SPRN_EPTCFG); + + if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT)) + book3e_htw_mode = PPC_HTW_E6500; + + /* + * We expect 4K subpage size and unrestricted indirect size. + * The lack of a restriction on indirect size is a Freescale + * extension, indicated by PSn = 0 but SPSn != 0. 
+ */ + if (eptcfg != 2) + book3e_htw_mode = PPC_HTW_NONE; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + + if (!def->shift) + continue; + + if (tlb1ps & (1U << (def->shift - 10))) { + def->flags |= MMU_PAGE_SIZE_DIRECT; + + if (book3e_htw_mode && psize == MMU_PAGE_2M) + def->flags |= MMU_PAGE_SIZE_INDIRECT; + } + } + + goto out; + } +#endif +out: + /* Cleanup array and print summary */ + pr_info("MMU: Supported page sizes\n"); + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def = &mmu_psize_defs[psize]; + const char *__page_type_names[] = { + "unsupported", + "direct", + "indirect", + "direct & indirect" + }; + if (def->flags == 0) { + def->shift = 0; + continue; + } + pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10), + __page_type_names[def->flags & 0x3]); + } +} + +static void __init setup_mmu_htw(void) +{ + /* + * If we want to use HW tablewalk, enable it by patching the TLB miss + * handlers to branch to the one dedicated to it. + */ + + switch (book3e_htw_mode) { +#ifdef CONFIG_PPC_E500 + case PPC_HTW_E6500: + extlb_level_exc = EX_TLB_SIZE; + patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); + patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); + break; +#endif + } + pr_info("MMU: Book3E HW tablewalk %s\n", + book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported"); +} + +/* + * Early initialization of the MMU TLB code + */ +static void early_init_this_mmu(void) +{ + unsigned int mas4; + + /* Set MAS4 based on page table setting */ + + mas4 = 0x4 << MAS4_WIMGED_SHIFT; + switch (book3e_htw_mode) { + case PPC_HTW_E6500: + mas4 |= MAS4_INDD; + mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT; + mas4 |= MAS4_TLBSELD(1); + mmu_pte_psize = MMU_PAGE_2M; + break; + + case PPC_HTW_NONE: + mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT; + mmu_pte_psize = mmu_virtual_psize; + break; + } + mtspr(SPRN_MAS4, mas4); + +#ifdef CONFIG_PPC_E500 + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned int num_cams; + bool map = true; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + + /* + * Only do the mapping once per core, or else the + * transient mapping would cause problems. + */ +#ifdef CONFIG_SMP + if (hweight32(get_tensr()) > 1) + map = false; +#endif + + if (map) + linear_map_top = map_mem_in_cams(linear_map_top, + num_cams, false, true); + } +#endif + + /* A sync won't hurt us after mucking around with + * the MMU configuration + */ + mb(); +} + +static void __init early_init_mmu_global(void) +{ + /* XXX This should be decided at runtime based on supported + * page sizes in the TLB, but for now let's assume 16M is + * always there and a good fit (which it probably is) + * + * Freescale booke only supports 4K pages in TLB0, so use that. + */ + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) + mmu_vmemmap_psize = MMU_PAGE_4K; + else + mmu_vmemmap_psize = MMU_PAGE_16M; + + /* XXX This code only checks for TLB 0 capabilities and doesn't + * check what page size combos are supported by the HW. It + * also doesn't handle the case where a separate array holds + * the IND entries from the array loaded by the PT. 
+ */ + /* Look for supported page sizes */ + setup_page_sizes(); + + /* Look for HW tablewalk support */ + setup_mmu_htw(); + +#ifdef CONFIG_PPC_E500 + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + if (book3e_htw_mode == PPC_HTW_NONE) { + extlb_level_exc = EX_TLB_SIZE; + patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); + patch_exception(0x1e0, + exc_instruction_tlb_miss_bolted_book3e); + } + } +#endif + + /* Set the global containing the top of the linear mapping + * for use by the TLB miss code + */ + linear_map_top = memblock_end_of_DRAM(); + + ioremap_bot = IOREMAP_BASE; +} + +static void __init early_mmu_set_memory_limit(void) +{ +#ifdef CONFIG_PPC_E500 + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + /* + * Limit memory so we dont have linear faults. + * Unlike memblock_set_current_limit, which limits + * memory available during early boot, this permanently + * reduces the memory available to Linux. We need to + * do this because highmem is not supported on 64-bit. + */ + memblock_enforce_memory_limit(linear_map_top); + } +#endif + + memblock_set_current_limit(linear_map_top); +} + +/* boot cpu only */ +void __init early_init_mmu(void) +{ + early_init_mmu_global(); + early_init_this_mmu(); + early_mmu_set_memory_limit(); +} + +void early_init_mmu_secondary(void) +{ + early_init_this_mmu(); +} + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On non-FSL Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. + * + * on FSL Embedded 64-bit, usually all RAM is bolted, but with + * unusual memory sizes it's possible for some RAM to not be mapped + * (such RAM is not used at all by Linux, since we don't support + * highmem on 64-bit). We limit ppc64_rma_size to what would be + * mappable if this memblock is the only one. Additional memblocks + * can only increase, not decrease, the amount that ends up getting + * mapped. We still limit max to 1G even if we'll eventually map + * more. This is due to what the early init code is set up to do. + * + * We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... + */ +#ifdef CONFIG_PPC_E500 + if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned long linear_sz; + unsigned int num_cams; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + + linear_sz = map_mem_in_cams(first_memblock_size, num_cams, + true, true); + + ppc64_rma_size = min_t(u64, linear_sz, 0x40000000); + } else +#endif + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(first_memblock_base + ppc64_rma_size); +} From 547acc20e56792dda8a225d125ce4c26b16ae43d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 14:42:38 +0200 Subject: [PATCH 244/268] powerpc/64e: Define mmu_pte_psize static [ Upstream commit d92b5cc29c792f1d3f0aaa3b29dddfe816c03e88 ] mmu_pte_psize is only used in the tlb_64e.c, define it static. 
Fixes: 25d21ad6e799 ("powerpc: Add TLB management code for 64-bit Book3E") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202408011256.1O99IB0s-lkp@intel.com/ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/beb30d280eaa5d857c38a0834b147dffd6b28aa9.1724157750.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/mm/nohash/tlb_64e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c index 1dcda261554cc6..b6af3ec4d001dc 100644 --- a/arch/powerpc/mm/nohash/tlb_64e.c +++ b/arch/powerpc/mm/nohash/tlb_64e.c @@ -33,7 +33,7 @@ * though this will probably be made common with other nohash * implementations at some point */ -int mmu_pte_psize; /* Page size used for PTE pages */ +static int mmu_pte_psize; /* Page size used for PTE pages */ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ unsigned long linear_map_top; /* Top of linear mapping */ From af4d5630d99161c965f006f9aaef4dc2a19bd10c Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 20 Aug 2024 13:28:07 +0200 Subject: [PATCH 245/268] powerpc/vdso: Don't discard rela sections [ Upstream commit 6114139c3bdde992f4a19264e4f9bfc100d8d776 ] After building the VDSO, there is a verification that it contains no dynamic relocation, see commit aff69273af61 ("vdso: Improve cmd_vdso_check to check all dynamic relocations"). This verification uses readelf -r and doesn't work if rela sections are discarded. Fixes: 8ad57add77d3 ("powerpc/build: vdso linker warning for orphan sections") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/45c3e6fc76cad05ad2cac0f5b5dfb4fae86dc9d6.1724153239.git.christophe.leroy@csgroup.eu Signed-off-by: Sasha Levin --- arch/powerpc/kernel/vdso/vdso32.lds.S | 4 +++- arch/powerpc/kernel/vdso/vdso64.lds.S | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 426e1ccc6971a3..8f57107000a247 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -74,6 +74,8 @@ SECTIONS .got : { *(.got) } :text .plt : { *(.plt) } + .rela.dyn : { *(.rela .rela*) } + _end = .; __end = .; PROVIDE(end = .); @@ -87,7 +89,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.got1 .glink .iplt .rela*) + *(.got1 .glink .iplt) } } diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index bda6c8cdd459c0..400819258c06b7 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -69,7 +69,7 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .rela.dyn ALIGN(8) : { *(.rela .rela*) } .got ALIGN(8) : { *(.got .toc) } @@ -86,7 +86,7 @@ SECTIONS *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) *(.opd) - *(.glink .iplt .plt .rela*) + *(.glink .iplt .plt) } } From 585c598082e1935c73eee03e62dad69a639a49b0 Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Fri, 23 Aug 2024 14:43:42 +0000 Subject: [PATCH 246/268] ASoC: tegra: Fix CBB error during probe() [ Upstream commit 6781b962d97bc52715a8db8cc17278cc3c23ebe8 ] When Tegra audio drivers are 
built as part of the kernel image, TIMEOUT_ERR is observed from cbb-fabric. Following is seen on Jetson AGX Orin during boot: [ 8.012482] ************************************** [ 8.017423] CPU:0, Error:cbb-fabric, Errmon:2 [ 8.021922] Error Code : TIMEOUT_ERR [ 8.025966] Overflow : Multiple TIMEOUT_ERR [ 8.030644] [ 8.032175] Error Code : TIMEOUT_ERR [ 8.036217] MASTER_ID : CCPLEX [ 8.039722] Address : 0x290a0a8 [ 8.043318] Cache : 0x1 -- Bufferable [ 8.047630] Protection : 0x2 -- Unprivileged, Non-Secure, Data Access [ 8.054628] Access_Type : Write [ 8.106130] WARNING: CPU: 0 PID: 124 at drivers/soc/tegra/cbb/tegra234-cbb.c:604 tegra234_cbb_isr+0x134/0x178 [ 8.240602] Call trace: [ 8.243126] tegra234_cbb_isr+0x134/0x178 [ 8.247261] __handle_irq_event_percpu+0x60/0x238 [ 8.252132] handle_irq_event+0x54/0xb8 These errors happen when MVC device, which is a child of AHUB device, tries to access its device registers. This happens as part of call tegra210_mvc_reset_vol_settings() in MVC device probe(). The root cause of this problem is, the child MVC device gets probed before the AHUB clock gets enabled. The AHUB clock is enabled in runtime PM resume of parent AHUB device and due to the wrong sequence of pm_runtime_enable() in AHUB driver, runtime PM resume doesn't happen for AHUB device when MVC makes register access. Fix this by calling pm_runtime_enable() for parent AHUB device before of_platform_populate() in AHUB driver. This ensures that clock becomes available when MVC makes register access. Fixes: 16e1bcc2caf4 ("ASoC: tegra: Add Tegra210 based AHUB driver") Signed-off-by: Mohan Kumar Signed-off-by: Ritu Chaudhary Signed-off-by: Sameer Pujar Link: https://patch.msgid.link/20240823144342.4123814-3-spujar@nvidia.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/tegra/tegra210_ahub.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c index 3f114a2adfced0..ab3c6b2544d205 100644 --- a/sound/soc/tegra/tegra210_ahub.c +++ b/sound/soc/tegra/tegra210_ahub.c @@ -2,7 +2,7 @@ // // tegra210_ahub.c - Tegra210 AHUB driver // -// Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved. #include #include @@ -1391,11 +1391,13 @@ static int tegra_ahub_probe(struct platform_device *pdev) return err; } + pm_runtime_enable(&pdev->dev); + err = of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev); - if (err) + if (err) { + pm_runtime_disable(&pdev->dev); return err; - - pm_runtime_enable(&pdev->dev); + } return 0; } From 489f2913a63f528cfe3f21722583fb981967ecda Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 21 Aug 2024 16:28:26 +0200 Subject: [PATCH 247/268] nvmet-tcp: fix kernel crash if commands allocation fails [ Upstream commit 5572a55a6f830ee3f3a994b6b962a5c327d28cb3 ] If the commands allocation fails in nvmet_tcp_alloc_cmds() the kernel crashes in nvmet_tcp_release_queue_work() because of a NULL pointer dereference. nvmet: failed to install queue 0 cntlid 1 ret 6 Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Fix the bug by setting queue->nr_cmds to zero in case nvmet_tcp_alloc_cmd() fails. 
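(For context, a simplified sketch of the teardown pattern this fix relies on; it is paraphrased, not code from this patch, and the exact call chain lives in drivers/nvme/target/tcp.c. The queue release path frees commands with a loop bounded by queue->nr_cmds, roughly:

    for (i = 0; i < queue->nr_cmds; i++)
        nvmet_tcp_free_cmd(&queue->cmds[i]);
    kfree(queue->cmds);

If nr_cmds is left non-zero while queue->cmds was never allocated, that walk dereferences a NULL array; resetting nr_cmds to zero turns the teardown into a no-op for the commands that were never created.)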
Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver") Signed-off-by: Maurizio Lombardi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/target/tcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c65a1f4421f603..bd142aed20f456 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1859,8 +1859,10 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) } queue->nr_cmds = sq->size * 2; - if (nvmet_tcp_alloc_cmds(queue)) + if (nvmet_tcp_alloc_cmds(queue)) { + queue->nr_cmds = 0; return NVME_SC_INTERNAL; + } return 0; } From 05500a48d80d13390b1a7666cabbdc5f10fec667 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Aug 2024 11:20:57 -0700 Subject: [PATCH 248/268] nvme-pci: allocate tagset on reset if necessary [ Upstream commit 6f01bdbfef3b62955cf6503a8425d527b3a5cf94 ] If a drive is unable to create IO queues on the initial probe, a subsequent reset will need to allocate the tagset if IO queue creation is successful. Without this, blk_mq_update_nr_hw_queues will crash on a bad pointer due to the invalid tagset. Fixes: eac3ef262941f62 ("nvme-pci: split the initial probe from the rest path") Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Sasha Levin --- drivers/nvme/host/pci.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 89e0798af780d0..7fc1ab4d9e7d85 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2471,6 +2471,12 @@ static unsigned int nvme_pci_nr_maps(struct nvme_dev *dev) static void nvme_pci_update_nr_queues(struct nvme_dev *dev) { + if (!dev->ctrl.tagset) { + nvme_alloc_io_tag_set(&dev->ctrl, &dev->tagset, &nvme_mq_ops, + nvme_pci_nr_maps(dev), sizeof(struct nvme_iod)); + return; + } + blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); /* free previously allocated queues that are no longer usable */ nvme_free_queues(dev, dev->online_queues); From f39bde3f782570f44794ed9b0075d1e9057f849c Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Wed, 21 Aug 2024 12:10:04 +0800 Subject: [PATCH 249/268] ASoc: SOF: topology: Clear SOF link platform name upon unload [ Upstream commit e0be875c5bf03a9676a6bfed9e0f1766922a7dbd ] The SOF topology loading function sets the device name for the platform component link. This should be unset when unloading the topology, otherwise a machine driver unbind/bind or reprobe would complain about an invalid component as having both its component name and of_node set: mt8186_mt6366 sound: ASoC: Both Component name/of_node are set for AFE_SOF_DL1 mt8186_mt6366 sound: error -EINVAL: Cannot register card mt8186_mt6366 sound: probe with driver mt8186_mt6366 failed with error -22 This happens with machine drivers that set the of_node separately. Clear the SOF link platform name in the topology unload callback. 
Fixes: 311ce4fe7637 ("ASoC: SOF: Add support for loading topologies") Signed-off-by: Chen-Yu Tsai Link: https://patch.msgid.link/20240821041006.2618855-1-wenst@chromium.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sof/topology.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 7133ec13322b3f..cf1e63daad86bf 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -2040,6 +2040,8 @@ static int sof_link_unload(struct snd_soc_component *scomp, struct snd_soc_dobj if (!slink) return 0; + slink->link->platforms->name = NULL; + kfree(slink->tuples); list_del(&slink->list); kfree(slink->hw_configs); From b9bb9634368e2d6f1c87da9a08d201e089d5ec30 Mon Sep 17 00:00:00 2001 From: Matteo Martelli Date: Thu, 1 Aug 2024 14:07:19 +0200 Subject: [PATCH 250/268] ASoC: sunxi: sun4i-i2s: fix LRCLK polarity in i2s mode [ Upstream commit 3e83957e8dd7433a69116780d9bad217b00913ea ] This fixes the LRCLK polarity for sun8i-h3 and sun50i-h6 in i2s mode which was wrongly inverted. The LRCLK was being set in reversed logic compared to the DAI format: inverted LRCLK for SND_SOC_DAIFMT_IB_NF and SND_SOC_DAIFMT_NB_NF; normal LRCLK for SND_SOC_DAIFMT_IB_IF and SND_SOC_DAIFMT_NB_IF. Such reversed logic applies properly for DSP_A, DSP_B, LEFT_J and RIGHT_J modes but not for I2S mode, for which the LRCLK signal results reversed to what expected on the bus. The issue is due to a misinterpretation of the LRCLK polarity bit of the H3 and H6 i2s controllers. Such bit in this case does not mean "0 => normal" or "1 => inverted" according to the expected bus operation, but it means "0 => frame starts on low edge" and "1 => frame starts on high edge" (from the User Manuals). This commit fixes the LRCLK polarity by setting the LRCLK polarity bit according to the selected bus mode and renames the LRCLK polarity bit definition to avoid further confusion. Fixes: dd657eae8164 ("ASoC: sun4i-i2s: Fix the LRCK polarity") Fixes: 73adf87b7a58 ("ASoC: sun4i-i2s: Add support for H6 I2S") Signed-off-by: Matteo Martelli Link: https://patch.msgid.link/20240801-asoc-fix-sun4i-i2s-v2-1-a8e4e9daa363@gmail.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/sunxi/sun4i-i2s.c | 143 ++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 70 deletions(-) diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index 5124b6c9ceb4b9..d1cb49d54f0084 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -100,8 +100,8 @@ #define SUN8I_I2S_CTRL_MODE_PCM (0 << 4) #define SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK BIT(19) -#define SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED (1 << 19) -#define SUN8I_I2S_FMT0_LRCLK_POLARITY_NORMAL (0 << 19) +#define SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH (1 << 19) +#define SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW (0 << 19) #define SUN8I_I2S_FMT0_LRCK_PERIOD_MASK GENMASK(17, 8) #define SUN8I_I2S_FMT0_LRCK_PERIOD(period) ((period - 1) << 8) #define SUN8I_I2S_FMT0_BCLK_POLARITY_MASK BIT(7) @@ -727,65 +727,37 @@ static int sun4i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, unsigned int fmt) { - u32 mode, val; + u32 mode, lrclk_pol, bclk_pol, val; u8 offset; - /* - * DAI clock polarity - * - * The setup for LRCK contradicts the datasheet, but under a - * scope it's clear that the LRCK polarity is reversed - * compared to the expected polarity on the bus. 
- */ - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_IB_IF: - /* Invert both clocks */ - val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; - break; - case SND_SOC_DAIFMT_IB_NF: - /* Invert bit clock */ - val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED | - SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED; - break; - case SND_SOC_DAIFMT_NB_IF: - /* Invert frame clock */ - val = 0; - break; - case SND_SOC_DAIFMT_NB_NF: - val = SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED; - break; - default: - return -EINVAL; - } - - regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG, - SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK | - SUN8I_I2S_FMT0_BCLK_POLARITY_MASK, - val); - /* DAI Mode */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_DSP_A: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_PCM; offset = 1; break; case SND_SOC_DAIFMT_DSP_B: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_PCM; offset = 0; break; case SND_SOC_DAIFMT_I2S: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW; mode = SUN8I_I2S_CTRL_MODE_LEFT; offset = 1; break; case SND_SOC_DAIFMT_LEFT_J: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_LEFT; offset = 0; break; case SND_SOC_DAIFMT_RIGHT_J: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_RIGHT; offset = 0; break; @@ -803,6 +775,35 @@ static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, SUN8I_I2S_TX_CHAN_OFFSET_MASK, SUN8I_I2S_TX_CHAN_OFFSET(offset)); + /* DAI clock polarity */ + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_NORMAL; + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_IB_IF: + /* Invert both clocks */ + lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK; + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; + break; + case SND_SOC_DAIFMT_IB_NF: + /* Invert bit clock */ + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; + break; + case SND_SOC_DAIFMT_NB_IF: + /* Invert frame clock */ + lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK; + break; + case SND_SOC_DAIFMT_NB_NF: + /* No inversion */ + break; + default: + return -EINVAL; + } + + regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG, + SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK | + SUN8I_I2S_FMT0_BCLK_POLARITY_MASK, + lrclk_pol | bclk_pol); + /* DAI clock master masks */ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { case SND_SOC_DAIFMT_BP_FP: @@ -834,65 +835,37 @@ static int sun8i_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, static int sun50i_h6_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, unsigned int fmt) { - u32 mode, val; + u32 mode, lrclk_pol, bclk_pol, val; u8 offset; - /* - * DAI clock polarity - * - * The setup for LRCK contradicts the datasheet, but under a - * scope it's clear that the LRCK polarity is reversed - * compared to the expected polarity on the bus. 
- */ - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_IB_IF: - /* Invert both clocks */ - val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; - break; - case SND_SOC_DAIFMT_IB_NF: - /* Invert bit clock */ - val = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED | - SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED; - break; - case SND_SOC_DAIFMT_NB_IF: - /* Invert frame clock */ - val = 0; - break; - case SND_SOC_DAIFMT_NB_NF: - val = SUN8I_I2S_FMT0_LRCLK_POLARITY_INVERTED; - break; - default: - return -EINVAL; - } - - regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG, - SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK | - SUN8I_I2S_FMT0_BCLK_POLARITY_MASK, - val); - /* DAI Mode */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_DSP_A: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_PCM; offset = 1; break; case SND_SOC_DAIFMT_DSP_B: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_PCM; offset = 0; break; case SND_SOC_DAIFMT_I2S: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_LOW; mode = SUN8I_I2S_CTRL_MODE_LEFT; offset = 1; break; case SND_SOC_DAIFMT_LEFT_J: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_LEFT; offset = 0; break; case SND_SOC_DAIFMT_RIGHT_J: + lrclk_pol = SUN8I_I2S_FMT0_LRCLK_POLARITY_START_HIGH; mode = SUN8I_I2S_CTRL_MODE_RIGHT; offset = 0; break; @@ -910,6 +883,36 @@ static int sun50i_h6_i2s_set_soc_fmt(const struct sun4i_i2s *i2s, SUN50I_H6_I2S_TX_CHAN_SEL_OFFSET_MASK, SUN50I_H6_I2S_TX_CHAN_SEL_OFFSET(offset)); + /* DAI clock polarity */ + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_NORMAL; + + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_IB_IF: + /* Invert both clocks */ + lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK; + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; + break; + case SND_SOC_DAIFMT_IB_NF: + /* Invert bit clock */ + bclk_pol = SUN8I_I2S_FMT0_BCLK_POLARITY_INVERTED; + break; + case SND_SOC_DAIFMT_NB_IF: + /* Invert frame clock */ + lrclk_pol ^= SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK; + break; + case SND_SOC_DAIFMT_NB_NF: + /* No inversion */ + break; + default: + return -EINVAL; + } + + regmap_update_bits(i2s->regmap, SUN4I_I2S_FMT0_REG, + SUN8I_I2S_FMT0_LRCLK_POLARITY_MASK | + SUN8I_I2S_FMT0_BCLK_POLARITY_MASK, + lrclk_pol | bclk_pol); + + /* DAI clock master masks */ switch (fmt & SND_SOC_DAIFMT_CLOCK_PROVIDER_MASK) { case SND_SOC_DAIFMT_BP_FP: From a5e871d26ba182c2eb245761e90f6a8e8bad3001 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 19 Aug 2024 16:36:26 -0700 Subject: [PATCH 251/268] clk: qcom: gcc-sm8550: Don't use parking clk_ops for QUPs [ Upstream commit d10eeb75168b84ed9559c58efe2756c2e0bc052a ] The QUPs aren't shared in a way that requires parking the RCG at an always on parent in case some other entity turns on the clk. The hardware is capable of setting a new frequency itself with the DFS mode, so parking is unnecessary. Furthermore, there aren't any GDSCs for these devices, so there isn't a possibility of the GDSC turning on the clks for housekeeping purposes. This wasn't a problem to mark these clks shared until we started parking shared RCGs at clk registration time in commit 01a0a6cc8cfd ("clk: qcom: Park shared RCGs upon registration"). Parking at init is actually harmful to the UART when earlycon is used. If the device is pumping out data while the frequency changes you'll see garbage on the serial console until the driver can probe and actually set a proper frequency. 
Revert the QUP part of commit 929c75d57566 ("clk: qcom: gcc-sm8550: Mark RCGs shared where applicable") so that the QUPs don't get parked during clk registration and break UART operations. Fixes: 01a0a6cc8cfd ("clk: qcom: Park shared RCGs upon registration") Fixes: 929c75d57566 ("clk: qcom: gcc-sm8550: Mark RCGs shared where applicable") Cc: Konrad Dybcio Cc: Bjorn Andersson Cc: Taniya Das Reported-by: Amit Pundir Closes: https://lore.kernel.org/CAMi1Hd1KQBE4kKUdAn8E5FV+BiKzuv+8FoyWQrrTHPDoYTuhgA@mail.gmail.com Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240819233628.2074654-2-swboyd@chromium.org Tested-by: Amit Pundir Tested-by: Neil Armstrong # on SM8550-QRD Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/gcc-sm8550.c | 52 +++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/drivers/clk/qcom/gcc-sm8550.c b/drivers/clk/qcom/gcc-sm8550.c index b883dffe5f7aaa..bf7b7c5d4606c8 100644 --- a/drivers/clk/qcom/gcc-sm8550.c +++ b/drivers/clk/qcom/gcc-sm8550.c @@ -536,7 +536,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s0_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -551,7 +551,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s1_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -566,7 +566,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s2_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -581,7 +581,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s3_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -596,7 +596,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s4_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -611,7 +611,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s5_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -626,7 +626,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s6_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -641,7 +641,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s7_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -656,7 +656,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s8_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -671,7 +671,7 @@ static struct clk_rcg2 gcc_qupv3_i2c_s9_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }, }; @@ -700,7 +700,7 @@ static struct clk_init_data 
gcc_qupv3_wrap1_s0_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s0_clk_src = { @@ -717,7 +717,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s1_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s1_clk_src = { @@ -750,7 +750,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s2_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s2_clk_src = { @@ -767,7 +767,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s3_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s3_clk_src = { @@ -784,7 +784,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s4_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s4_clk_src = { @@ -801,7 +801,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s5_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s5_clk_src = { @@ -818,7 +818,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s6_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s6_clk_src = { @@ -835,7 +835,7 @@ static struct clk_init_data gcc_qupv3_wrap1_s7_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap1_s7_clk_src = { @@ -852,7 +852,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s0_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s0_clk_src = { @@ -869,7 +869,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s1_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s1_clk_src = { @@ -886,7 +886,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s2_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s2_clk_src = { @@ -903,7 +903,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s3_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = 
&clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s3_clk_src = { @@ -920,7 +920,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s4_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s4_clk_src = { @@ -937,7 +937,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s5_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s5_clk_src = { @@ -975,7 +975,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s6_clk_src_init = { .parent_data = gcc_parent_data_8, .num_parents = ARRAY_SIZE(gcc_parent_data_8), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s6_clk_src = { @@ -992,7 +992,7 @@ static struct clk_init_data gcc_qupv3_wrap2_s7_clk_src_init = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_ops, }; static struct clk_rcg2 gcc_qupv3_wrap2_s7_clk_src = { From 7c391eaf2c632554d273522da2dc66a94e93512c Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 19 Aug 2024 16:36:27 -0700 Subject: [PATCH 252/268] clk: qcom: gcc-sm8550: Don't park the USB RCG at registration time [ Upstream commit 7b6dfa1bbe7f727315d2e05a2fc8e4cfeb779156 ] Amit Pundir reports that audio and USB-C host mode stops working if the gcc_usb30_prim_master_clk_src clk is registered and clk_rcg2_shared_init() parks it on XO. Skip parking this clk at registration time to fix those issues. Partially revert commit 01a0a6cc8cfd ("clk: qcom: Park shared RCGs upon registration") by skipping the parking bit for this clk, but keep the part where we cache the config register. That's still necessary to figure out the true parent of the clk at registration time. 
Fixes: 01a0a6cc8cfd ("clk: qcom: Park shared RCGs upon registration") Fixes: 929c75d57566 ("clk: qcom: gcc-sm8550: Mark RCGs shared where applicable") Cc: Konrad Dybcio Cc: Bjorn Andersson Cc: Taniya Das Reported-by: Amit Pundir Closes: https://lore.kernel.org/CAMi1Hd1KQBE4kKUdAn8E5FV+BiKzuv+8FoyWQrrTHPDoYTuhgA@mail.gmail.com Signed-off-by: Stephen Boyd Link: https://lore.kernel.org/r/20240819233628.2074654-3-swboyd@chromium.org Tested-by: Amit Pundir Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/qcom/clk-rcg.h | 1 + drivers/clk/qcom/clk-rcg2.c | 30 ++++++++++++++++++++++++++++++ drivers/clk/qcom/gcc-sm8550.c | 2 +- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/clk/qcom/clk-rcg.h b/drivers/clk/qcom/clk-rcg.h index e6d84c8c7989c8..84c497f361bc6b 100644 --- a/drivers/clk/qcom/clk-rcg.h +++ b/drivers/clk/qcom/clk-rcg.h @@ -176,6 +176,7 @@ extern const struct clk_ops clk_byte2_ops; extern const struct clk_ops clk_pixel_ops; extern const struct clk_ops clk_gfx3d_ops; extern const struct clk_ops clk_rcg2_shared_ops; +extern const struct clk_ops clk_rcg2_shared_no_init_park_ops; extern const struct clk_ops clk_dp_ops; struct clk_rcg_dfs_data { diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b9f2a29be927c1..461f54fe5e4f1f 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -1182,6 +1182,36 @@ const struct clk_ops clk_rcg2_shared_ops = { }; EXPORT_SYMBOL_GPL(clk_rcg2_shared_ops); +static int clk_rcg2_shared_no_init_park(struct clk_hw *hw) +{ + struct clk_rcg2 *rcg = to_clk_rcg2(hw); + + /* + * Read the config register so that the parent is properly mapped at + * registration time. + */ + regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, &rcg->parked_cfg); + + return 0; +} + +/* + * Like clk_rcg2_shared_ops but skip the init so that the clk frequency is left + * unchanged at registration time. 
+ */ +const struct clk_ops clk_rcg2_shared_no_init_park_ops = { + .init = clk_rcg2_shared_no_init_park, + .enable = clk_rcg2_shared_enable, + .disable = clk_rcg2_shared_disable, + .get_parent = clk_rcg2_shared_get_parent, + .set_parent = clk_rcg2_shared_set_parent, + .recalc_rate = clk_rcg2_shared_recalc_rate, + .determine_rate = clk_rcg2_determine_rate, + .set_rate = clk_rcg2_shared_set_rate, + .set_rate_and_parent = clk_rcg2_shared_set_rate_and_parent, +}; +EXPORT_SYMBOL_GPL(clk_rcg2_shared_no_init_park_ops); + /* Common APIs to be used for DFS based RCGR */ static void clk_rcg2_dfs_populate_freq(struct clk_hw *hw, unsigned int l, struct freq_tbl *f) diff --git a/drivers/clk/qcom/gcc-sm8550.c b/drivers/clk/qcom/gcc-sm8550.c index bf7b7c5d4606c8..eb3765c57b6502 100644 --- a/drivers/clk/qcom/gcc-sm8550.c +++ b/drivers/clk/qcom/gcc-sm8550.c @@ -1159,7 +1159,7 @@ static struct clk_rcg2 gcc_usb30_prim_master_clk_src = { .parent_data = gcc_parent_data_0, .num_parents = ARRAY_SIZE(gcc_parent_data_0), .flags = CLK_SET_RATE_PARENT, - .ops = &clk_rcg2_shared_ops, + .ops = &clk_rcg2_shared_no_init_park_ops, }, }; From a65ebba8733727ffd9d0de78899ea6ef1791ebc7 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Aug 2024 18:58:37 +0300 Subject: [PATCH 253/268] drm/i915/fence: Mark debug_fence_init_onstack() with __maybe_unused [ Upstream commit fcd9e8afd546f6ced378d078345a89bf346d065e ] When debug_fence_init_onstack() is unused (CONFIG_DRM_I915_SELFTEST=n), it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y: .../i915_sw_fence.c:97:20: error: unused function 'debug_fence_init_onstack' [-Werror,-Wunused-function] 97 | static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) | ^~~~~~~~~~~~~~~~~~~~~~~~ Fix this by marking debug_fence_init_onstack() with __maybe_unused. See also commit 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build"). 
Fixes: 214707fc2ce0 ("drm/i915/selftests: Wrap a timer into a i915_sw_fence") Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240829155950.1141978-2-andriy.shevchenko@linux.intel.com Signed-off-by: Jani Nikula (cherry picked from commit 5bf472058ffb43baf6a4cdfe1d7f58c4c194c688) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/i915_sw_fence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 8a9aad523eec2c..d4020ff3549a65 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -51,7 +51,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) debug_object_init(fence, &i915_sw_fence_debug_descr); } -static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence) { debug_object_init_on_stack(fence, &i915_sw_fence_debug_descr); } @@ -94,7 +94,7 @@ static inline void debug_fence_init(struct i915_sw_fence *fence) { } -static inline void debug_fence_init_onstack(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_init_onstack(struct i915_sw_fence *fence) { } From 60d54a45dbbbac8af9f3352042bd30b527995aef Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 29 Aug 2024 18:58:38 +0300 Subject: [PATCH 254/268] drm/i915/fence: Mark debug_fence_free() with __maybe_unused [ Upstream commit f99999536128b14b5d765a9982763b5134efdd79 ] When debug_fence_free() is unused (CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS=n), it prevents kernel builds with clang, `make W=1` and CONFIG_WERROR=y: .../i915_sw_fence.c:118:20: error: unused function 'debug_fence_free' [-Werror,-Wunused-function] 118 | static inline void debug_fence_free(struct i915_sw_fence *fence) | ^~~~~~~~~~~~~~~~ Fix this by marking debug_fence_free() with __maybe_unused. See also commit 6863f5643dd7 ("kbuild: allow Clang to find unused static inline functions for W=1 build"). 
Fixes: fc1584059d6c ("drm/i915: Integrate i915_sw_fence with debugobjects") Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240829155950.1141978-3-andriy.shevchenko@linux.intel.com Signed-off-by: Jani Nikula (cherry picked from commit 8be4dce5ea6f2368cc25edc71989c4690fa66964) Signed-off-by: Joonas Lahtinen Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/i915_sw_fence.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index d4020ff3549a65..1d4cc91c0e40d5 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -77,7 +77,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence) debug_object_destroy(fence, &i915_sw_fence_debug_descr); } -static inline void debug_fence_free(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence) { debug_object_free(fence, &i915_sw_fence_debug_descr); smp_wmb(); /* flush the change in state before reallocation */ @@ -115,7 +115,7 @@ static inline void debug_fence_destroy(struct i915_sw_fence *fence) { } -static inline void debug_fence_free(struct i915_sw_fence *fence) +static inline __maybe_unused void debug_fence_free(struct i915_sw_fence *fence) { } From 9ceae54e65a39d456160fe6dfe1b13a475b88834 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 26 Aug 2024 17:08:32 +0200 Subject: [PATCH 255/268] gpio: rockchip: fix OF node leak in probe() [ Upstream commit adad2e460e505a556f5ea6f0dc16fe95e62d5d76 ] Driver code is leaking OF node reference from of_get_parent() in probe(). Fixes: 936ee2675eee ("gpio/rockchip: add driver for rockchip gpio") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Heiko Stuebner Reviewed-by: Shawn Lin Link: https://lore.kernel.org/r/20240826150832.65657-1-krzysztof.kozlowski@linaro.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-rockchip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index b35b9604413f08..caeb3bdc78f8db 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -713,6 +713,7 @@ static int rockchip_gpio_probe(struct platform_device *pdev) return -ENODEV; pctldev = of_pinctrl_get(pctlnp); + of_node_put(pctlnp); if (!pctldev) return -EPROBE_DEFER; From 52b688c8087b3fb1edd547b18a7cd9c80279212e Mon Sep 17 00:00:00 2001 From: Liao Chen Date: Mon, 2 Sep 2024 11:58:48 +0000 Subject: [PATCH 256/268] gpio: modepin: Enable module autoloading [ Upstream commit a5135526426df5319d5f4bcd15ae57c45a97714b ] Add MODULE_DEVICE_TABLE(), so modules could be properly autoloaded based on the alias from of_device_id table. 
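For context, a minimal sketch of the pattern (driver name, compatible
string and functions below are invented for illustration and are not the
modepin driver): a platform driver exports its of_device_id table with
MODULE_DEVICE_TABLE() so the alias ends up in modules.alias and udev can
autoload the module when a matching device tree node is present:

    #include <linux/module.h>
    #include <linux/of.h>
    #include <linux/platform_device.h>

    static int foo_probe(struct platform_device *pdev)
    {
            dev_info(&pdev->dev, "bound\n");
            return 0;
    }

    static const struct of_device_id foo_of_match[] = {
            { .compatible = "vendor,foo-ctrl" },    /* hypothetical */
            { }
    };
    /* Without this line the alias is never emitted into the module and
     * userspace cannot autoload it from the device tree node.
     */
    MODULE_DEVICE_TABLE(of, foo_of_match);

    static struct platform_driver foo_driver = {
            .probe = foo_probe,
            .driver = {
                    .name = "foo-ctrl",
                    .of_match_table = foo_of_match,
            },
    };
    module_platform_driver(foo_driver);

    MODULE_DESCRIPTION("Example driver skeleton");
    MODULE_LICENSE("GPL");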
Fixes: 7687a5b0ee93 ("gpio: modepin: Add driver support for modepin GPIO controller") Signed-off-by: Liao Chen Reviewed-by: Michal Simek Link: https://lore.kernel.org/r/20240902115848.904227-1-liaochen4@huawei.com Signed-off-by: Bartosz Golaszewski Signed-off-by: Sasha Levin --- drivers/gpio/gpio-zynqmp-modepin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-zynqmp-modepin.c b/drivers/gpio/gpio-zynqmp-modepin.c index a0d69387c1532d..2f3c9ebfa78d1d 100644 --- a/drivers/gpio/gpio-zynqmp-modepin.c +++ b/drivers/gpio/gpio-zynqmp-modepin.c @@ -146,6 +146,7 @@ static const struct of_device_id modepin_platform_id[] = { { .compatible = "xlnx,zynqmp-gpio-modepin", }, { } }; +MODULE_DEVICE_TABLE(of, modepin_platform_id); static struct platform_driver modepin_platform_driver = { .driver = { From b27ea9c96efd2c252a981fb00d0f001b86c90f3e Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Tue, 3 Sep 2024 10:53:23 -0300 Subject: [PATCH 257/268] smb: client: fix double put of @cfile in smb2_rename_path() [ Upstream commit 3523a3df03c6f04f7ea9c2e7050102657e331a4f ] If smb2_set_path_attr() is called with a valid @cfile and returned -EINVAL, we need to call cifs_get_writable_path() again as the reference of @cfile was already dropped by previous smb2_compound_op() call. Fixes: 71f15c90e785 ("smb: client: retry compound request without reusing lease") Signed-off-by: Paulo Alcantara (Red Hat) Cc: David Howells Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/smb/client/smb2inode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index fc82d5ebf923cf..dd8acd20775219 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -1106,6 +1106,8 @@ int smb2_rename_path(const unsigned int xid, co, DELETE, SMB2_OP_RENAME, cfile, source_dentry); if (rc == -EINVAL) { cifs_dbg(FYI, "invalid lease key, resending request without lease"); + cifs_get_writable_path(tcon, from_name, + FIND_WR_WITH_DELETE, &cfile); rc = smb2_set_path_attr(xid, tcon, from_name, to_name, cifs_sb, co, DELETE, SMB2_OP_RENAME, cfile, NULL); } From 8289dc916e9ef8b9bd1d8dc6b4ccaaa27c6ee7dc Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 19 Aug 2024 00:11:31 +0000 Subject: [PATCH 258/268] riscv: Fix toolchain vector detection [ Upstream commit 5ba7a75a53dffbf727e842b5847859bb482ac4aa ] A recent change to gcc flags rv64iv as no longer valid: cc1: sorry, unimplemented: Currently the 'V' implementation requires the 'M' extension and as a result vector support is disabled. Fix this by adding m to our toolchain vector detection code. 
Signed-off-by: Anton Blanchard Fixes: fa8e7cce55da ("riscv: Enable Vector code to be built") Link: https://lore.kernel.org/r/20240819001131.1738806-1-antonb@tenstorrent.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index c785a020057381..d5d70dc5656ebe 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -489,8 +489,8 @@ config RISCV_ISA_SVPBMT config TOOLCHAIN_HAS_V bool default y - depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64iv) - depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32iv) + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64imv) + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32imv) depends on LLD_VERSION >= 140000 || LD_VERSION >= 23800 depends on AS_HAS_OPTION_ARCH From bd29d8452079085a1d91427ac81ceeddf7940279 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Tue, 27 Aug 2024 08:52:30 +0200 Subject: [PATCH 259/268] riscv: Do not restrict memory size because of linear mapping on nommu [ Upstream commit 5f771088a2b5edd6f2c5c9f34484ca18dc389f3e ] It makes no sense to restrict physical memory size because of linear mapping size constraints when there is no linear mapping, so only do that when mmu is enabled. Reported-by: Geert Uytterhoeven Closes: https://lore.kernel.org/linux-riscv/CAMuHMdW0bnJt5GMRtOZGkTiM7GK4UaLJCDMF_Ouq++fnDKi3_A@mail.gmail.com/ Fixes: 3b6564427aea ("riscv: Fix linear mapping checks for non-contiguous memory regions") Signed-off-by: Alexandre Ghiti Tested-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240827065230.145021-1-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index abe7a7a7686c17..3245bb525212e3 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -235,7 +235,7 @@ static void __init setup_bootmem(void) * The size of the linear page mapping may restrict the amount of * usable RAM. */ - if (IS_ENABLED(CONFIG_64BIT)) { + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) { max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; memblock_cap_memory_range(phys_ram_base, max_mapped_addr - phys_ram_base); From 136a29d8112df4ea0a57f9602ddf3579e04089dc Mon Sep 17 00:00:00 2001 From: Li Nan Date: Wed, 4 Sep 2024 11:13:48 +0800 Subject: [PATCH 260/268] ublk_drv: fix NULL pointer dereference in ublk_ctrl_start_recovery() [ Upstream commit e58f5142f88320a5b1449f96a146f2f24615c5c7 ] When two UBLK_CMD_START_USER_RECOVERY commands are submitted, the first one sets 'ubq->ubq_daemon' to NULL, and the second one triggers WARN in ublk_queue_reinit() and subsequently a NULL pointer dereference issue. Fix it by adding the check in ublk_ctrl_start_recovery() and return immediately in case of zero 'ub->nr_queues_ready'. BUG: kernel NULL pointer dereference, address: 0000000000000028 RIP: 0010:ublk_ctrl_start_recovery.constprop.0+0x82/0x180 Call Trace: ? __die+0x20/0x70 ? page_fault_oops+0x75/0x170 ? exc_page_fault+0x64/0x140 ? asm_exc_page_fault+0x22/0x30 ? ublk_ctrl_start_recovery.constprop.0+0x82/0x180 ublk_ctrl_uring_cmd+0x4f7/0x6c0 ? pick_next_task_idle+0x26/0x40 io_uring_cmd+0x9a/0x1b0 io_issue_sqe+0x193/0x3f0 io_wq_submit_work+0x9b/0x390 io_worker_handle_work+0x165/0x360 io_wq_worker+0xcb/0x2f0 ? finish_task_switch.isra.0+0x203/0x290 ? finish_task_switch.isra.0+0x203/0x290 ? 
__pfx_io_wq_worker+0x10/0x10 ret_from_fork+0x2d/0x50 ? __pfx_io_wq_worker+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Fixes: c732a852b419 ("ublk_drv: add START_USER_RECOVERY and END_USER_RECOVERY support") Reported-and-tested-by: Changhui Zhong Closes: https://lore.kernel.org/all/CAGVVp+UvLiS+bhNXV-h2icwX1dyybbYHeQUuH7RYqUvMQf6N3w@mail.gmail.com Reviewed-by: Ming Lei Signed-off-by: Li Nan Link: https://lore.kernel.org/r/20240904031348.4139545-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- drivers/block/ublk_drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index f4e0573c471149..bf7f68e90953bc 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2603,6 +2603,8 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub, mutex_lock(&ub->mutex); if (!ublk_can_use_recovery(ub)) goto out_unlock; + if (!ub->nr_queues_ready) + goto out_unlock; /* * START_RECOVERY is only allowd after: * From a2977c0ca3e97dc4c1665f27117810902fdeb0a9 Mon Sep 17 00:00:00 2001 From: Andrea Parri Date: Wed, 31 Jan 2024 15:49:33 +0100 Subject: [PATCH 261/268] membarrier: riscv: Add full memory barrier in switch_mm() commit d6cfd1770f20392d7009ae1fdb04733794514fa9 upstream. The membarrier system call requires a full memory barrier after storing to rq->curr, before going back to user-space. The barrier is only needed when switching between processes: the barrier is implied by mmdrop() when switching from kernel to userspace, and it's not needed when switching from userspace to kernel. Rely on the feature/mechanism ARCH_HAS_MEMBARRIER_CALLBACKS and on the primitive membarrier_arch_switch_mm(), already adopted by the PowerPC architecture, to insert the required barrier. Fixes: fab957c11efe2f ("RISC-V: Atomic and Locking Code") Signed-off-by: Andrea Parri Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20240131144936.29190-2-parri.andrea@gmail.com Signed-off-by: Palmer Dabbelt Signed-off-by: WangYuli Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 2 +- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/membarrier.h | 31 +++++++++++++++++++++++++++++ arch/riscv/mm/context.c | 2 ++ kernel/sched/core.c | 5 +++-- 5 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 arch/riscv/include/asm/membarrier.h diff --git a/MAINTAINERS b/MAINTAINERS index f09415b2b3c5cf..ae4c0cec507360 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13702,7 +13702,7 @@ M: Mathieu Desnoyers M: "Paul E. 
McKenney" L: linux-kernel@vger.kernel.org S: Supported -F: arch/powerpc/include/asm/membarrier.h +F: arch/*/include/asm/membarrier.h F: include/uapi/linux/membarrier.h F: kernel/sched/membarrier.c diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d5d70dc5656ebe..fe30b24d52e189 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -27,6 +27,7 @@ config RISCV select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_MMIOWB select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PMEM_API diff --git a/arch/riscv/include/asm/membarrier.h b/arch/riscv/include/asm/membarrier.h new file mode 100644 index 00000000000000..6c016ebb5020af --- /dev/null +++ b/arch/riscv/include/asm/membarrier.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASM_RISCV_MEMBARRIER_H +#define _ASM_RISCV_MEMBARRIER_H + +static inline void membarrier_arch_switch_mm(struct mm_struct *prev, + struct mm_struct *next, + struct task_struct *tsk) +{ + /* + * Only need the full barrier when switching between processes. + * Barrier when switching from kernel to userspace is not + * required here, given that it is implied by mmdrop(). Barrier + * when switching from userspace to kernel is not needed after + * store to rq->curr. + */ + if (IS_ENABLED(CONFIG_SMP) && + likely(!(atomic_read(&next->membarrier_state) & + (MEMBARRIER_STATE_PRIVATE_EXPEDITED | + MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev)) + return; + + /* + * The membarrier system call requires a full memory barrier + * after storing to rq->curr, before going back to user-space. + * Matches a full barrier in the proximity of the membarrier + * system call entry. + */ + smp_mb(); +} + +#endif /* _ASM_RISCV_MEMBARRIER_H */ diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 217fd4de613422..ba8eb3944687cf 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -323,6 +323,8 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, if (unlikely(prev == next)) return; + membarrier_arch_switch_mm(prev, next, task); + /* * Mark the current MM context as inactive, and the next as * active. This is at least used by the icache flushing diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 97571d390f1848..9b406d9886541b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6679,8 +6679,9 @@ static void __sched notrace __schedule(unsigned int sched_mode) * * Here are the schemes providing that barrier on the * various architectures: - * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC. - * switch_mm() rely on membarrier_arch_switch_mm() on PowerPC. + * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC, + * RISC-V. switch_mm() relies on membarrier_arch_switch_mm() + * on PowerPC and on RISC-V. * - finish_lock_switch() for weakly-ordered * architectures where spin_unlock is a full barrier, * - switch_to() for arm64 (weakly-ordered, spin_unlock From 8eeda5fb5938a20b8dda767c629e29f867cc061d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 6 Aug 2024 20:48:43 +0200 Subject: [PATCH 262/268] x86/mm: Fix PTI for i386 some more commit c48b5a4cf3125adb679e28ef093f66ff81368d05 upstream. So it turns out that we have to do two passes of pti_clone_entry_text(), once before initcalls, such that device and late initcalls can use user-mode-helper / modprobe and once after free_initmem() / mark_readonly(). 
Now obviously mark_readonly() can cause PMD splits, and pti_clone_pgtable() doesn't like that much. Allow the late clone to split PMDs so that pagetables stay in sync. [peterz: Changelog and comments] Reported-by: Guenter Roeck Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Tested-by: Guenter Roeck Link: https://lkml.kernel.org/r/20240806184843.GX37996@noisy.programming.kicks-ass.net Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/pti.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 41d8c8f475a7cc..83a6bdf0b498ef 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!pmd) return NULL; - /* We can't do anything sensible if we hit a large mapping. */ + /* Large PMD mapping found */ if (pmd_large(*pmd)) { - WARN_ON(1); - return NULL; + /* Clear the PMD if we hit a large mapping from the first round */ + if (late_text) { + set_pmd(pmd, __pmd(0)); + } else { + WARN_ON_ONCE(1); + return NULL; + } } if (pmd_none(*pmd)) { @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void) if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) return; - target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false); if (WARN_ON(!target_pte)) return; @@ -301,7 +306,7 @@ enum pti_clone_level { static void pti_clone_pgtable(unsigned long start, unsigned long end, - enum pti_clone_level level) + enum pti_clone_level level, bool late_text) { unsigned long addr; @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, return; /* Allocate PTE in the user page-table */ - target_pte = pti_user_pagetable_walk_pte(addr); + target_pte = pti_user_pagetable_walk_pte(addr, late_text); if (WARN_ON(!target_pte)) return; @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void) phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); pte_t *target_pte; - target_pte = pti_user_pagetable_walk_pte(va); + target_pte = pti_user_pagetable_walk_pte(va, false); if (WARN_ON(!target_pte)) return; @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void) start = CPU_ENTRY_AREA_BASE; end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); - pti_clone_pgtable(start, end, PTI_CLONE_PMD); + pti_clone_pgtable(start, end, PTI_CLONE_PMD, false); } #endif /* CONFIG_X86_64 */ @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry text and force it RO. */ -static void pti_clone_entry_text(void) +static void pti_clone_entry_text(bool late) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_LEVEL_KERNEL_IMAGE); + PTI_LEVEL_KERNEL_IMAGE, late); } /* @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void) * pti_set_kernel_image_nonglobal() did to clear the * global bit. 
*/ - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); + pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false); /* * pti_clone_pgtable() will set the global bit in any PMDs @@ -638,8 +643,15 @@ void __init pti_init(void) /* Undo all global bits from the init pagetables in head_64.S: */ pti_set_kernel_image_nonglobal(); + /* Replace some of the global bits just for shared entry text: */ - pti_clone_entry_text(); + /* + * This is very early in boot. Device and Late initcalls can do + * modprobe before free_initmem() and mark_readonly(). This + * pti_clone_entry_text() allows those user-mode-helpers to function, + * but notably the text is still RW. + */ + pti_clone_entry_text(false); pti_setup_espfix64(); pti_setup_vsyscall(); } @@ -656,10 +668,11 @@ void pti_finalize(void) if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* - * We need to clone everything (again) that maps parts of the - * kernel image. + * This is after free_initmem() (all initcalls are done) and we've done + * mark_readonly(). Text is now NX which might've split some PMDs + * relative to the early clone. */ - pti_clone_entry_text(); + pti_clone_entry_text(true); pti_clone_kernel_text(); debug_checkwx_user(); From 7b5595f33c3c273613b590892a578d78186bb400 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 29 Aug 2024 18:25:49 +0100 Subject: [PATCH 263/268] btrfs: fix race between direct IO write and fsync when using same fd commit cd9253c23aedd61eb5ff11f37a36247cd46faf86 upstream. If we have 2 threads that are using the same file descriptor and one of them is doing direct IO writes while the other is doing fsync, we have a race where we can end up either: 1) Attempt a fsync without holding the inode's lock, triggering an assertion failures when assertions are enabled; 2) Do an invalid memory access from the fsync task because the file private points to memory allocated on stack by the direct IO task and it may be used by the fsync task after the stack was destroyed. The race happens like this: 1) A user space program opens a file descriptor with O_DIRECT; 2) The program spawns 2 threads using libpthread for example; 3) One of the threads uses the file descriptor to do direct IO writes, while the other calls fsync using the same file descriptor. 4) Call task A the thread doing direct IO writes and task B the thread doing fsyncs; 5) Task A does a direct IO write, and at btrfs_direct_write() sets the file's private to an on stack allocated private with the member 'fsync_skip_inode_lock' set to true; 6) Task B enters btrfs_sync_file() and sees that there's a private structure associated to the file which has 'fsync_skip_inode_lock' set to true, so it skips locking the inode's VFS lock; 7) Task A completes the direct IO write, and resets the file's private to NULL since it had no prior private and our private was stack allocated. Then it unlocks the inode's VFS lock; 8) Task B enters btrfs_get_ordered_extents_for_logging(), then the assertion that checks the inode's VFS lock is held fails, since task B never locked it and task A has already unlocked it. The stack trace produced is the following: assertion failed: inode_is_locked(&inode->vfs_inode), in fs/btrfs/ordered-data.c:983 ------------[ cut here ]------------ kernel BUG at fs/btrfs/ordered-data.c:983! 
Oops: invalid opcode: 0000 [#1] PREEMPT SMP PTI CPU: 9 PID: 5072 Comm: worker Tainted: G U OE 6.10.5-1-default #1 openSUSE Tumbleweed 69f48d427608e1c09e60ea24c6c55e2ca1b049e8 Hardware name: Acer Predator PH315-52/Covini_CFS, BIOS V1.12 07/28/2020 RIP: 0010:btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs] Code: 50 d6 86 c0 e8 (...) RSP: 0018:ffff9e4a03dcfc78 EFLAGS: 00010246 RAX: 0000000000000054 RBX: ffff9078a9868e98 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff907dce4a7800 RDI: ffff907dce4a7800 RBP: ffff907805518800 R08: 0000000000000000 R09: ffff9e4a03dcfb38 R10: ffff9e4a03dcfb30 R11: 0000000000000003 R12: ffff907684ae7800 R13: 0000000000000001 R14: ffff90774646b600 R15: 0000000000000000 FS: 00007f04b96006c0(0000) GS:ffff907dce480000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f32acbfc000 CR3: 00000001fd4fa005 CR4: 00000000003726f0 Call Trace: ? __die_body.cold+0x14/0x24 ? die+0x2e/0x50 ? do_trap+0xca/0x110 ? do_error_trap+0x6a/0x90 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? exc_invalid_op+0x50/0x70 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? asm_exc_invalid_op+0x1a/0x20 ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? btrfs_get_ordered_extents_for_logging.cold+0x1f/0x42 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] btrfs_sync_file+0x21a/0x4d0 [btrfs bb26272d49b4cdc847cf3f7faadd459b62caee9a] ? __seccomp_filter+0x31d/0x4f0 __x64_sys_fdatasync+0x4f/0x90 do_syscall_64+0x82/0x160 ? do_futex+0xcb/0x190 ? __x64_sys_futex+0x10e/0x1d0 ? switch_fpu_return+0x4f/0xd0 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 ? syscall_exit_to_user_mode+0x72/0x220 ? do_syscall_64+0x8e/0x160 entry_SYSCALL_64_after_hwframe+0x76/0x7e Another problem here is if task B grabs the private pointer and then uses it after task A has finished, since the private was allocated in the stack of task A, it results in some invalid memory access with a hard to predict result. This issue, triggering the assertion, was observed with QEMU workloads by two users in the Link tags below. Fix this by not relying on a file's private to pass information to fsync that it should skip locking the inode and instead pass this information through a special value stored in current->journal_info. This is safe because in the relevant section of the direct IO write path we are not holding a transaction handle, so current->journal_info is NULL. The following C program triggers the issue: $ cat repro.c /* Get the O_DIRECT definition. 
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #include #include #include #include #include #include #include static int fd; static ssize_t do_write(int fd, const void *buf, size_t count, off_t offset) { while (count > 0) { ssize_t ret; ret = pwrite(fd, buf, count, offset); if (ret < 0) { if (errno == EINTR) continue; return ret; } count -= ret; buf += ret; } return 0; } static void *fsync_loop(void *arg) { while (1) { int ret; ret = fsync(fd); if (ret != 0) { perror("Fsync failed"); exit(6); } } } int main(int argc, char *argv[]) { long pagesize; void *write_buf; pthread_t fsyncer; int ret; if (argc != 2) { fprintf(stderr, "Use: %s \n", argv[0]); return 1; } fd = open(argv[1], O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT, 0666); if (fd == -1) { perror("Failed to open/create file"); return 1; } pagesize = sysconf(_SC_PAGE_SIZE); if (pagesize == -1) { perror("Failed to get page size"); return 2; } ret = posix_memalign(&write_buf, pagesize, pagesize); if (ret) { perror("Failed to allocate buffer"); return 3; } ret = pthread_create(&fsyncer, NULL, fsync_loop, NULL); if (ret != 0) { fprintf(stderr, "Failed to create writer thread: %d\n", ret); return 4; } while (1) { ret = do_write(fd, write_buf, pagesize, 0); if (ret != 0) { perror("Write failed"); exit(5); } } return 0; } $ mkfs.btrfs -f /dev/sdi $ mount /dev/sdi /mnt/sdi $ timeout 10 ./repro /mnt/sdi/foo Usually the race is triggered within less than 1 second. A test case for fstests will follow soon. Reported-by: Paulo Dias Link: https://bugzilla.kernel.org/show_bug.cgi?id=219187 Reported-by: Andreas Jahn Link: https://bugzilla.kernel.org/show_bug.cgi?id=219199 Reported-by: syzbot+4704b3cc972bd76024f1@syzkaller.appspotmail.com Link: https://lore.kernel.org/linux-btrfs/00000000000044ff540620d7dee2@google.com/ Fixes: 939b656bc8ab ("btrfs: fix corruption after buffer fault in during direct IO append write") CC: stable@vger.kernel.org # 5.15+ Reviewed-by: Josef Bacik Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.h | 1 - fs/btrfs/file.c | 25 ++++++++++--------------- fs/btrfs/transaction.h | 6 ++++++ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 86c7f8ce1715e7..06333a74d6c4cb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -445,7 +445,6 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; - bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 952cf145c6295a..15fd8c00f4c083 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1543,13 +1543,6 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) if (IS_ERR_OR_NULL(dio)) { err = PTR_ERR_OR_ZERO(dio); } else { - struct btrfs_file_private stack_private = { 0 }; - struct btrfs_file_private *private; - const bool have_private = (file->private_data != NULL); - - if (!have_private) - file->private_data = &stack_private; - /* * If we have a synchoronous write, we must make sure the fsync * triggered by the iomap_dio_complete() call below doesn't @@ -1558,13 +1551,10 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from) * partial writes due to the input buffer (or parts of it) not * being already faulted in. 
*/ - private = file->private_data; - private->fsync_skip_inode_lock = true; + ASSERT(current->journal_info == NULL); + current->journal_info = BTRFS_TRANS_DIO_WRITE_STUB; err = iomap_dio_complete(dio); - private->fsync_skip_inode_lock = false; - - if (!have_private) - file->private_data = NULL; + current->journal_info = NULL; } /* No increment (+=) because iomap returns a cumulative value. */ @@ -1796,7 +1786,6 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { - struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); @@ -1806,7 +1795,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; - const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); + bool skip_ilock = false; + + if (current->journal_info == BTRFS_TRANS_DIO_WRITE_STUB) { + skip_ilock = true; + current->journal_info = NULL; + lockdep_assert_held(&inode->i_rwsem); + } trace_btrfs_sync_file(file, datasync); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 238a0ab85df9b8..7623db359881e5 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -12,6 +12,12 @@ #include "ctree.h" #include "misc.h" +/* + * Signal that a direct IO write is in progress, to avoid deadlock for sync + * direct IO writes when fsync is called during the direct IO write path. + */ +#define BTRFS_TRANS_DIO_WRITE_STUB ((void *) 1) + /* Radix-tree tag for roots that are part of the trasaction. */ #define BTRFS_ROOT_TRANS_TAG 0 From 50b6744c12fa49d7af51e37935c582efb41173f1 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Thu, 5 Sep 2024 13:15:37 +0200 Subject: [PATCH 264/268] spi: spi-fsl-lpspi: Fix off-by-one in prescale max commit ff949d981c775332be94be70397ee1df20bc68e5 upstream. The commit 783bf5d09f86 ("spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register") doesn't implement the prescaler maximum as intended. The maximum allowed value for i.MX93 should be 1 and for i.MX7ULP it should be 7. So this needs also a adjustment of the comparison in the scldiv calculation. 
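In other words, with prescale_max = 7 the valid prescaler values are 0..7
(dividers 1 << 0 through 1 << 7, i.e. 1..128), so iterating with "<"
silently drops the largest divider. A minimal sketch of the corrected
search loop (function name, parameters and return convention are invented
for illustration, loosely modeled on fsl_lpspi_set_bitrate()):

    #include <linux/errno.h>
    #include <linux/kernel.h>

    static int pick_prescale(unsigned int perclk_rate, unsigned int speed_hz,
                             unsigned int prescale_max,
                             unsigned int *prescale, unsigned int *scldiv)
    {
            unsigned int div = DIV_ROUND_UP(perclk_rate, speed_hz);
            unsigned int p;

            /* "<=" so that prescale_max itself is still tried. */
            for (p = 0; p <= prescale_max; p++) {
                    unsigned int s = div / (1 << p);

                    if (s >= 2 && s - 2 < 256) {
                            *prescale = p;
                            *scldiv = s - 2;
                            return 0;
                    }
            }
            return -EINVAL;
    }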
Fixes: 783bf5d09f86 ("spi: spi-fsl-lpspi: limit PRESCALE bit in TCR register") Signed-off-by: Stefan Wahren Link: https://patch.msgid.link/20240905111537.90389-1-wahrenst@gmx.net Signed-off-by: Mark Brown Signed-off-by: Greg Kroah-Hartman --- drivers/spi/spi-fsl-lpspi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index f02b2f74168137..f7e268cf908507 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -136,7 +136,7 @@ static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { }; static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { - .prescale_max = 8, + .prescale_max = 7, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { @@ -336,7 +336,7 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) div = DIV_ROUND_UP(perclk_rate, config.speed_hz); - for (prescale = 0; prescale < prescale_max; prescale++) { + for (prescale = 0; prescale <= prescale_max; prescale++) { scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; From 78155f30beebe128b3512c54e4d108ea3cca547b Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 8 Mar 2024 11:02:48 -0500 Subject: [PATCH 265/268] Bluetooth: hci_sync: Fix UAF in hci_acl_create_conn_sync commit 3d1c16e920c88eb5e583e1b4a10b95a5dc97ec22 upstream. This fixes the following error caused by hci_conn being freed while hcy_acl_create_conn_sync is pending: ================================================================== BUG: KASAN: slab-use-after-free in hci_acl_create_conn_sync+0xa7/0x2e0 Write of size 2 at addr ffff888002ae0036 by task kworker/u3:0/848 CPU: 0 PID: 848 Comm: kworker/u3:0 Not tainted 6.8.0-rc6-g2ab3e8d67fc1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x21/0x70 print_report+0xce/0x620 ? preempt_count_sub+0x13/0xc0 ? __virt_addr_valid+0x15f/0x310 ? hci_acl_create_conn_sync+0xa7/0x2e0 kasan_report+0xdf/0x110 ? hci_acl_create_conn_sync+0xa7/0x2e0 hci_acl_create_conn_sync+0xa7/0x2e0 ? __pfx_hci_acl_create_conn_sync+0x10/0x10 ? __pfx_lock_release+0x10/0x10 ? __pfx_hci_acl_create_conn_sync+0x10/0x10 hci_cmd_sync_work+0x138/0x1c0 process_one_work+0x405/0x800 ? __pfx_lock_acquire+0x10/0x10 ? __pfx_process_one_work+0x10/0x10 worker_thread+0x37b/0x670 ? __pfx_worker_thread+0x10/0x10 kthread+0x19b/0x1e0 ? kthread+0xfe/0x1e0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x50 ? 
__pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Allocated by task 847: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 __kasan_kmalloc+0x8f/0xa0 hci_conn_add+0xc6/0x970 hci_connect_acl+0x309/0x410 pair_device+0x4fb/0x710 hci_sock_sendmsg+0x933/0xef0 sock_write_iter+0x2c3/0x2d0 do_iter_readv_writev+0x21a/0x2e0 vfs_writev+0x21c/0x7b0 do_writev+0x14a/0x180 do_syscall_64+0x77/0x150 entry_SYSCALL_64_after_hwframe+0x6c/0x74 Freed by task 847: kasan_save_stack+0x33/0x60 kasan_save_track+0x14/0x30 kasan_save_free_info+0x3b/0x60 __kasan_slab_free+0xfa/0x150 kfree+0xcb/0x250 device_release+0x58/0xf0 kobject_put+0xbb/0x160 hci_conn_del+0x281/0x570 hci_conn_hash_flush+0xfc/0x130 hci_dev_close_sync+0x336/0x960 hci_dev_close+0x10e/0x140 hci_sock_ioctl+0x14a/0x5c0 sock_ioctl+0x58a/0x5d0 __x64_sys_ioctl+0x480/0xf60 do_syscall_64+0x77/0x150 entry_SYSCALL_64_after_hwframe+0x6c/0x74 Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sync.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index f3c51315eb16a7..17dcab7aac1b9c 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6748,6 +6748,9 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) struct hci_cp_create_conn cp; int err; + if (!hci_conn_valid(hdev, conn)) + return -ECANCELED; + /* Many controllers disallow HCI Create Connection while it is doing * HCI Inquiry. So we cancel the Inquiry first before issuing HCI Create * Connection. This may cause the MGMT discovering state to become false From 4d6cf010d876a5ceb3db4ae845aa6fa3de34c766 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Tue, 20 Feb 2024 13:10:47 -0500 Subject: [PATCH 266/268] Bluetooth: hci_sync: Fix UAF on create_le_conn_complete commit f7cbce60a38a6589f0dade720d4c2544959ecc0e upstream. While waiting for hci_dev_lock the hci_conn object may be cleanup causing the following trace: BUG: KASAN: slab-use-after-free in hci_connect_le_scan_cleanup+0x29/0x350 Read of size 8 at addr ffff888001a50a30 by task kworker/u3:1/111 CPU: 0 PID: 111 Comm: kworker/u3:1 Not tainted 6.8.0-rc2-00701-g8179b15ab3fd-dirty #6418 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl+0x21/0x70 print_report+0xce/0x620 ? preempt_count_sub+0x13/0xc0 ? __virt_addr_valid+0x15f/0x310 ? hci_connect_le_scan_cleanup+0x29/0x350 kasan_report+0xdf/0x110 ? 
hci_connect_le_scan_cleanup+0x29/0x350 hci_connect_le_scan_cleanup+0x29/0x350 create_le_conn_complete+0x25c/0x2c0 Fixes: 881559af5f5c ("Bluetooth: hci_sync: Attempt to dequeue connection attempt") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sync.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 17dcab7aac1b9c..985281def3ee57 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6824,6 +6824,9 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err) hci_dev_lock(hdev); + if (!hci_conn_valid(hdev, conn)) + goto done; + if (!err) { hci_connect_le_scan_cleanup(conn, 0x00); goto done; From 611e4281117ab749b713723311de3ed8ed3ad031 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Fri, 16 Feb 2024 15:29:55 -0500 Subject: [PATCH 267/268] Bluetooth: hci_sync: Fix UAF on hci_abort_conn_sync commit 7453847fb22c7c45334c43cc6a02ea5df5b9961d upstream. Fixes the following trace where hci_acl_create_conn_sync attempts to call hci_abort_conn_sync after timeout: BUG: KASAN: slab-use-after-free in hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) Read of size 2 at addr ffff88800322c032 by task kworker/u3:2/36 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014 Workqueue: hci0 hci_cmd_sync_work Call Trace: dump_stack_lvl (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:67 ./arch/x86/include/asm/irqflags.h:127 lib/dump_stack.c:107) print_report (mm/kasan/report.c:378 mm/kasan/report.c:488) ? preempt_count_sub (kernel/sched/core.c:5889) ? __virt_addr_valid (./arch/x86/include/asm/preempt.h:103 (discriminator 1) ./include/linux/rcupdate.h:865 (discriminator 1) ./include/linux/mmzone.h:2026 (discriminator 1) arch/x86/mm/physaddr.c:65 (discriminator 1)) ? hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) kasan_report (mm/kasan/report.c:603) ? hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) hci_abort_conn_sync (net/bluetooth/hci_sync.c:5439) ? 
__pfx_hci_abort_conn_sync (net/bluetooth/hci_sync.c:5433) hci_acl_create_conn_sync (net/bluetooth/hci_sync.c:6681) Fixes: 45340097ce6e ("Bluetooth: hci_conn: Only do ACL connections sequentially") Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sync.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 985281def3ee57..af7817a7c585bb 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -6796,15 +6796,10 @@ static int hci_acl_create_conn_sync(struct hci_dev *hdev, void *data) else cp.role_switch = 0x00; - err = __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, - sizeof(cp), &cp, - HCI_EV_CONN_COMPLETE, - HCI_ACL_CONN_TIMEOUT, NULL); - - if (err == -ETIMEDOUT) - hci_abort_conn_sync(hdev, conn, HCI_ERROR_LOCAL_HOST_TERM); - - return err; + return __hci_cmd_sync_status_sk(hdev, HCI_OP_CREATE_CONN, + sizeof(cp), &cp, + HCI_EV_CONN_COMPLETE, + conn->conn_timeout, NULL); } int hci_connect_acl_sync(struct hci_dev *hdev, struct hci_conn *conn) From 6d1dc55b5bab93ef868d223b740d527ee7501063 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Sep 2024 11:11:45 +0200 Subject: [PATCH 268/268] Linux 6.6.51 Link: https://lore.kernel.org/r/20240910092608.225137854@linuxfoundation.org Tested-by: Harshit Mogalapalli Tested-by: Florian Fainelli Tested-by: Mark Brown Tested-by: Shuah Khan Tested-by: Takeshi Ogasawara Tested-by: Linux Kernel Functional Testing Tested-by: Ron Economos Tested-by: Kexy Biscuit Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f7efbb59c98652..6dea0c21636820 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 50 +SUBLEVEL = 51 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth